diff --git a/idl/bulk_load.thrift b/idl/bulk_load.thrift index 90fcf5b690..cb2e77c4b8 100644 --- a/idl/bulk_load.thrift +++ b/idl/bulk_load.thrift @@ -89,15 +89,16 @@ struct partition_bulk_load_state // meta server -> replica server struct bulk_load_request { - 1:dsn.gpid pid; - 2:string app_name; - 3:dsn.rpc_address primary_addr; - 4:string remote_provider_name; - 5:string cluster_name; - 6:i64 ballot; - 7:bulk_load_status meta_bulk_load_status; - 8:bool query_bulk_load_metadata; - 9:string remote_root_path; + 1:dsn.gpid pid; + 2:string app_name; + 3:dsn.rpc_address primary_addr; + 4:string remote_provider_name; + 5:string cluster_name; + 6:i64 ballot; + 7:bulk_load_status meta_bulk_load_status; + 8:bool query_bulk_load_metadata; + 9:string remote_root_path; + 10:optional dsn.host_port hp_primary; } struct bulk_load_response @@ -109,28 +110,30 @@ struct bulk_load_response // - ERR_FILE_OPERATION_FAILED: local file system error during bulk load downloading // - ERR_FS_INTERNAL: remote file provider error during bulk load downloading // - ERR_CORRUPTION: metadata corruption during bulk load downloading - 1:dsn.error_code err; - 2:dsn.gpid pid; - 3:string app_name; - 4:bulk_load_status primary_bulk_load_status; - 5:map group_bulk_load_state; - 6:optional bulk_load_metadata metadata; - 7:optional i32 total_download_progress; - 8:optional bool is_group_ingestion_finished; - 9:optional bool is_group_bulk_load_context_cleaned_up; - 10:optional bool is_group_bulk_load_paused; + 1:dsn.error_code err; + 2:dsn.gpid pid; + 3:string app_name; + 4:bulk_load_status primary_bulk_load_status; + 5:map group_bulk_load_state; + 6:optional bulk_load_metadata metadata; + 7:optional i32 total_download_progress; + 8:optional bool is_group_ingestion_finished; + 9:optional bool is_group_bulk_load_context_cleaned_up; + 10:optional bool is_group_bulk_load_paused; + 11:optional map hp_group_bulk_load_state; } // primary -> secondary struct group_bulk_load_request { - 1:string app_name; - 2:dsn.rpc_address target_address; - 3:metadata.replica_configuration config; - 4:string provider_name; - 5:string cluster_name; - 6:bulk_load_status meta_bulk_load_status; - 7:string remote_root_path; + 1:string app_name; + 2:dsn.rpc_address target; + 3:metadata.replica_configuration config; + 4:string provider_name; + 5:string cluster_name; + 6:bulk_load_status meta_bulk_load_status; + 7:string remote_root_path; + 8:optional dsn.host_port hp_target; } struct group_bulk_load_response @@ -209,15 +212,16 @@ struct query_bulk_load_response // - ERR_CORRUPTION: file not exist or damaged // - ERR_INGESTION_FAILED: ingest failed // - ERR_RETRY_EXHAUSTED: retry too many times - 1:dsn.error_code err; - 2:string app_name; - 3:bulk_load_status app_status; - 4:list partitions_status; - 5:i32 max_replica_count; + 1:dsn.error_code err; + 2:string app_name; + 3:bulk_load_status app_status; + 4:list partitions_status; + 5:i32 max_replica_count; // detailed bulk load state for each replica - 6:list> bulk_load_states; - 7:optional string hint_msg; - 8:optional bool is_bulk_loading; + 6:list> bulk_load_states; + 7:optional string hint_msg; + 8:optional bool is_bulk_loading; + 9:optional list> hp_bulk_load_states; } struct clear_bulk_load_state_request diff --git a/idl/dsn.layer2.thrift b/idl/dsn.layer2.thrift index 086040c9cb..762a564e79 100644 --- a/idl/dsn.layer2.thrift +++ b/idl/dsn.layer2.thrift @@ -33,14 +33,17 @@ namespace py pypegasus.replication struct partition_configuration { - 1:dsn.gpid pid; - 2:i64 ballot; - 3:i32 max_replica_count; - 
4:dsn.rpc_address primary; - 5:list secondaries; - 6:list last_drops; - 7:i64 last_committed_decree; - 8:i32 partition_flags; + 1:dsn.gpid pid; + 2:i64 ballot; + 3:i32 max_replica_count; + 4:dsn.rpc_address primary; + 5:list secondaries; + 6:list last_drops; + 7:i64 last_committed_decree; + 8:i32 partition_flags; + 9:optional dsn.host_port hp_primary; + 10:optional list hp_secondaries; + 11:optional list hp_last_drops; } struct query_cfg_request diff --git a/idl/duplication.thrift b/idl/duplication.thrift index 90fcdfd096..5bf2566960 100644 --- a/idl/duplication.thrift +++ b/idl/duplication.thrift @@ -150,6 +150,7 @@ struct duplication_sync_request 1:dsn.rpc_address node; 2:map> confirm_list; + 3:dsn.host_port hp_node; } struct duplication_sync_response diff --git a/idl/meta_admin.thrift b/idl/meta_admin.thrift index f24a1834ba..f5ad79b400 100644 --- a/idl/meta_admin.thrift +++ b/idl/meta_admin.thrift @@ -67,15 +67,16 @@ struct configuration_update_request { 1:dsn.layer2.app_info info; 2:dsn.layer2.partition_configuration config; - 3:config_type type = config_type.CT_INVALID; - 4:dsn.rpc_address node; - 5:dsn.rpc_address host_node; // deprecated, only used by stateless apps + 3:config_type type = config_type.CT_INVALID; + 4:dsn.rpc_address node; + 5:dsn.rpc_address host_node; // deprecated, only used by stateless apps // Used for partition split // if replica is splitting (whose split_status is not NOT_SPLIT) // the `meta_split_status` will be set // only used when on_config_sync - 6:optional metadata.split_status meta_split_status; + 6:optional metadata.split_status meta_split_status; + 7:optional dsn.host_port hp_node; } // meta server (config mgr) => primary | secondary (downgrade) (w/ new config) @@ -100,9 +101,10 @@ struct replica_server_info struct configuration_query_by_node_request { - 1:dsn.rpc_address node; + 1:dsn.rpc_address node; 2:optional list stored_replicas; - 3:optional replica_server_info info; + 3:optional replica_server_info info; + 4:optional dsn.host_port hp_node; } struct configuration_query_by_node_response @@ -114,9 +116,10 @@ struct configuration_query_by_node_response struct configuration_recovery_request { - 1:list recovery_set; - 2:bool skip_bad_nodes; - 3:bool skip_lost_partitions; + 1:list recovery_set; + 2:bool skip_bad_nodes; + 3:bool skip_lost_partitions; + 4:optional list hp_recovery_set; } struct configuration_recovery_response @@ -204,7 +207,8 @@ struct configuration_list_apps_response struct query_app_info_request { - 1:dsn.rpc_address meta_server; + 1:dsn.rpc_address meta_server; + 2:optional dsn.host_port hp_meta_server; } struct query_app_info_response @@ -278,8 +282,9 @@ struct query_app_manual_compact_response struct node_info { - 1:node_status status = node_status.NS_INVALID; - 2:dsn.rpc_address address; + 1:node_status status = node_status.NS_INVALID; + 2:dsn.rpc_address address; + 3:optional dsn.host_port hp_address; } struct configuration_list_nodes_request @@ -342,13 +347,15 @@ enum balancer_request_type struct configuration_proposal_action { - 1:dsn.rpc_address target; - 2:dsn.rpc_address node; - 3:config_type type; + 1:dsn.rpc_address target; + 2:dsn.rpc_address node; + 3:config_type type; // depricated now // new fields of this struct should start with 5 // 4:i64 period_ts; + 5:optional dsn.host_port hp_target; + 6:optional dsn.host_port hp_node; } struct configuration_balancer_request @@ -374,13 +381,14 @@ struct ddd_diagnose_request struct ddd_node_info { - 1:dsn.rpc_address node; - 2:i64 drop_time_ms; - 3:bool is_alive; // if the node 
is alive now - 4:bool is_collected; // if replicas has been collected from this node - 5:i64 ballot; // collected && ballot == -1 means replica not exist on this node - 6:i64 last_committed_decree; - 7:i64 last_prepared_decree; + 1:dsn.rpc_address node; + 2:i64 drop_time_ms; + 3:bool is_alive; // if the node is alive now + 4:bool is_collected; // if replicas has been collected from this node + 5:i64 ballot; // collected && ballot == -1 means replica not exist on this node + 6:i64 last_committed_decree; + 7:i64 last_prepared_decree; + 8:optional dsn.host_port hp_node; } struct ddd_partition_info diff --git a/idl/metadata.thrift b/idl/metadata.thrift index 5a7d3e4b3e..bafb4e2d6f 100644 --- a/idl/metadata.thrift +++ b/idl/metadata.thrift @@ -84,20 +84,21 @@ struct file_meta struct replica_configuration { - 1:dsn.gpid pid; - 2:i64 ballot; - 3:dsn.rpc_address primary; - 4:partition_status status = partition_status.PS_INVALID; - 5:i64 learner_signature; + 1:dsn.gpid pid; + 2:i64 ballot; + 3:dsn.rpc_address primary; + 4:partition_status status = partition_status.PS_INVALID; + 5:i64 learner_signature; // Used for bulk load // secondary will pop all committed mutations even if buffer is not full - 6:optional bool pop_all = false; + 6:optional bool pop_all = false; // Used for partition split when primary send prepare message to secondary // 1. true - secondary should copy mutation in this prepare message synchronously, // and _is_sync_to_child in mutation structure should set true // 2. false - secondary copy mutation in this prepare message asynchronously // NOTICE: it should always be false when update_local_configuration - 7:optional bool split_sync_to_child = false; + 7:optional bool split_sync_to_child = false; + 8:optional dsn.host_port hp_primary; } struct replica_info diff --git a/idl/partition_split.thrift b/idl/partition_split.thrift index a5724ea773..4e44c51b3a 100644 --- a/idl/partition_split.thrift +++ b/idl/partition_split.thrift @@ -95,10 +95,11 @@ struct query_split_response // child to primary parent, notifying that itself has caught up with parent struct notify_catch_up_request { - 1:dsn.gpid parent_gpid; - 2:dsn.gpid child_gpid; - 3:i64 child_ballot; - 4:dsn.rpc_address child_address; + 1:dsn.gpid parent_gpid; + 2:dsn.gpid child_gpid; + 3:i64 child_ballot; + 4:dsn.rpc_address child; + 5:optional dsn.host_port hp_child; } struct notify_cacth_up_response @@ -112,10 +113,11 @@ struct notify_cacth_up_response // primary parent -> child replicas to update partition count struct update_child_group_partition_count_request { - 1:dsn.rpc_address target_address; - 2:i32 new_partition_count; - 3:dsn.gpid child_pid; - 4:i64 ballot; + 1:dsn.rpc_address target; + 2:i32 new_partition_count; + 3:dsn.gpid child_pid; + 4:i64 ballot; + 5:optional dsn.host_port hp_target; } struct update_child_group_partition_count_response @@ -129,10 +131,11 @@ struct update_child_group_partition_count_response // primary parent -> meta server, register child on meta_server struct register_child_request { - 1:dsn.layer2.app_info app; - 2:dsn.layer2.partition_configuration parent_config; - 3:dsn.layer2.partition_configuration child_config; - 4:dsn.rpc_address primary_address; + 1:dsn.layer2.app_info app; + 2:dsn.layer2.partition_configuration parent_config; + 3:dsn.layer2.partition_configuration child_config; + 4:dsn.rpc_address primary; + 5:optional dsn.host_port hp_primary; } struct register_child_response diff --git a/idl/replica_admin.thrift b/idl/replica_admin.thrift index ef6bf6f028..122c6f6548 100644 --- 
a/idl/replica_admin.thrift +++ b/idl/replica_admin.thrift @@ -68,8 +68,9 @@ struct disk_info // This request is sent from client to replica_server. struct query_disk_info_request { - 1:dsn.rpc_address node; - 2:string app_name; + 1:dsn.rpc_address node; + 2:string app_name; + 3:optional dsn.host_port hp_node; } // This response is from replica_server to client. diff --git a/run.sh b/run.sh index f7c7870f5c..a8ec64edd7 100755 --- a/run.sh +++ b/run.sh @@ -27,7 +27,8 @@ export REPORT_DIR="$ROOT/test_report" export THIRDPARTY_ROOT=$ROOT/thirdparty export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:${ROOT}/lib:${BUILD_LATEST_DIR}/output/lib:${THIRDPARTY_ROOT}/output/lib:${LD_LIBRARY_PATH} # Disable AddressSanitizerOneDefinitionRuleViolation, see https://github.com/google/sanitizers/issues/1017 for details. -export ASAN_OPTIONS=detect_odr_violation=0 +# Add parameters in order to be able to generate coredump file when run ASAN tests +export ASAN_OPTIONS=detect_odr_violation=0:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 # See https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues. # Now we choose libgcc, because of https://github.com/apache/incubator-pegasus/issues/1685. export TCMALLOC_STACKTRACE_METHOD=libgcc # Can be generic_fp, generic_fp_unsafe, libunwind or libgcc @@ -241,6 +242,7 @@ function run_build() if [ ! -z "${SANITIZER}" ]; then CMAKE_OPTIONS="${CMAKE_OPTIONS} -DSANITIZER=${SANITIZER}" + echo "ASAN_OPTIONS=$ASAN_OPTIONS" fi MAKE_OPTIONS="-j$JOB_NUM" @@ -484,7 +486,9 @@ function run_test() # Update options if needed, this should be done before starting onebox to make new options take effect. if [ "${module}" == "recovery_test" ]; then master_count=1 - opts="meta_state_service_type=meta_state_service_simple;distributed_lock_service_type=distributed_lock_service_simple" + # all test case in recovery_test just run one meta_server, so we should change it + fqdn=`hostname -f` + opts="server_list=$fqdn:34601;meta_state_service_type=meta_state_service_simple;distributed_lock_service_type=distributed_lock_service_simple" fi if [ "${module}" == "backup_restore_test" ]; then opts="cold_backup_disabled=false;cold_backup_checkpoint_reserve_minutes=0;cold_backup_root=onebox" @@ -509,6 +513,20 @@ function run_test() # Run server test. pushd ${BUILD_LATEST_DIR}/bin/${module} + local function_tests=( + backup_restore_test + recovery_test + restore_test + base_api_test + throttle_test + bulk_load_test + detect_hotspot_test + partition_split_test + ) + # function_tests need client used meta_server_list to connect + if [[ "${function_tests[@]}" =~ "${module}" ]]; then + sed -i "s/@LOCAL_HOSTNAME@/${LOCAL_HOSTNAME}/g" ./config.ini + fi REPORT_DIR=${REPORT_DIR} TEST_BIN=${module} TEST_OPTS=${test_opts} ./run.sh if [ $? 
!= 0 ]; then echo "run test \"$module\" in `pwd` failed" diff --git a/scripts/recompile_thrift.sh b/scripts/recompile_thrift.sh index 444a503483..52e5397be2 100755 --- a/scripts/recompile_thrift.sh +++ b/scripts/recompile_thrift.sh @@ -30,7 +30,7 @@ rm -rf $TMP_DIR mkdir -p $TMP_DIR $THIRDPARTY_ROOT/output/bin/thrift --gen cpp:moveable_types -out $TMP_DIR ../idl/rrdb.thrift -sed 's/#include "dsn_types.h"/#include "runtime\/rpc\/rpc_address.h"\n#include "runtime\/task\/task_code.h"\n#include "utils\/blob.h"/' $TMP_DIR/rrdb_types.h > ../src/include/rrdb/rrdb_types.h +sed 's/#include "dsn_types.h"/#include "runtime\/rpc\/rpc_address.h"\n#include "runtime\/rpc\/rpc_host_port.h"\n#include "runtime\/task\/task_code.h"\n#include "utils\/blob.h"/' $TMP_DIR/rrdb_types.h > ../src/include/rrdb/rrdb_types.h sed 's/#include "rrdb_types.h"/#include /' $TMP_DIR/rrdb_types.cpp > ../src/base/rrdb_types.cpp rm -rf $TMP_DIR diff --git a/src/client/partition_resolver.cpp b/src/client/partition_resolver.cpp index 993e487741..96d505e924 100644 --- a/src/client/partition_resolver.cpp +++ b/src/client/partition_resolver.cpp @@ -31,6 +31,7 @@ #include "partition_resolver_manager.h" #include "runtime/api_layer1.h" #include "runtime/api_task.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/task/task_spec.h" #include "utils/fmt_logging.h" #include "utils/threadpool_code.h" @@ -39,7 +40,7 @@ namespace dsn { namespace replication { /*static*/ partition_resolver_ptr partition_resolver::get_resolver(const char *cluster_name, - const std::vector &meta_list, + const std::vector &meta_list, const char *app_name) { return partition_resolver_manager::instance().find_or_create(cluster_name, meta_list, app_name); @@ -129,7 +130,7 @@ void partition_resolver::call_task(const rpc_response_task_ptr &t) } hdr.gpid = result.pid; } - dsn_rpc_call(result.address, t.get()); + dsn_rpc_call(dns_resolver::instance().resolve_address(result.hp), t.get()); }, hdr.client.timeout_ms); } diff --git a/src/client/partition_resolver.h b/src/client/partition_resolver.h index 8ffdbf5d5d..8d54c272c2 100644 --- a/src/client/partition_resolver.h +++ b/src/client/partition_resolver.h @@ -34,7 +34,7 @@ #include #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -53,7 +53,7 @@ class partition_resolver : public ref_counter public: static dsn::ref_ptr get_resolver(const char *cluster_name, - const std::vector &meta_list, + const std::vector &meta_list, const char *app_name); /** @@ -93,12 +93,12 @@ class partition_resolver : public ref_counter std::string get_app_name() const { return _app_name; } - dsn::rpc_address get_meta_server() const { return _meta_server; } + dsn::host_port get_meta_server() const { return _meta_server; } const char *log_prefix() const { return _app_name.c_str(); } protected: - partition_resolver(rpc_address meta_server, const char *app_name) + partition_resolver(host_port meta_server, const char *app_name) : _app_name(app_name), _meta_server(meta_server) { } @@ -113,13 +113,13 @@ class partition_resolver : public ref_counter ///< should call resolve_async in this case error_code err; ///< IPv4 of the target to send request to - rpc_address address; + host_port hp; ///< global partition indentity dsn::gpid pid; }; /** - * resolve partition_hash into IP or group addresses to know what to connect next + * resolve partition_hash into IP or group host_port to 
know what to connect next * * \param partition_hash the partition hash * \param callback callback invoked on completion or timeout @@ -137,13 +137,13 @@ class partition_resolver : public ref_counter \param partition_index zero-based index of the partition. \param err error code - this is usually to trigger new round of address resolve + this is usually to trigger new round of host_port resolve */ virtual void on_access_failure(int partition_index, error_code err) = 0; std::string _cluster_name; std::string _app_name; - rpc_address _meta_server; + host_port _meta_server; }; typedef ref_ptr partition_resolver_ptr; diff --git a/src/client/partition_resolver_manager.cpp b/src/client/partition_resolver_manager.cpp index 2c0ae7f012..b4c2a6273a 100644 --- a/src/client/partition_resolver_manager.cpp +++ b/src/client/partition_resolver_manager.cpp @@ -25,16 +25,18 @@ */ #include +#include #include "client/partition_resolver.h" #include "partition_resolver_manager.h" #include "partition_resolver_simple.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" namespace dsn { + namespace replication { template @@ -54,21 +56,21 @@ bool vector_equal(const std::vector &a, const std::vector &b) } partition_resolver_ptr partition_resolver_manager::find_or_create( - const char *cluster_name, const std::vector &meta_list, const char *app_name) + const char *cluster_name, const std::vector &meta_list, const char *app_name) { dsn::zauto_lock l(_lock); std::map &app_map = _resolvers[cluster_name]; partition_resolver_ptr &ptr = app_map[app_name]; if (ptr == nullptr) { - dsn::rpc_address meta_group; + dsn::host_port meta_group; meta_group.assign_group(cluster_name); - meta_group.group_address()->add_list(meta_list); + meta_group.group_host_port()->add_list(meta_list); ptr = new partition_resolver_simple(meta_group, app_name); return ptr; } else { - dsn::rpc_address meta_group = ptr->get_meta_server(); - const std::vector &existing_list = meta_group.group_address()->members(); + const auto &meta_group = ptr->get_meta_server(); + const auto &existing_list = meta_group.group_host_port()->members(); if (!vector_equal(meta_list, existing_list)) { LOG_ERROR("meta list not match for cluster({})", cluster_name); return nullptr; diff --git a/src/client/partition_resolver_manager.h b/src/client/partition_resolver_manager.h index 97e895cec7..f006d13b55 100644 --- a/src/client/partition_resolver_manager.h +++ b/src/client/partition_resolver_manager.h @@ -35,7 +35,7 @@ #include "utils/zlocks.h" namespace dsn { -class rpc_address; +class host_port; namespace replication { @@ -43,7 +43,7 @@ class partition_resolver_manager : public dsn::utils::singleton &meta_list, + const std::vector &meta_list, const char *app_name); private: diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index ed2ef12489..bb35f82f46 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -35,6 +35,7 @@ #include "dsn.layer2_types.h" #include "partition_resolver_simple.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -48,7 +49,7 @@ namespace dsn { namespace replication { -partition_resolver_simple::partition_resolver_simple(rpc_address meta_server, const char 
*app_name) +partition_resolver_simple::partition_resolver_simple(host_port meta_server, const char *app_name) : partition_resolver(meta_server, app_name), _app_id(-1), _app_partition_count(-1), @@ -63,12 +64,12 @@ void partition_resolver_simple::resolve(uint64_t partition_hash, int idx = -1; if (_app_partition_count != -1) { idx = get_partition_index(_app_partition_count, partition_hash); - rpc_address target; - auto err = get_address(idx, target); + host_port target; + auto err = get_host_port(idx, target); if (dsn_unlikely(err == ERR_CHILD_NOT_READY)) { // child partition is not ready, its requests should be sent to parent partition idx -= _app_partition_count / 2; - err = get_address(idx, target); + err = get_host_port(idx, target); } if (dsn_likely(err == ERR_OK)) { callback(resolve_result{ERR_OK, target, {_app_id, idx}}); @@ -128,7 +129,7 @@ void partition_resolver_simple::clear_all_pending_requests() pc.second->query_config_task->cancel(true); for (auto &rc : pc.second->requests) { - end_request(std::move(rc), ERR_TIMEOUT, rpc_address()); + end_request(std::move(rc), ERR_TIMEOUT, host_port()); } delete pc.second; } @@ -137,12 +138,12 @@ void partition_resolver_simple::clear_all_pending_requests() void partition_resolver_simple::on_timeout(request_context_ptr &&rc) const { - end_request(std::move(rc), ERR_TIMEOUT, rpc_address(), true); + end_request(std::move(rc), ERR_TIMEOUT, host_port(), true); } void partition_resolver_simple::end_request(request_context_ptr &&request, error_code err, - rpc_address addr, + host_port hp, bool called_by_timer) const { zauto_lock l(request->lock); @@ -153,7 +154,7 @@ void partition_resolver_simple::end_request(request_context_ptr &&request, if (!called_by_timer && request->timeout_timer != nullptr) request->timeout_timer->cancel(false); - request->callback(resolve_result{err, addr, {_app_id, request->partition_index}}); + request->callback(resolve_result{err, hp, {_app_id, request->partition_index}}); request->completed = true; } @@ -164,13 +165,13 @@ void partition_resolver_simple::call(request_context_ptr &&request, bool from_me { int pindex = request->partition_index; if (-1 != pindex) { - // fill target address if possible - rpc_address addr; - auto err = get_address(pindex, addr); + // fill target host_port if possible + host_port hp; + auto err = get_host_port(pindex, hp); - // target address known + // target host_port known if (err == ERR_OK) { - end_request(std::move(request), ERR_OK, addr); + end_request(std::move(request), ERR_OK, hp); return; } } @@ -180,7 +181,7 @@ void partition_resolver_simple::call(request_context_ptr &&request, bool from_me // timeout will happen very soon, no way to get the rpc call done if (nts + 100 >= request->timeout_ts_us) // within 100 us { - end_request(std::move(request), ERR_TIMEOUT, rpc_address()); + end_request(std::move(request), ERR_TIMEOUT, host_port()); return; } @@ -260,7 +261,7 @@ task_ptr partition_resolver_simple::query_config(int partition_index, int timeou marshall(msg, req); return rpc::call( - _meta_server, + dns_resolver::instance().resolve_address(_meta_server), msg, &_tracker, [this, partition_index](error_code err, dsn::message_ex *req, dsn::message_ex *resp) { @@ -303,9 +304,10 @@ void partition_resolver_simple::query_config_reply(error_code err, for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) { auto &new_config = *it; - LOG_DEBUG_PREFIX("query config reply, gpid = {}, ballot = {}, primary = {}", + LOG_DEBUG_PREFIX("query config reply, gpid = {}, ballot = {}, 
primary = {}({})", new_config.pid, new_config.ballot, + new_config.hp_primary, new_config.primary); auto it2 = _config_cache.find(new_config.pid.get_partition_index()); @@ -393,16 +395,16 @@ void partition_resolver_simple::handle_pending_requests(std::dequepartition_index, addr); + host_port hp; + err = get_host_port(req->partition_index, hp); if (err == ERR_OK) { - end_request(std::move(req), err, addr); + end_request(std::move(req), err, hp); } else { call(std::move(req), true); } } else if (err == ERR_HANDLER_NOT_FOUND || err == ERR_APP_NOT_EXIST || err == ERR_OPERATION_DISABLED) { - end_request(std::move(req), err, rpc_address()); + end_request(std::move(req), err, host_port()); } else { call(std::move(req), true); } @@ -411,20 +413,20 @@ void partition_resolver_simple::handle_pending_requests(std::dequesecond->config); - if (addr.is_invalid()) { + hp = get_host_port(it->second->config); + if (hp.is_invalid()) { return ERR_IO_PENDING; } else { return ERR_OK; diff --git a/src/client/partition_resolver_simple.h b/src/client/partition_resolver_simple.h index 2cfb97d832..41ec74e791 100644 --- a/src/client/partition_resolver_simple.h +++ b/src/client/partition_resolver_simple.h @@ -34,7 +34,7 @@ #include "client/partition_resolver.h" #include "common/serialization_helper/dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" @@ -49,7 +49,7 @@ namespace replication { class partition_resolver_simple : public partition_resolver { public: - partition_resolver_simple(rpc_address meta_server, const char *app_name); + partition_resolver_simple(host_port meta_server, const char *app_name); virtual ~partition_resolver_simple(); @@ -107,8 +107,8 @@ class partition_resolver_simple : public partition_resolver private: // local routines - rpc_address get_address(const partition_configuration &config) const; - error_code get_address(int partition_index, /*out*/ rpc_address &addr); + host_port get_host_port(const partition_configuration &config) const; + error_code get_host_port(int partition_index, /*out*/ host_port &hp); void handle_pending_requests(std::deque &reqs, error_code err); void clear_all_pending_requests(); @@ -118,7 +118,7 @@ class partition_resolver_simple : public partition_resolver // request_context_ptr rc); void end_request(request_context_ptr &&request, error_code err, - rpc_address addr, + host_port addr, bool called_by_timer = false) const; void on_timeout(request_context_ptr &&rc) const; diff --git a/src/client/replication_ddl_client.cpp b/src/client/replication_ddl_client.cpp index 15dc4fec69..e7380c65f9 100644 --- a/src/client/replication_ddl_client.cpp +++ b/src/client/replication_ddl_client.cpp @@ -48,7 +48,8 @@ #include "fmt/format.h" #include "meta/meta_rpc_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" #include "utils/output_utils.h" @@ -83,11 +84,11 @@ error_s replication_ddl_client::validate_app_name(const std::string &app_name) return error_s::ok(); } -replication_ddl_client::replication_ddl_client(const std::vector &meta_servers) +replication_ddl_client::replication_ddl_client(const std::vector &meta_servers) { _meta_server.assign_group("meta-servers"); for (const auto &m : meta_servers) { - if (!_meta_server.group_address()->add(m)) { + if 
(!_meta_server.group_host_port()->add(m)) { LOG_WARNING("duplicate adress {}", m); } } @@ -95,6 +96,37 @@ replication_ddl_client::replication_ddl_client(const std::vector(); + + auto resp_task = request_meta(RPC_CM_CLUSTER_INFO, req); + resp_task->wait(); + if (resp_task->error() != dsn::ERR_OK) { + LOG_ERROR("get cluster_info failed!"); + return; + } + + configuration_cluster_info_response resp; + ::dsn::unmarshall(resp_task->get_response(), resp); + if (resp.err != dsn::ERR_OK) { + LOG_ERROR("get cluster_info failed!"); + return; + } + + for (int i = 0; i < resp.keys.size(); i++) { + if (resp.keys[i] == "primary_meta_server") { + auto hp = host_port::from_string(resp.values[i]); + if (_meta_server.group_host_port()->contains(hp)) { + _meta_server.group_host_port()->set_leader(hp); + } else { + LOG_ERROR("meta_servers not contains {}", hp); + } + break; + } + } +} + dsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_name, int partition_count, int max_replica_count) @@ -131,7 +163,8 @@ dsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_na int ready_count = 0; for (int i = 0; i < partition_count; i++) { const partition_configuration &pc = query_resp.partitions[i]; - if (!pc.primary.is_invalid() && (pc.secondaries.size() + 1 >= max_replica_count)) { + if (!pc.hp_primary.is_invalid() && + (pc.hp_secondaries.size() + 1 >= max_replica_count)) { ready_count++; } } @@ -401,11 +434,11 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -453,7 +486,7 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st dsn::error_code replication_ddl_client::list_nodes( const dsn::replication::node_status::type status, - std::map &nodes) + std::map &nodes) { auto req = std::make_shared(); req->status = status; @@ -469,8 +502,10 @@ dsn::error_code replication_ddl_client::list_nodes( return resp.err; } - for (const dsn::replication::node_info &n : resp.infos) { - nodes[n.address] = n.status; + for (const auto &n : resp.infos) { + host_port hp; + GET_HOST_PORT(n, address, hp); + nodes[hp] = n.status; } return dsn::ERR_OK; @@ -503,13 +538,13 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ const std::string &file_name, bool resolve_ip) { - std::map nodes; + std::map nodes; auto r = list_nodes(status, nodes); if (r != dsn::ERR_OK) { return r; } - std::map tmp_map; + std::map tmp_map; int alive_node_count = 0; for (auto &kv : nodes) { if (kv.second == dsn::replication::node_status::NS_ALIVE) @@ -539,14 +574,14 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j = 0; j < p.secondaries.size(); j++) { - auto find = tmp_map.find(p.secondaries[j]); + for (int j = 0; j < 
p.hp_secondaries.size(); j++) { + auto find = tmp_map.find(p.hp_secondaries[j]); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -725,7 +760,7 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, tp_details.add_column("replica_count"); tp_details.add_column("primary"); tp_details.add_column("secondaries"); - std::map> node_stat; + std::map> node_stat; int total_prim_count = 0; int total_sec_count = 0; @@ -734,14 +769,14 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, int read_unhealthy = 0; for (const auto &p : partitions) { int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; - node_stat[p.primary].first++; + node_stat[p.hp_primary].first++; total_prim_count++; } - replica_count += p.secondaries.size(); - total_sec_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + total_sec_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -755,16 +790,15 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, std::stringstream oss; oss << replica_count << "/" << p.max_replica_count; tp_details.append_data(oss.str()); - tp_details.append_data(p.primary ? host_name_resolve(resolve_ip, p.primary.to_string()) - : "-"); + tp_details.append_data((p.hp_primary.is_invalid() ? "-" : p.hp_primary.to_string())); oss.str(""); oss << "["; // TODO (yingchun) join - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; - oss << host_name_resolve(resolve_ip, p.secondaries[j].to_string()); - node_stat[p.secondaries[j]].second++; + oss << p.hp_secondaries[j]; + node_stat[p.hp_secondaries[j]].second++; } oss << "]"; tp_details.append_data(oss.str()); @@ -863,7 +897,7 @@ replication_ddl_client::send_balancer_proposal(const configuration_balancer_requ return resp.err; } -dsn::error_code replication_ddl_client::do_recovery(const std::vector &replica_nodes, +dsn::error_code replication_ddl_client::do_recovery(const std::vector &replica_nodes, int wait_seconds, bool skip_bad_nodes, bool skip_lost_partitions, @@ -882,15 +916,17 @@ dsn::error_code replication_ddl_client::do_recovery(const std::vector(); req->recovery_set.clear(); - for (const dsn::rpc_address &node : replica_nodes) { - if (std::find(req->recovery_set.begin(), req->recovery_set.end(), node) != - req->recovery_set.end()) { + req->__set_hp_recovery_set(std::vector()); + for (const auto &node : replica_nodes) { + if (std::find(req->hp_recovery_set.begin(), req->hp_recovery_set.end(), node) != + req->hp_recovery_set.end()) { out << "duplicate replica node " << node << ", just ingore it" << std::endl; } else { - req->recovery_set.push_back(node); + req->hp_recovery_set.push_back(node); + req->recovery_set.push_back(dsn::dns_resolver::instance().resolve_address(node)); } } - if (req->recovery_set.empty()) { + if (req->hp_recovery_set.empty()) { out << "node set for recovery it empty" << std::endl; return ERR_INVALID_PARAMETERS; } @@ -902,7 +938,7 @@ dsn::error_code replication_ddl_client::do_recovery(const std::vectorrecovery_set) { + for (auto &node : req->hp_recovery_set) { out << node << std::endl; } out << "=============================" << std::endl; @@ -1022,6 +1058,7 @@ dsn::error_code replication_ddl_client::add_backup_policy(const std::string &pol error_with 
replication_ddl_client::backup_app( int32_t app_id, const std::string &backup_provider_type, const std::string &backup_path) { + set_meta_servers_leader(); auto req = std::make_unique(); req->app_id = app_id; req->backup_provider_type = backup_provider_type; @@ -1398,7 +1435,7 @@ void replication_ddl_client::end_meta_request(const rpc_response_task_ptr &callb return; } - rpc::call(_meta_server, + rpc::call(dsn::dns_resolver::instance().resolve_address(_meta_server), request, &_tracker, [this, attempt_count, callback]( @@ -1533,14 +1570,15 @@ replication_ddl_client::ddd_diagnose(gpid pid, std::vector & } void replication_ddl_client::query_disk_info( - const std::vector &targets, + const std::vector &targets, const std::string &app_name, - /*out*/ std::map> &resps) + /*out*/ std::map> &resps) { - std::map query_disk_info_rpcs; + std::map query_disk_info_rpcs; for (const auto &target : targets) { auto request = std::make_unique(); - request->node = target; + request->node = dsn::dns_resolver::instance().resolve_address(target); + request->__set_hp_node(target); request->app_name = app_name; query_disk_info_rpcs.emplace(target, query_disk_info_rpc(std::move(request), RPC_QUERY_DISK_INFO)); @@ -1591,14 +1629,14 @@ replication_ddl_client::clear_bulk_load(const std::string &app_name) return call_rpc_sync(clear_bulk_load_rpc(std::move(req), RPC_CM_CLEAR_BULK_LOAD)); } -error_code replication_ddl_client::detect_hotkey(const dsn::rpc_address &target, +error_code replication_ddl_client::detect_hotkey(const dsn::host_port &target, detect_hotkey_request &req, detect_hotkey_response &resp) { - std::map detect_hotkey_rpcs; + std::map detect_hotkey_rpcs; auto request = std::make_unique(req); detect_hotkey_rpcs.emplace(target, detect_hotkey_rpc(std::move(request), RPC_DETECT_HOTKEY)); - std::map> resps; + std::map> resps; call_rpcs_sync(detect_hotkey_rpcs, resps); resp = resps.begin()->second.get_value(); return resps.begin()->second.get_error().code(); @@ -1656,16 +1694,16 @@ replication_ddl_client::query_partition_split(const std::string &app_name) return call_rpc_sync(query_split_rpc(std::move(req), RPC_CM_QUERY_PARTITION_SPLIT)); } -error_with -replication_ddl_client::add_new_disk(const rpc_address &target_node, const std::string &disk_str) +error_with replication_ddl_client::add_new_disk(const host_port &target_node, + const std::string &disk_str) { auto req = std::make_unique(); req->disk_str = disk_str; - std::map add_new_disk_rpcs; + std::map add_new_disk_rpcs; add_new_disk_rpcs.emplace(target_node, add_new_disk_rpc(std::move(req), RPC_ADD_NEW_DISK)); - std::map> resps; + std::map> resps; call_rpcs_sync(add_new_disk_rpcs, resps); return resps.begin()->second.get_value(); } diff --git a/src/client/replication_ddl_client.h b/src/client/replication_ddl_client.h index 38b94a6e1b..e7c01958df 100644 --- a/src/client/replication_ddl_client.h +++ b/src/client/replication_ddl_client.h @@ -43,8 +43,9 @@ #include "meta_admin_types.h" #include "partition_split_types.h" #include "replica_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -73,7 +74,7 @@ class start_backup_app_response; class replication_ddl_client { public: - replication_ddl_client(const std::vector &meta_servers); + replication_ddl_client(const std::vector &meta_servers); ~replication_ddl_client(); dsn::error_code 
create_app(const std::string &app_name, @@ -109,7 +110,7 @@ class replication_ddl_client dsn::error_code list_nodes(const dsn::replication::node_status::type status, - std::map &nodes); + std::map &nodes); dsn::error_code cluster_name(int64_t timeout_ms, std::string &cluster_name); @@ -134,7 +135,7 @@ class replication_ddl_client dsn::error_code wait_app_ready(const std::string &app_name, int partition_count, int max_replica_count); - dsn::error_code do_recovery(const std::vector &replica_nodes, + dsn::error_code do_recovery(const std::vector &replica_nodes, int wait_seconds, bool skip_bad_nodes, bool skip_lost_partitions, @@ -206,10 +207,10 @@ class replication_ddl_client dsn::error_code ddd_diagnose(gpid pid, std::vector &ddd_partitions); - void query_disk_info( - const std::vector &targets, - const std::string &app_name, - /*out*/ std::map> &resps); + void + query_disk_info(const std::vector &targets, + const std::string &app_name, + /*out*/ std::map> &resps); error_with start_bulk_load(const std::string &app_name, const std::string &cluster_name, @@ -224,7 +225,7 @@ class replication_ddl_client error_with clear_bulk_load(const std::string &app_name); - error_code detect_hotkey(const dsn::rpc_address &target, + error_code detect_hotkey(const dsn::host_port &target, detect_hotkey_request &req, detect_hotkey_response &resp); @@ -245,7 +246,7 @@ class replication_ddl_client error_with query_partition_split(const std::string &app_name); - error_with add_new_disk(const rpc_address &target_node, + error_with add_new_disk(const host_port &target_node, const std::string &disk_str); error_with @@ -264,6 +265,7 @@ class replication_ddl_client set_max_replica_count(const std::string &app_name, int32_t max_replica_count); void set_max_wait_app_ready_secs(uint32_t max_wait_secs) { _max_wait_secs = max_wait_secs; } + void set_meta_servers_leader(); static error_s validate_app_name(const std::string &app_name); @@ -287,7 +289,7 @@ class replication_ddl_client auto task = dsn::rpc::create_rpc_response_task(msg, nullptr, empty_rpc_handler, reply_thread_hash); - rpc::call(_meta_server, + rpc::call(dsn::dns_resolver::instance().resolve_address(_meta_server), msg, &_tracker, [this, task]( @@ -372,7 +374,7 @@ class replication_ddl_client static constexpr int MAX_RETRY = 2; error_code err = ERR_UNKNOWN; for (int retry = 0; retry < MAX_RETRY; retry++) { - task_ptr task = rpc.call(_meta_server, + task_ptr task = rpc.call(dsn::dns_resolver::instance().resolve_address(_meta_server), &_tracker, [&err](error_code code) { err = code; }, reply_thread_hash); @@ -389,31 +391,32 @@ class replication_ddl_client /// Send request to multi replica server synchronously. 
template - void call_rpcs_sync(std::map &rpcs, - std::map> &resps, + void call_rpcs_sync(std::map &rpcs, + std::map> &resps, int reply_thread_hash = 0, bool enable_retry = true) { dsn::task_tracker tracker; error_code err = ERR_UNKNOWN; for (auto &rpc : rpcs) { - rpc.second.call( - rpc.first, &tracker, [&err, &resps, &rpcs, &rpc](error_code code) mutable { - err = code; - if (err == dsn::ERR_OK) { - resps.emplace(rpc.first, std::move(rpc.second.response())); - rpcs.erase(rpc.first); - } else { - resps.emplace( - rpc.first, - std::move(error_s::make(err, "unable to send rpc to server"))); - } - }); + rpc.second.call(dsn::dns_resolver::instance().resolve_address(rpc.first), + &tracker, + [&err, &resps, &rpcs, &rpc](error_code code) mutable { + err = code; + if (err == dsn::ERR_OK) { + resps.emplace(rpc.first, std::move(rpc.second.response())); + rpcs.erase(rpc.first); + } else { + resps.emplace(rpc.first, + std::move(error_s::make( + err, "unable to send rpc to server"))); + } + }); } tracker.wait_outstanding_tasks(); if (enable_retry && rpcs.size() > 0) { - std::map> retry_resps; + std::map> retry_resps; call_rpcs_sync(rpcs, retry_resps, reply_thread_hash, false); for (auto &resp : retry_resps) { resps.emplace(resp.first, std::move(resp.second)); @@ -422,7 +425,7 @@ class replication_ddl_client } private: - dsn::rpc_address _meta_server; + dsn::host_port _meta_server; dsn::task_tracker _tracker; uint32_t _max_wait_secs = 3600; // Wait at most 1 hour by default. diff --git a/src/client/test/ddl_client_test.cpp b/src/client/test/ddl_client_test.cpp index e0e6b90996..f44a0382b0 100644 --- a/src/client/test/ddl_client_test.cpp +++ b/src/client/test/ddl_client_test.cpp @@ -27,7 +27,7 @@ #include "gtest/gtest.h" #include "meta_admin_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -126,7 +126,7 @@ TEST(DDLClientTest, RetryMetaRequest) dsn::ERR_BUSY_CREATING}, }; - const std::vector meta_list = {rpc_address::from_ip_port("127.0.0.1", 34601)}; + const std::vector meta_list = {host_port("localhost", 34601)}; auto req = std::make_shared(); for (const auto &test : tests) { fail::setup(); diff --git a/src/client_lib/pegasus_client_impl.cpp b/src/client_lib/pegasus_client_impl.cpp index b3e30f5e4c..917c7b39cc 100644 --- a/src/client_lib/pegasus_client_impl.cpp +++ b/src/client_lib/pegasus_client_impl.cpp @@ -35,7 +35,8 @@ #include "pegasus_key_schema.h" #include "pegasus_utils.h" #include "rrdb/rrdb.client.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_code.h" @@ -62,12 +63,12 @@ std::unordered_map pegasus_client_impl::_server_error_to_client; pegasus_client_impl::pegasus_client_impl(const char *cluster_name, const char *app_name) : _cluster_name(cluster_name), _app_name(app_name) { - std::vector meta_servers; + std::vector meta_servers; dsn::replication::replica_helper::load_meta_servers( meta_servers, dsn::PEGASUS_CLUSTER_SECTION_NAME.c_str(), cluster_name); CHECK_GT(meta_servers.size(), 0); _meta_server.assign_group("meta-servers"); - _meta_server.group_address()->add_list(meta_servers); + _meta_server.group_host_port()->add_list(meta_servers); _client = new ::dsn::apps::rrdb_client(cluster_name, meta_servers, app_name); } @@ -1253,7 +1254,7 @@ void 
pegasus_client_impl::async_get_unordered_scanners( query_cfg_request req; req.app_name = _app_name; - ::dsn::rpc::call(_meta_server, + ::dsn::rpc::call(dns_resolver::instance().resolve_address(_meta_server), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, req, nullptr, diff --git a/src/client_lib/pegasus_client_impl.h b/src/client_lib/pegasus_client_impl.h index 52c0787c7c..689fe74925 100644 --- a/src/client_lib/pegasus_client_impl.h +++ b/src/client_lib/pegasus_client_impl.h @@ -32,7 +32,7 @@ #include #include "rrdb/rrdb_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/zlocks.h" @@ -364,7 +364,7 @@ class pegasus_client_impl : public pegasus_client private: std::string _cluster_name; std::string _app_name; - ::dsn::rpc_address _meta_server; + ::dsn::host_port _meta_server; ::dsn::apps::rrdb_client *_client; /// diff --git a/src/common/consensus.thrift b/src/common/consensus.thrift index ec862b3364..26312b8e36 100644 --- a/src/common/consensus.thrift +++ b/src/common/consensus.thrift @@ -132,30 +132,32 @@ enum learner_status struct learn_request { - 1:dsn.gpid pid; - 2:dsn.rpc_address learner; // learner's address - 3:i64 signature; // learning signature - 4:i64 last_committed_decree_in_app; // last committed decree of learner's app - 5:i64 last_committed_decree_in_prepare_list; // last committed decree of learner's prepare list - 6:dsn.blob app_specific_learn_request; // learning request data by app.prepare_learn_request() + 1:dsn.gpid pid; + 2:dsn.rpc_address learner; // learner's address + 3:i64 signature; // learning signature + 4:i64 last_committed_decree_in_app; // last committed decree of learner's app + 5:i64 last_committed_decree_in_prepare_list; // last committed decree of learner's prepare list + 6:dsn.blob app_specific_learn_request; // learning request data by app.prepare_learn_request() // Used by duplication to determine if learner has enough logs on disk to // be duplicated (ie. max_gced_decree < confirmed_decree), if not, // learnee will copy the missing logs. 
- 7:optional i64 max_gced_decree; + 7:optional i64 max_gced_decree; + 8:optional dsn.host_port hp_learner; } struct learn_response { - 1:dsn.error_code err; // error code + 1:dsn.error_code err; // error code 2:metadata.replica_configuration config; // learner's replica config - 3:i64 last_committed_decree; // learnee's last committed decree - 4:i64 prepare_start_decree; // prepare start decree - 5:learn_type type = learn_type.LT_INVALID; // learning type: CACHE, LOG, APP - 6:learn_state state; // learning data, including memory data and files - 7:dsn.rpc_address address; // learnee's address - 8:string base_local_dir; // base dir of files on learnee - 9:optional string replica_disk_tag; // the disk tag of learnee located + 3:i64 last_committed_decree; // learnee's last committed decree + 4:i64 prepare_start_decree; // prepare start decree + 5:learn_type type = learn_type.LT_INVALID; // learning type: CACHE, LOG, APP + 6:learn_state state; // learning data, including memory data and files + 7:dsn.rpc_address learnee; // learnee's address + 8:string base_local_dir; // base dir of files on learnee + 9:optional string replica_disk_tag; // the disk tag of learnee located + 10:optional dsn.host_port hp_learnee; // learnee's host_port } struct learn_notify_response @@ -167,33 +169,35 @@ struct learn_notify_response struct group_check_request { - 1:dsn.layer2.app_info app; - 2:dsn.rpc_address node; + 1:dsn.layer2.app_info app; + 2:dsn.rpc_address node; 3:metadata.replica_configuration config; - 4:i64 last_committed_decree; + 4:i64 last_committed_decree; // Used to sync duplication progress between primaries // and secondaries, so that secondaries can be allowed to GC // their WALs after this decree. - 5:optional i64 confirmed_decree; + 5:optional i64 confirmed_decree; // Used to deliver child gpid and meta_split_status during partition split - 6:optional dsn.gpid child_gpid; + 6:optional dsn.gpid child_gpid; 7:optional metadata.split_status meta_split_status; + 8:optional dsn.host_port hp_node; } struct group_check_response { - 1:dsn.gpid pid; - 2:dsn.error_code err; - 3:i64 last_committed_decree_in_app; - 4:i64 last_committed_decree_in_prepare_list; - 5:learner_status learner_status_ = learner_status.LearningInvalid; - 6:i64 learner_signature; - 7:dsn.rpc_address node; + 1:dsn.gpid pid; + 2:dsn.error_code err; + 3:i64 last_committed_decree_in_app; + 4:i64 last_committed_decree_in_prepare_list; + 5:learner_status learner_status_ = learner_status.LearningInvalid; + 6:i64 learner_signature; + 7:dsn.rpc_address node; // Used for pause or cancel partition split // if secondary pause or cancel split succeed, is_split_stopped = true - 8:optional bool is_split_stopped; + 8:optional bool is_split_stopped; 9:optional metadata.disk_status disk_status = metadata.disk_status.NORMAL; + 10:optional dsn.host_port hp_node; } diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp index 2aaa953c1b..1b633b1d24 100644 --- a/src/common/fs_manager.cpp +++ b/src/common/fs_manager.cpp @@ -268,7 +268,7 @@ void fs_manager::add_replica(const gpid &pid, const std::string &pid_dir) const auto &dn = get_dir_node(pid_dir); if (dsn_unlikely(nullptr == dn)) { LOG_ERROR( - "{}: dir({}) of gpid({}) haven't registered", dsn_primary_address(), pid_dir, pid); + "{}: dir({}) of gpid({}) haven't registered", dsn_primary_host_port(), pid_dir, pid); return; } @@ -280,11 +280,11 @@ void fs_manager::add_replica(const gpid &pid, const std::string &pid_dir) } if (!emplace_success) { LOG_WARNING( - "{}: gpid({}) already in the 
dir_node({})", dsn_primary_address(), pid, dn->tag); + "{}: gpid({}) already in the dir_node({})", dsn_primary_host_port(), pid, dn->tag); return; } - LOG_INFO("{}: add gpid({}) to dir_node({})", dsn_primary_address(), pid, dn->tag); + LOG_INFO("{}: add gpid({}) to dir_node({})", dsn_primary_host_port(), pid, dn->tag); } dir_node *fs_manager::find_best_dir_for_new_replica(const gpid &pid) const @@ -318,7 +318,7 @@ dir_node *fs_manager::find_best_dir_for_new_replica(const gpid &pid) const if (selected != nullptr) { LOG_INFO( "{}: put pid({}) to dir({}), which has {} replicas of current app, {} replicas totally", - dsn_primary_address(), + dsn_primary_host_port(), pid, selected->tag, least_app_replicas_count, @@ -358,7 +358,7 @@ void fs_manager::remove_replica(const gpid &pid) pid, dn->tag); if (r != 0) { - LOG_INFO("{}: remove gpid({}) from dir({})", dsn_primary_address(), pid, dn->tag); + LOG_INFO("{}: remove gpid({}) from dir({})", dsn_primary_host_port(), pid, dn->tag); } remove_count += r; } diff --git a/src/common/json_helper.h b/src/common/json_helper.h index 648928f7bf..41345a1484 100644 --- a/src/common/json_helper.h +++ b/src/common/json_helper.h @@ -715,7 +715,10 @@ NON_MEMBER_JSON_SERIALIZATION(dsn::partition_configuration, secondaries, last_drops, last_committed_decree, - partition_flags) + partition_flags, + hp_primary, + hp_secondaries, + hp_last_drops) NON_MEMBER_JSON_SERIALIZATION(dsn::app_info, status, diff --git a/src/common/replication_common.cpp b/src/common/replication_common.cpp index c08670d7c2..e823772a4a 100644 --- a/src/common/replication_common.cpp +++ b/src/common/replication_common.cpp @@ -36,6 +36,7 @@ #include "common/replication_other_types.h" #include "dsn.layer2_types.h" #include "fmt/core.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/service_app.h" #include "utils/config_api.h" #include "utils/filesystem.h" @@ -161,33 +162,22 @@ int32_t replication_options::app_mutation_2pc_min_replica_count(int32_t app_max_ } } -/*static*/ bool replica_helper::remove_node(::dsn::rpc_address node, - /*inout*/ std::vector<::dsn::rpc_address> &nodeList) -{ - auto it = std::find(nodeList.begin(), nodeList.end(), node); - if (it != nodeList.end()) { - nodeList.erase(it); - return true; - } else { - return false; - } -} - /*static*/ bool replica_helper::get_replica_config(const partition_configuration &partition_config, - ::dsn::rpc_address node, + ::dsn::host_port node, /*out*/ replica_configuration &replica_config) { replica_config.pid = partition_config.pid; replica_config.primary = partition_config.primary; replica_config.ballot = partition_config.ballot; replica_config.learner_signature = invalid_signature; + replica_config.__set_hp_primary(partition_config.hp_primary); - if (node == partition_config.primary) { + if (node == partition_config.hp_primary) { replica_config.status = partition_status::PS_PRIMARY; return true; - } else if (std::find(partition_config.secondaries.begin(), - partition_config.secondaries.end(), - node) != partition_config.secondaries.end()) { + } else if (std::find(partition_config.hp_secondaries.begin(), + partition_config.hp_secondaries.end(), + node) != partition_config.hp_secondaries.end()) { replica_config.status = partition_status::PS_SECONDARY; return true; } else { @@ -196,7 +186,7 @@ int32_t replication_options::app_mutation_2pc_min_replica_count(int32_t app_max_ } } -bool replica_helper::load_meta_servers(/*out*/ std::vector &servers, +bool replica_helper::load_meta_servers(/*out*/ std::vector &servers, const char *section, 
const char *key) { @@ -205,19 +195,22 @@ bool replica_helper::load_meta_servers(/*out*/ std::vector &se std::vector host_ports; ::dsn::utils::split_args(server_list.c_str(), host_ports, ','); for (const auto &host_port : host_ports) { - auto addr = dsn::rpc_address::from_host_port(host_port); - if (!addr) { - LOG_ERROR("invalid address '{}' specified in config [{}]{}", host_port, section, key); + auto hp = dsn::host_port::from_string(host_port); + if (!hp) { + LOG_ERROR("invalid host_port '{}' specified in config [{}]{}", host_port, section, key); return false; } - servers.push_back(addr); + servers.push_back(hp); } - // TODO(yingchun): check there is no duplicates if (servers.empty()) { LOG_ERROR("no meta server specified in config [{}].{}", section, key); return false; } + if (servers.size() != host_ports.size()) { + LOG_ERROR("server_list {} have duplicate server", server_list); + return false; + } return true; } diff --git a/src/common/replication_common.h b/src/common/replication_common.h index 522bdf8af8..9d88b9d96a 100644 --- a/src/common/replication_common.h +++ b/src/common/replication_common.h @@ -32,7 +32,7 @@ #include #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/task/task.h" @@ -45,8 +45,8 @@ class query_app_info_response; class query_replica_info_request; class query_replica_info_response; -typedef std::unordered_map<::dsn::rpc_address, partition_status::type> node_statuses; -typedef std::unordered_map<::dsn::rpc_address, dsn::task_ptr> node_tasks; +typedef std::unordered_map<::dsn::host_port, partition_status::type> node_statuses; +typedef std::unordered_map<::dsn::host_port, dsn::task_ptr> node_tasks; typedef rpc_holder update_app_env_rpc; @@ -59,7 +59,7 @@ class replication_options static const std::string kRepsDir; static const std::string kReplicaAppType; - std::vector<::dsn::rpc_address> meta_servers; + std::vector<::dsn::host_port> meta_servers; std::string app_name; std::string app_dir; diff --git a/src/common/replication_other_types.h b/src/common/replication_other_types.h index b5f62205c3..ee441711f4 100644 --- a/src/common/replication_other_types.h +++ b/src/common/replication_other_types.h @@ -36,6 +36,8 @@ #include "consensus_types.h" #include "replica_admin_types.h" #include "common/replication_enums.h" +#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { @@ -49,16 +51,17 @@ typedef int64_t decree; #define invalid_offset (-1LL) #define invalid_signature 0 -inline bool is_primary(const partition_configuration &pc, const rpc_address &node) +inline bool is_primary(const partition_configuration &pc, const host_port &node) { - return !node.is_invalid() && pc.primary == node; + return !node.is_invalid() && pc.hp_primary == node; } -inline bool is_secondary(const partition_configuration &pc, const rpc_address &node) +inline bool is_secondary(const partition_configuration &pc, const host_port &node) { return !node.is_invalid() && - std::find(pc.secondaries.begin(), pc.secondaries.end(), node) != pc.secondaries.end(); + std::find(pc.hp_secondaries.begin(), pc.hp_secondaries.end(), node) != + pc.hp_secondaries.end(); } -inline bool is_member(const partition_configuration &pc, const rpc_address &node) +inline bool is_member(const partition_configuration &pc, const host_port &node) { return is_primary(pc, node) || is_secondary(pc, node); } @@ -66,26 +69,36 @@ inline bool 
is_partition_config_equal(const partition_configuration &pc1, const partition_configuration &pc2) { // secondaries no need to be same order - for (const rpc_address &addr : pc1.secondaries) + for (const host_port &addr : pc1.hp_secondaries) if (!is_secondary(pc2, addr)) return false; // last_drops is not considered into equality check return pc1.ballot == pc2.ballot && pc1.pid == pc2.pid && pc1.max_replica_count == pc2.max_replica_count && pc1.primary == pc2.primary && - pc1.secondaries.size() == pc2.secondaries.size() && + pc1.hp_primary == pc2.hp_primary && pc1.secondaries.size() == pc2.secondaries.size() && + pc1.hp_secondaries.size() == pc2.hp_secondaries.size() && pc1.last_committed_decree == pc2.last_committed_decree; } class replica_helper { public: - static bool remove_node(::dsn::rpc_address node, - /*inout*/ std::vector<::dsn::rpc_address> &nodeList); + template + static bool remove_node(const T node, + /*inout*/ std::vector &nodes) + { + auto it = std::find(nodes.begin(), nodes.end(), node); + if (it != nodes.end()) { + nodes.erase(it); + return true; + } + return false; + } static bool get_replica_config(const partition_configuration &partition_config, - ::dsn::rpc_address node, + ::dsn::host_port node, /*out*/ replica_configuration &replica_config); // true if meta_list's value of config is valid, otherwise return false - static bool load_meta_servers(/*out*/ std::vector &servers, + static bool load_meta_servers(/*out*/ std::vector &servers, const char *section = "meta_server", const char *key = "server_list"); }; diff --git a/src/failure_detector/failure_detector.cpp b/src/failure_detector/failure_detector.cpp index f76e3adc00..bd528b5d85 100644 --- a/src/failure_detector/failure_detector.cpp +++ b/src/failure_detector/failure_detector.cpp @@ -37,6 +37,8 @@ #include "failure_detector/fd.code.definition.h" #include "fd_types.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/serverlet.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_spec.h" @@ -119,7 +121,7 @@ void failure_detector::stop() _workers.clear(); } -void failure_detector::register_master(::dsn::rpc_address target) +void failure_detector::register_master(::dsn::host_port target) { bool setup_timer = false; @@ -153,8 +155,8 @@ void failure_detector::register_master(::dsn::rpc_address target) } } -bool failure_detector::switch_master(::dsn::rpc_address from, - ::dsn::rpc_address to, +bool failure_detector::switch_master(::dsn::host_port from, + ::dsn::host_port to, uint32_t delay_milliseconds) { /* the caller of switch master shoud lock necessarily to protect _masters */ @@ -192,7 +194,7 @@ bool failure_detector::switch_master(::dsn::rpc_address from, bool failure_detector::is_time_greater_than(uint64_t ts, uint64_t base) { return ts > base; } -void failure_detector::report(::dsn::rpc_address node, bool is_master, bool is_connected) +void failure_detector::report(::dsn::host_port node, bool is_master, bool is_connected) { LOG_INFO( "{} {}connected: {}", is_master ? "master" : "worker", is_connected ? 
"" : "dis", node); @@ -216,7 +218,7 @@ void failure_detector::check_all_records() return; } - std::vector expire; + std::vector expire; { zauto_lock l(_lock); @@ -302,28 +304,28 @@ void failure_detector::check_all_records() } } -void failure_detector::add_allow_list(::dsn::rpc_address node) +void failure_detector::add_allow_list(::dsn::host_port node) { zauto_lock l(_lock); _allow_list.insert(node); } -bool failure_detector::remove_from_allow_list(::dsn::rpc_address node) +bool failure_detector::remove_from_allow_list(::dsn::host_port node) { zauto_lock l(_lock); return _allow_list.erase(node) > 0; } -void failure_detector::set_allow_list(const std::vector &replica_addrs) +void failure_detector::set_allow_list(const std::vector &replica_hps) { CHECK(!_is_started, "FD is already started, the allow list should really not be modified"); - std::vector nodes; - for (const auto &addr : replica_addrs) { - const auto node = dsn::rpc_address::from_host_port(addr); + std::vector nodes; + for (auto &hp : replica_hps) { + const auto node = dsn::host_port::from_string(hp); if (!node) { LOG_WARNING("replica_white_list has invalid ip {}, the allow list won't be modified", - addr); + hp); return; } nodes.push_back(node); @@ -351,33 +353,38 @@ std::string failure_detector::get_allow_list(const std::vector &arg void failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon_ack &ack) { + host_port hp_from_node, hp_to_node; + GET_HOST_PORT(beacon, from_node, hp_from_node); + GET_HOST_PORT(beacon, to_node, hp_to_node); + ack.time = beacon.time; - ack.this_node = beacon.to_addr; + ack.this_node = beacon.to_node; + ack.__set_hp_this_node(hp_to_node); ack.primary_node = dsn_primary_address(); + ack.__set_hp_primary_node(dsn_primary_host_port()); ack.is_master = true; ack.allowed = true; zauto_lock l(_lock); uint64_t now = dsn_now_ms(); - auto node = beacon.from_addr; - worker_map::iterator itr = _workers.find(node); + worker_map::iterator itr = _workers.find(hp_from_node); if (itr == _workers.end()) { // if is a new worker, check allow list first if need - if (_use_allow_list && _allow_list.find(node) == _allow_list.end()) { - LOG_WARNING("new worker[{}] is rejected", node); + if (_use_allow_list && _allow_list.find(hp_from_node) == _allow_list.end()) { + LOG_WARNING("new worker[{}] is rejected", hp_from_node); ack.allowed = false; return; } // create new entry for node - worker_record record(node, now); + worker_record record(hp_from_node, now); record.is_alive = true; - _workers.insert(std::make_pair(node, record)); + _workers.insert(std::make_pair(hp_from_node, record)); - report(node, false, true); - on_worker_connected(node); + report(hp_from_node, false, true); + on_worker_connected(hp_from_node); } else if (is_time_greater_than(now, itr->second.last_beacon_recv_time)) { // update last_beacon_recv_time itr->second.last_beacon_recv_time = now; @@ -389,8 +396,8 @@ void failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon if (itr->second.is_alive == false) { itr->second.is_alive = true; - report(node, false, true); - on_worker_connected(node); + report(hp_from_node, false, true); + on_worker_connected(hp_from_node); } } else { LOG_INFO("now[{}] <= last_recv_time[{}]", now, itr->second.last_beacon_recv_time); @@ -414,33 +421,40 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack /* * the caller of the end_ping_internal should lock necessarily!!! 
*/ + host_port hp_this_node, hp_primary_node; + GET_HOST_PORT(ack, this_node, hp_this_node); + GET_HOST_PORT(ack, primary_node, hp_primary_node); + uint64_t beacon_send_time = ack.time; - auto node = ack.this_node; if (err != ERR_OK) { LOG_WARNING("ping master({}) failed, timeout_ms = {}, err = {}", - node, + hp_this_node, _beacon_timeout_milliseconds, err); METRIC_VAR_INCREMENT(beacon_failed_count); } - master_map::iterator itr = _masters.find(node); + master_map::iterator itr = _masters.find(hp_this_node); if (itr == _masters.end()) { LOG_WARNING("received beacon ack without corresponding master, ignore it, " - "remote_master[{}], local_worker[{}]", - node, + "remote_master[{}({})], local_worker[{}({})]", + hp_this_node, + ack.this_node, + dsn_primary_host_port(), dsn_primary_address()); return false; } master_record &record = itr->second; if (!ack.allowed) { - LOG_WARNING( - "worker rejected, stop sending beacon message, remote_master[{}], local_worker[{}]", - node, - dsn_primary_address()); + LOG_WARNING("worker rejected, stop sending beacon message, remote_master[{}({})], " + "local_worker[{}({})]", + hp_this_node, + ack.this_node, + dsn_primary_host_port(), + dsn_primary_address()); record.rejected = true; record.send_beacon_timer->cancel(true); return false; @@ -461,8 +475,11 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack // if ack is not from master meta, worker should not update its last send time if (!ack.is_master) { - LOG_WARNING( - "node[{}] is not master, ack.primary_node[{}] is real master", node, ack.primary_node); + LOG_WARNING("node[{}({})] is not master, ack.primary_node[{}({})] is real master", + hp_this_node, + ack.this_node, + hp_primary_node, + ack.primary_node); return true; } @@ -479,15 +496,15 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack if (!record.is_alive && is_time_greater_than(now, record.last_send_time_for_beacon_with_ack) && now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds) { // report master connected - report(node, true, true); + report(hp_this_node, true, true); itr->second.is_alive = true; - on_master_connected(node); + on_master_connected(hp_this_node); } return true; } -bool failure_detector::unregister_master(::dsn::rpc_address node) +bool failure_detector::unregister_master(::dsn::host_port node) { zauto_lock l(_lock); auto it = _masters.find(node); @@ -503,7 +520,7 @@ bool failure_detector::unregister_master(::dsn::rpc_address node) } } -bool failure_detector::is_master_connected(::dsn::rpc_address node) const +bool failure_detector::is_master_connected(::dsn::host_port node) const { zauto_lock l(_lock); auto it = _masters.find(node); @@ -513,7 +530,7 @@ bool failure_detector::is_master_connected(::dsn::rpc_address node) const return false; } -void failure_detector::register_worker(::dsn::rpc_address target, bool is_connected) +void failure_detector::register_worker(::dsn::host_port target, bool is_connected) { /* * callers should use the fd::_lock necessarily @@ -529,7 +546,7 @@ void failure_detector::register_worker(::dsn::rpc_address target, bool is_connec } } -bool failure_detector::unregister_worker(::dsn::rpc_address node) +bool failure_detector::unregister_worker(::dsn::host_port node) { /* * callers should use the fd::_lock necessarily @@ -555,7 +572,7 @@ void failure_detector::clear_workers() _workers.clear(); } -bool failure_detector::is_worker_connected(::dsn::rpc_address node) const +bool 
failure_detector::is_worker_connected(::dsn::host_port node) const { zauto_lock l(_lock); auto it = _workers.find(node); @@ -565,18 +582,25 @@ bool failure_detector::is_worker_connected(::dsn::rpc_address node) const return false; } -void failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time) +void failure_detector::send_beacon(::dsn::host_port target, uint64_t time) { + const auto &addr_target = dsn::dns_resolver::instance().resolve_address(target); beacon_msg beacon; beacon.time = time; - beacon.from_addr = dsn_primary_address(); - beacon.to_addr = target; + beacon.from_node = dsn_primary_address(); + beacon.__set_hp_from_node(dsn_primary_host_port()); + beacon.to_node = addr_target; + beacon.__set_hp_to_node(target); beacon.__set_start_time(static_cast(dsn::utils::process_start_millis())); - LOG_INFO( - "send ping message, from[{}], to[{}], time[{}]", beacon.from_addr, beacon.to_addr, time); + LOG_INFO("send ping message, from[{}({})], to[{}({})], time[{}]", + beacon.hp_from_node, + beacon.from_node, + beacon.hp_to_node, + beacon.to_node, + time); - ::dsn::rpc::call(target, + ::dsn::rpc::call(addr_target, RPC_FD_FAILURE_DETECTOR_PING, beacon, &_tracker, @@ -584,8 +608,10 @@ void failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time) if (err != ::dsn::ERR_OK) { beacon_ack ack; ack.time = beacon.time; - ack.this_node = beacon.to_addr; + ack.this_node = beacon.to_node; + ack.__set_hp_this_node(beacon.hp_to_node); ack.primary_node.set_invalid(); + ack.__set_hp_primary_node(host_port()); ack.is_master = false; ack.allowed = true; end_ping(err, ack, nullptr); diff --git a/src/failure_detector/failure_detector.h b/src/failure_detector/failure_detector.h index b1c896360f..af50122d2b 100644 --- a/src/failure_detector/failure_detector.h +++ b/src/failure_detector/failure_detector.h @@ -35,7 +35,7 @@ #include "failure_detector/fd.client.h" #include "failure_detector/fd.server.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" #include "runtime/task/task_tracker.h" @@ -63,12 +63,12 @@ class failure_detector_callback virtual ~failure_detector_callback() {} // worker side - virtual void on_master_disconnected(const std::vector<::dsn::rpc_address> &nodes) = 0; - virtual void on_master_connected(::dsn::rpc_address node) = 0; + virtual void on_master_disconnected(const std::vector<::dsn::host_port> &nodes) = 0; + virtual void on_master_connected(::dsn::host_port node) = 0; // master side - virtual void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) = 0; - virtual void on_worker_connected(::dsn::rpc_address node) = 0; + virtual void on_worker_disconnected(const std::vector<::dsn::host_port> &nodes) = 0; + virtual void on_worker_connected(::dsn::host_port node) = 0; }; // The interface for a perfect failure detector. 
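[Editor's note, not part of the patch: the failure-detector hunks above follow the dual-field convention used throughout this change, where each Thrift message keeps its legacy dsn.rpc_address field and gains an optional dsn.host_port twin (hp_*). The minimal sketch below only reuses calls that appear in this diff (dsn_primary_address(), dsn_primary_host_port(), dns_resolver::instance().resolve_address(), the generated __set_hp_* setters, and the GET_HOST_PORT macro); the assumption that GET_HOST_PORT falls back to the legacy address field when hp_* is unset is mine, not stated in the patch.]

#include "fd_types.h"
#include "runtime/api_layer1.h"
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_host_port.h"

// Sender side: keep host_port as the logical identity and resolve it to an
// rpc_address only at the RPC boundary, filling both field flavours.
dsn::fd::beacon_msg make_beacon(const dsn::host_port &master)
{
    dsn::fd::beacon_msg beacon;
    beacon.time = dsn_now_ms();
    beacon.from_node = dsn_primary_address();             // legacy address field
    beacon.__set_hp_from_node(dsn_primary_host_port());   // new optional host_port field
    beacon.to_node = dsn::dns_resolver::instance().resolve_address(master);
    beacon.__set_hp_to_node(master);
    return beacon;
}

// Receiver side: prefer the optional host_port field when it is set
// (assumed fallback to the legacy address otherwise), so upgraded and
// non-upgraded peers can interoperate.
dsn::host_port beacon_sender(const dsn::fd::beacon_msg &beacon)
{
    dsn::host_port from;
    GET_HOST_PORT(beacon, from_node, from);
    return from;
}

[End of editor's note; the patch continues below.]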
@@ -121,32 +121,32 @@ class failure_detector : public failure_detector_service, uint32_t get_lease_ms() const { return _lease_milliseconds; } uint32_t get_grace_ms() const { return _grace_milliseconds; } - void register_master(::dsn::rpc_address target); + void register_master(::dsn::host_port target); - bool switch_master(::dsn::rpc_address from, ::dsn::rpc_address to, uint32_t delay_milliseconds); + bool switch_master(::dsn::host_port from, ::dsn::host_port to, uint32_t delay_milliseconds); - bool unregister_master(::dsn::rpc_address node); + bool unregister_master(::dsn::host_port node); - virtual bool is_master_connected(::dsn::rpc_address node) const; + virtual bool is_master_connected(::dsn::host_port node) const; // ATTENTION: be very careful to set is_connected to false as // workers are always considered *connected* initially which is ok even when workers think // master is disconnected // Considering workers *disconnected* initially is *dangerous* coz it may violate the invariance // when workers think they are online - void register_worker(::dsn::rpc_address node, bool is_connected = true); + void register_worker(::dsn::host_port node, bool is_connected = true); - bool unregister_worker(::dsn::rpc_address node); + bool unregister_worker(::dsn::host_port node); void clear_workers(); - virtual bool is_worker_connected(::dsn::rpc_address node) const; + virtual bool is_worker_connected(::dsn::host_port node) const; - void add_allow_list(::dsn::rpc_address node); + void add_allow_list(::dsn::host_port node); - bool remove_from_allow_list(::dsn::rpc_address node); + bool remove_from_allow_list(::dsn::host_port node); - void set_allow_list(const std::vector &replica_addrs); + void set_allow_list(const std::vector &replica_hps); std::string get_allow_list(const std::vector &args) const; @@ -162,7 +162,7 @@ class failure_detector : public failure_detector_service, bool is_time_greater_than(uint64_t ts, uint64_t base); - void report(::dsn::rpc_address node, bool is_master, bool is_connected); + void report(::dsn::host_port node, bool is_master, bool is_connected); private: void check_all_records(); @@ -171,7 +171,7 @@ class failure_detector : public failure_detector_service, class master_record { public: - ::dsn::rpc_address node; + ::dsn::host_port node; uint64_t last_send_time_for_beacon_with_ack; bool is_alive; bool rejected; @@ -179,7 +179,7 @@ class failure_detector : public failure_detector_service, // masters are always considered *disconnected* initially which is ok even when master // thinks workers are connected - master_record(::dsn::rpc_address n, uint64_t last_send_time_for_beacon_with_ack_) + master_record(::dsn::host_port n, uint64_t last_send_time_for_beacon_with_ack_) { node = n; last_send_time_for_beacon_with_ack = last_send_time_for_beacon_with_ack_; @@ -191,13 +191,13 @@ class failure_detector : public failure_detector_service, class worker_record { public: - ::dsn::rpc_address node; + ::dsn::host_port node; uint64_t last_beacon_recv_time; bool is_alive; // workers are always considered *connected* initially which is ok even when workers think // master is disconnected - worker_record(::dsn::rpc_address node, uint64_t last_beacon_recv_time) + worker_record(::dsn::host_port node, uint64_t last_beacon_recv_time) { this->node = node; this->last_beacon_recv_time = last_beacon_recv_time; @@ -206,11 +206,11 @@ class failure_detector : public failure_detector_service, }; private: - typedef std::unordered_map<::dsn::rpc_address, master_record> master_map; - typedef 
std::unordered_map<::dsn::rpc_address, worker_record> worker_map; + typedef std::unordered_map<::dsn::host_port, master_record> master_map; + typedef std::unordered_map<::dsn::host_port, worker_record> worker_map; // allow list are set on machine name (port can vary) - typedef std::unordered_set<::dsn::rpc_address> allow_list; + typedef std::unordered_set<::dsn::host_port> allow_list; master_map _masters; worker_map _workers; @@ -235,7 +235,7 @@ class failure_detector : public failure_detector_service, dsn::task_tracker _tracker; // subClass can rewrite these method. - virtual void send_beacon(::dsn::rpc_address node, uint64_t time); + virtual void send_beacon(::dsn::host_port node, uint64_t time); }; } } // end namespace diff --git a/src/failure_detector/failure_detector_multimaster.cpp b/src/failure_detector/failure_detector_multimaster.cpp index 66b5d190e4..32bca4b422 100644 --- a/src/failure_detector/failure_detector_multimaster.cpp +++ b/src/failure_detector/failure_detector_multimaster.cpp @@ -29,8 +29,7 @@ #include "failure_detector/failure_detector_multimaster.h" #include "fd_types.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/rand.h" @@ -38,83 +37,90 @@ namespace dsn { namespace dist { slave_failure_detector_with_multimaster::slave_failure_detector_with_multimaster( - std::vector<::dsn::rpc_address> &meta_servers, + std::vector<::dsn::host_port> &meta_servers, std::function &&master_disconnected_callback, std::function &&master_connected_callback) { _meta_servers.assign_group("meta-servers"); for (const auto &s : meta_servers) { - if (!_meta_servers.group_address()->add(s)) { + if (!_meta_servers.group_host_port()->add(s)) { LOG_WARNING("duplicate adress {}", s); } } - _meta_servers.group_address()->set_leader( + _meta_servers.group_host_port()->set_leader( meta_servers[rand::next_u32(0, (uint32_t)meta_servers.size() - 1)]); // ATTENTION: here we disable dsn_group_set_update_leader_automatically to avoid // failure detecting logic is affected by rpc failure or rpc forwarding. - _meta_servers.group_address()->set_update_leader_automatically(false); + _meta_servers.group_host_port()->set_update_leader_automatically(false); _master_disconnected_callback = std::move(master_disconnected_callback); _master_connected_callback = std::move(master_connected_callback); } -void slave_failure_detector_with_multimaster::set_leader_for_test(rpc_address meta) +void slave_failure_detector_with_multimaster::set_leader_for_test(host_port meta) { - _meta_servers.group_address()->set_leader(meta); + _meta_servers.group_host_port()->set_leader(meta); } void slave_failure_detector_with_multimaster::end_ping(::dsn::error_code err, const fd::beacon_ack &ack, void *) { - LOG_INFO("end ping result, error[{}], time[{}], ack.this_node[{}], ack.primary_node[{}], " - "ack.is_master[{}], ack.allowed[{}]", - err, - ack.time, - ack.this_node, - ack.primary_node, - ack.is_master ? "true" : "false", - ack.allowed ? "true" : "false"); + host_port hp_this_node, hp_primary_node; + GET_HOST_PORT(ack, this_node, hp_this_node); + GET_HOST_PORT(ack, primary_node, hp_primary_node); + + LOG_INFO( + "end ping result, error[{}], time[{}], ack.this_node[{}({})], ack.primary_node[{}({})], " + "ack.is_master[{}], ack.allowed[{}]", + err, + ack.time, + hp_this_node, + ack.this_node, + hp_primary_node, + ack.primary_node, + ack.is_master ? "true" : "false", + ack.allowed ? 
"true" : "false"); zauto_lock l(failure_detector::_lock); if (!failure_detector::end_ping_internal(err, ack)) return; - CHECK_EQ(ack.this_node, _meta_servers.group_address()->leader()); + CHECK_EQ(hp_this_node, _meta_servers.group_host_port()->leader()); if (ERR_OK != err) { - rpc_address next = _meta_servers.group_address()->next(ack.this_node); - if (next != ack.this_node) { - _meta_servers.group_address()->set_leader(next); + auto next = _meta_servers.group_host_port()->next(hp_this_node); + if (next != hp_this_node) { + _meta_servers.group_host_port()->set_leader(next); // do not start next send_beacon() immediately to avoid send rpc too frequently - switch_master(ack.this_node, next, 1000); + switch_master(hp_this_node, next, 1000); } } else { if (ack.is_master) { // do nothing - } else if (ack.primary_node.is_invalid()) { - rpc_address next = _meta_servers.group_address()->next(ack.this_node); - if (next != ack.this_node) { - _meta_servers.group_address()->set_leader(next); + } else if (hp_primary_node.is_invalid()) { + auto next = _meta_servers.group_host_port()->next(hp_this_node); + if (next != hp_this_node) { + _meta_servers.group_host_port()->set_leader(next); // do not start next send_beacon() immediately to avoid send rpc too frequently - switch_master(ack.this_node, next, 1000); + switch_master(hp_this_node, next, 1000); } } else { - _meta_servers.group_address()->set_leader(ack.primary_node); + _meta_servers.group_host_port()->set_leader(hp_primary_node); // start next send_beacon() immediately because the leader is possibly right. - switch_master(ack.this_node, ack.primary_node, 0); + switch_master(hp_this_node, hp_primary_node, 0); } } } // client side void slave_failure_detector_with_multimaster::on_master_disconnected( - const std::vector<::dsn::rpc_address> &nodes) + const std::vector<::dsn::host_port> &nodes) { bool primary_disconnected = false; - rpc_address leader = _meta_servers.group_address()->leader(); + const auto &leader = _meta_servers.group_host_port()->leader(); for (auto it = nodes.begin(); it != nodes.end(); ++it) { if (leader == *it) primary_disconnected = true; @@ -125,13 +131,13 @@ void slave_failure_detector_with_multimaster::on_master_disconnected( } } -void slave_failure_detector_with_multimaster::on_master_connected(::dsn::rpc_address node) +void slave_failure_detector_with_multimaster::on_master_connected(::dsn::host_port node) { /* * well, this is called in on_ping_internal, which is called by rep::end_ping. 
* So this function is called in the lock context of fd::_lock */ - bool is_primary = (_meta_servers.group_address()->leader() == node); + bool is_primary = (_meta_servers.group_host_port()->leader() == node); if (is_primary) { _master_connected_callback(); } diff --git a/src/failure_detector/failure_detector_multimaster.h b/src/failure_detector/failure_detector_multimaster.h index bfaedfcadf..a431cb4d3c 100644 --- a/src/failure_detector/failure_detector_multimaster.h +++ b/src/failure_detector/failure_detector_multimaster.h @@ -27,16 +27,18 @@ #pragma once #include +#include #include #include "failure_detector/failure_detector.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" #include "utils/zlocks.h" namespace dsn { class error_code; + namespace fd { class beacon_ack; } // namespace fd @@ -46,7 +48,7 @@ namespace dist { class slave_failure_detector_with_multimaster : public dsn::fd::failure_detector { public: - slave_failure_detector_with_multimaster(std::vector<::dsn::rpc_address> &meta_servers, + slave_failure_detector_with_multimaster(std::vector<::dsn::host_port> &meta_servers, std::function &&master_disconnected_callback, std::function &&master_connected_callback); virtual ~slave_failure_detector_with_multimaster() {} @@ -54,35 +56,35 @@ class slave_failure_detector_with_multimaster : public dsn::fd::failure_detector void end_ping(::dsn::error_code err, const fd::beacon_ack &ack, void *context) override; // client side - void on_master_disconnected(const std::vector<::dsn::rpc_address> &nodes) override; - void on_master_connected(::dsn::rpc_address node) override; + void on_master_disconnected(const std::vector<::dsn::host_port> &nodes) override; + void on_master_connected(::dsn::host_port node) override; // server side - void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) override + void on_worker_disconnected(const std::vector<::dsn::host_port> &nodes) override { CHECK(false, "invalid execution flow"); } - void on_worker_connected(::dsn::rpc_address node) override + void on_worker_connected(::dsn::host_port node) override { CHECK(false, "invalid execution flow"); } - ::dsn::rpc_address current_server_contact() const; - ::dsn::rpc_address get_servers() const { return _meta_servers; } + ::dsn::host_port current_server_contact() const; + host_port get_servers() const { return _meta_servers; } - void set_leader_for_test(dsn::rpc_address meta); + void set_leader_for_test(dsn::host_port meta); private: - dsn::rpc_address _meta_servers; + host_port _meta_servers; std::function _master_disconnected_callback; std::function _master_connected_callback; }; //------------------ inline implementation -------------------------------- -inline ::dsn::rpc_address slave_failure_detector_with_multimaster::current_server_contact() const +inline ::dsn::host_port slave_failure_detector_with_multimaster::current_server_contact() const { zauto_lock l(failure_detector::_lock); - return _meta_servers.group_address()->leader(); + return _meta_servers.group_host_port()->leader(); } } } // end namespace diff --git a/src/failure_detector/fd.thrift b/src/failure_detector/fd.thrift index a85a38a67b..b6549d883b 100644 --- a/src/failure_detector/fd.thrift +++ b/src/failure_detector/fd.thrift @@ -30,23 +30,28 @@ namespace cpp dsn.fd struct beacon_msg { - 1: i64 time; - 2: dsn.rpc_address from_addr; - 3: dsn.rpc_address to_addr; - 4: optional i64 
start_time; + 1: i64 time; + 2: dsn.rpc_address from_node; + 3: dsn.rpc_address to_node; + 4: optional i64 start_time; + 5: optional dsn.host_port hp_from_node; + 6: optional dsn.host_port hp_to_node; } struct beacon_ack { - 1: i64 time; - 2: dsn.rpc_address this_node; - 3: dsn.rpc_address primary_node; - 4: bool is_master; - 5: bool allowed; + 1: i64 time; + 2: dsn.rpc_address this_node; + 3: dsn.rpc_address primary_node; + 4: bool is_master; + 5: bool allowed; + 6: optional dsn.host_port hp_this_node; + 7: optional dsn.host_port hp_primary_node; } struct config_master_message { - 1: dsn.rpc_address master; - 2: bool is_register; + 1: dsn.rpc_address master; + 2: bool is_register; + 3: optional dsn.host_port hp_master; } diff --git a/src/failure_detector/test/failure_detector.cpp b/src/failure_detector/test/failure_detector.cpp index d2bc55f6ae..49c223e89b 100644 --- a/src/failure_detector/test/failure_detector.cpp +++ b/src/failure_detector/test/failure_detector.cpp @@ -48,9 +48,11 @@ #include "meta/meta_server_failure_detector.h" #include "replica/replica_stub.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/serverlet.h" #include "runtime/service_app.h" @@ -82,11 +84,11 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast private: volatile bool _send_ping_switch; /* this function only triggerd once*/ - std::function _connected_cb; - std::function &)> _disconnected_cb; + std::function _connected_cb; + std::function &)> _disconnected_cb; protected: - virtual void send_beacon(::dsn::rpc_address node, uint64_t time) override + virtual void send_beacon(::dsn::host_port node, uint64_t time) override { if (_send_ping_switch) failure_detector::send_beacon(node, time); @@ -95,20 +97,20 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast } } - virtual void on_master_disconnected(const std::vector &nodes) override + virtual void on_master_disconnected(const std::vector &nodes) override { if (_disconnected_cb) _disconnected_cb(nodes); } - virtual void on_master_connected(rpc_address node) override + virtual void on_master_connected(host_port node) override { if (_connected_cb) _connected_cb(node); } public: - worker_fd_test(replication::replica_stub *stub, std::vector &meta_servers) + worker_fd_test(replication::replica_stub *stub, std::vector &meta_servers) : slave_failure_detector_with_multimaster(meta_servers, [=]() { stub->on_meta_server_disconnected(); }, [=]() { stub->on_meta_server_connected(); }) @@ -116,8 +118,8 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast _send_ping_switch = false; } void toggle_send_ping(bool toggle) { _send_ping_switch = toggle; } - void when_connected(const std::function &func) { _connected_cb = func; } - void when_disconnected(const std::function &nodes)> &func) + void when_connected(const std::function &func) { _connected_cb = func; } + void when_disconnected(const std::function &nodes)> &func) { _disconnected_cb = func; } @@ -131,8 +133,8 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast class master_fd_test : public replication::meta_server_failure_detector { private: - std::function _connected_cb; - std::function &)> _disconnected_cb; + std::function 
_connected_cb; + std::function &)> _disconnected_cb; volatile bool _response_ping_switch; public: @@ -143,32 +145,32 @@ class master_fd_test : public replication::meta_server_failure_detector else { LOG_DEBUG("ignore on ping, beacon msg, time[{}], from[{}], to[{}]", beacon.time, - beacon.from_addr, - beacon.to_addr); + beacon.from_node, + beacon.to_node); } } - virtual void on_worker_disconnected(const std::vector &worker_list) override + virtual void on_worker_disconnected(const std::vector &worker_list) override { if (_disconnected_cb) _disconnected_cb(worker_list); } - virtual void on_worker_connected(rpc_address node) override + virtual void on_worker_connected(host_port node) override { if (_connected_cb) _connected_cb(node); } - master_fd_test() : meta_server_failure_detector(rpc_address(), false) + master_fd_test() : meta_server_failure_detector(host_port(), false) { _response_ping_switch = true; } void toggle_response_ping(bool toggle) { _response_ping_switch = toggle; } - void when_connected(const std::function &func) { _connected_cb = func; } - void when_disconnected(const std::function &nodes)> &func) + void when_connected(const std::function &func) { _connected_cb = func; } + void when_disconnected(const std::function &nodes)> &func) { _disconnected_cb = func; } - void test_register_worker(rpc_address node) + void test_register_worker(host_port node) { zauto_lock l(failure_detector::_lock); register_worker(node); @@ -187,9 +189,9 @@ class test_worker : public service_app, public serverlet error_code start(const std::vector &args) override { - std::vector master_group; + std::vector master_group; for (int i = 0; i < 3; ++i) - master_group.push_back(rpc_address::from_host_port("localhost", MPORT_START + i)); + master_group.push_back(host_port("localhost", MPORT_START + i)); _worker_fd = new worker_fd_test(nullptr, master_group); _worker_fd->start(1, 1, 9, 10); ++started_apps; @@ -206,10 +208,14 @@ class test_worker : public service_app, public serverlet LOG_DEBUG("master config, request: {}, type: {}", request.master, request.is_register ? 
"reg" : "unreg"); + + host_port hp_master; + GET_HOST_PORT(request, master, hp_master); + if (request.is_register) - _worker_fd->register_master(request.master); + _worker_fd->register_master(hp_master); else - _worker_fd->unregister_master(request.master); + _worker_fd->unregister_master(hp_master); response = true; } @@ -227,7 +233,6 @@ class test_master : public service_app { FLAGS_stable_rs_min_running_seconds = 10; FLAGS_max_succssive_unstable_restart = 10; - _master_fd = new master_fd_test(); _master_fd->set_options(&_opts); bool use_allow_list = false; @@ -236,7 +241,8 @@ class test_master : public service_app utils::split_args(args[2].c_str(), ports, ','); for (auto &port : ports) { rpc_address addr(network::get_local_ipv4(), std::stoi(port)); - _master_fd->add_allow_list(addr); + const auto hp = ::dsn::host_port::from_address(addr); + _master_fd->add_allow_list(hp); } use_allow_list = true; } @@ -305,23 +311,22 @@ bool get_worker_and_master(test_worker *&worker, std::vector &mas void master_group_set_leader(std::vector &master_group, int leader_index) { - const auto leader_addr = - rpc_address::from_host_port("localhost", static_cast(MPORT_START + leader_index)); + const auto hp_leader = host_port("localhost", MPORT_START + leader_index); int i = 0; for (test_master *&master : master_group) { - master->fd()->set_leader_for_test(leader_addr, leader_index == i); + master->fd()->set_leader_for_test(hp_leader, leader_index == i); i++; } } void worker_set_leader(test_worker *worker, int leader_contact) { - worker->fd()->set_leader_for_test( - rpc_address::from_host_port("localhost", MPORT_START + leader_contact)); + worker->fd()->set_leader_for_test(host_port("localhost", MPORT_START + leader_contact)); config_master_message msg; msg.master = rpc_address::from_host_port("localhost", MPORT_START + leader_contact); msg.is_register = true; + msg.__set_hp_master(host_port::from_address(msg.master)); error_code err; bool response; std::tie(err, response) = rpc::call_wait( @@ -331,11 +336,13 @@ void worker_set_leader(test_worker *worker, int leader_contact) void clear(test_worker *worker, std::vector masters) { - rpc_address leader = worker->fd()->get_servers().group_address()->leader(); + const auto &hp_leader = worker->fd()->get_servers().group_host_port()->leader(); + const auto &leader = dsn::dns_resolver::instance().resolve_address(hp_leader); config_master_message msg; msg.master = leader; msg.is_register = false; + msg.__set_hp_master(hp_leader); error_code err; bool response; std::tie(err, response) = rpc::call_wait( @@ -356,18 +363,17 @@ void finish(test_worker *worker, test_master *master, int master_index) std::atomic_int wait_count; wait_count.store(2); worker->fd()->when_disconnected( - [&wait_count, master_index](const std::vector &addr_list) mutable { + [&wait_count, master_index](const std::vector &addr_list) mutable { ASSERT_EQ(addr_list.size(), 1); ASSERT_EQ(addr_list[0].port(), MPORT_START + master_index); --wait_count; }); - master->fd()->when_disconnected( - [&wait_count](const std::vector &addr_list) mutable { - ASSERT_EQ(addr_list.size(), 1); - ASSERT_EQ(addr_list[0].port(), WPORT); - --wait_count; - }); + master->fd()->when_disconnected([&wait_count](const std::vector &addr_list) mutable { + ASSERT_EQ(addr_list.size(), 1); + ASSERT_EQ(addr_list[0].port(), WPORT); + --wait_count; + }); // we don't send any ping message now worker->fd()->toggle_send_ping(false); @@ -392,11 +398,11 @@ TEST(fd, dummy_connect_disconnect) // simply wait for two connected std::atomic_int 
wait_count; wait_count.store(2); - worker->fd()->when_connected([&wait_count](rpc_address leader) mutable { + worker->fd()->when_connected([&wait_count](host_port leader) mutable { ASSERT_EQ(leader.port(), MPORT_START); --wait_count; }); - leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable { + leader->fd()->when_connected([&wait_count](host_port worker_addr) mutable { ASSERT_EQ(worker_addr.port(), WPORT); --wait_count; }); @@ -426,8 +432,8 @@ TEST(fd, master_redirect) wait_count.store(2); /* although we contact to the first master, but in the end we must connect to the right leader */ - worker->fd()->when_connected([&wait_count](rpc_address leader) mutable { --wait_count; }); - leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable { + worker->fd()->when_connected([&wait_count](host_port leader) mutable { --wait_count; }); + leader->fd()->when_connected([&wait_count](host_port worker_addr) mutable { ASSERT_EQ(worker_addr.port(), WPORT); --wait_count; }); @@ -463,7 +469,7 @@ TEST(fd, switch_new_master_suddenly) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -482,7 +488,7 @@ TEST(fd, switch_new_master_suddenly) */ tst_master->fd()->clear_workers(); wait_count.store(1); - tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable { + tst_master->fd()->when_connected([&wait_count](host_port addr) mutable { ASSERT_EQ(addr.port(), WPORT); --wait_count; }); @@ -518,7 +524,7 @@ TEST(fd, old_master_died) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -529,7 +535,7 @@ TEST(fd, old_master_died) worker->fd()->when_connected(nullptr); tst_master->fd()->when_connected(nullptr); - worker->fd()->when_disconnected([](const std::vector &masters_list) { + worker->fd()->when_disconnected([](const std::vector &masters_list) { ASSERT_EQ(masters_list.size(), 1); LOG_DEBUG("disconnect from master: {}", masters_list[0]); }); @@ -544,7 +550,7 @@ TEST(fd, old_master_died) tst_master->fd()->clear_workers(); wait_count.store(1); - tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable { + tst_master->fd()->when_connected([&wait_count](host_port addr) mutable { EXPECT_EQ(addr.port(), WPORT); --wait_count; }); @@ -580,7 +586,7 @@ TEST(fd, worker_died_when_switch_master) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -600,19 +606,19 @@ TEST(fd, worker_died_when_switch_master) wait_count.store(2); tst_master->fd()->when_disconnected( - [&wait_count](const std::vector &worker_list) mutable { + [&wait_count](const std::vector &worker_list) mutable { ASSERT_EQ(worker_list.size(), 1); ASSERT_EQ(worker_list[0].port(), WPORT); wait_count--; }); worker->fd()->when_disconnected( - [&wait_count](const std::vector &master_list) mutable { + [&wait_count](const std::vector &master_list) mutable { ASSERT_EQ(master_list.size(), 1); wait_count--; }); /* we assume the worker is alive */ - 
tst_master->fd()->test_register_worker(rpc_address::from_host_port("localhost", WPORT)); + tst_master->fd()->test_register_worker(host_port("localhost", WPORT)); master_group_set_leader(masters, index); /* then stop the worker*/ @@ -652,8 +658,10 @@ TEST(fd, update_stability) dsn::rpc_replier r(create_fake_rpc_response()); beacon_msg msg; - msg.from_addr = rpc_address::from_host_port("localhost", 123); - msg.to_addr = rpc_address::from_host_port("localhost", MPORT_START); + msg.from_node = rpc_address::from_host_port("localhost", 123); + msg.__set_hp_from_node(host_port("localhost", 123)); + msg.to_node = rpc_address::from_host_port("localhost", MPORT_START); + msg.__set_hp_to_node(host_port("localhost", MPORT_START)); msg.time = dsn_now_ms(); msg.__isset.start_time = true; msg.start_time = 1000; @@ -661,10 +669,10 @@ TEST(fd, update_stability) // first on ping fd->on_ping(msg, r); ASSERT_EQ(1, smap->size()); - ASSERT_NE(smap->end(), smap->find(msg.from_addr)); + ASSERT_NE(smap->end(), smap->find(msg.hp_from_node)); replication::meta_server_failure_detector::worker_stability &ws = - smap->find(msg.from_addr)->second; + smap->find(msg.hp_from_node)->second; ASSERT_EQ(0, ws.unstable_restart_count); ASSERT_EQ(msg.start_time, ws.last_start_time_ms); ASSERT_TRUE(r.is_empty()); @@ -732,7 +740,7 @@ TEST(fd, update_stability) ASSERT_FALSE(r.is_empty()); // reset stat - fd->reset_stability_stat(msg.from_addr); + fd->reset_stability_stat(msg.hp_from_node); ASSERT_EQ(msg.start_time, ws.last_start_time_ms); ASSERT_EQ(0, ws.unstable_restart_count); } @@ -751,7 +759,7 @@ TEST(fd, not_in_whitelist) std::atomic_int wait_count; wait_count.store(1); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); worker->fd()->toggle_send_ping(true); diff --git a/src/geo/test/geo_test.cpp b/src/geo/test/geo_test.cpp index bb4cc92da6..a553122e4e 100644 --- a/src/geo/test/geo_test.cpp +++ b/src/geo/test/geo_test.cpp @@ -40,7 +40,7 @@ #include "geo/lib/geo_client.h" #include "gtest/gtest.h" #include "pegasus/client.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/flags.h" @@ -61,7 +61,7 @@ class geo_client_test : public ::testing::Test public: geo_client_test() { - std::vector meta_list; + std::vector meta_list; bool ok = dsn::replication::replica_helper::load_meta_servers( meta_list, dsn::PEGASUS_CLUSTER_SECTION_NAME.c_str(), "onebox"); CHECK(ok, "load_meta_servers failed"); diff --git a/src/include/rrdb/rrdb.client.h b/src/include/rrdb/rrdb.client.h index 8792c08e7f..4645e781f4 100644 --- a/src/include/rrdb/rrdb.client.h +++ b/src/include/rrdb/rrdb.client.h @@ -39,7 +39,7 @@ class rrdb_client public: rrdb_client() {} explicit rrdb_client(const char *cluster_name, - const std::vector &meta_list, + const std::vector &meta_list, const char *app_name) { _resolver = diff --git a/src/meta/app_balance_policy.cpp b/src/meta/app_balance_policy.cpp index cafeed9adc..3f7a0a985e 100644 --- a/src/meta/app_balance_policy.cpp +++ b/src/meta/app_balance_policy.cpp @@ -36,7 +36,6 @@ DSN_DEFINE_bool(meta_server, "only try to make the primary balanced by move"); namespace dsn { -class rpc_address; namespace replication { app_balance_policy::app_balance_policy(meta_service *svc) : load_balance_policy(svc) @@ -117,7 +116,7 @@ bool app_balance_policy::copy_secondary(const std::shared_ptr &app, b int replicas_low = 
app->partition_count / _alive_nodes; std::unique_ptr operation = std::make_unique( - app, apps, nodes, address_vec, address_id, replicas_low); + app, apps, nodes, host_port_vec, host_port_id, replicas_low); return operation->start(_migration_result); } @@ -125,17 +124,18 @@ copy_secondary_operation::copy_secondary_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, int replicas_low) - : copy_replica_operation(app, apps, nodes, address_vec, address_id), _replicas_low(replicas_low) + : copy_replica_operation(app, apps, nodes, host_port_vec, host_port_id), + _replicas_low(replicas_low) { } bool copy_secondary_operation::can_continue() { - int id_min = *_ordered_address_ids.begin(); - int id_max = *_ordered_address_ids.rbegin(); + int id_min = *_ordered_host_port_ids.begin(); + int id_max = *_ordered_host_port_ids.rbegin(); if (_partition_counts[id_max] <= _replicas_low || _partition_counts[id_max] - _partition_counts[id_min] <= 1) { LOG_INFO("{}: stop copy secondary coz it will be balanced later", _app->get_logname()); @@ -151,8 +151,8 @@ int copy_secondary_operation::get_partition_count(const node_state &ns) const bool copy_secondary_operation::can_select(gpid pid, migration_list *result) { - int id_max = *_ordered_address_ids.rbegin(); - const node_state &max_ns = _nodes.at(_address_vec[id_max]); + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &max_ns = _nodes.at(_host_port_vec[id_max]); if (max_ns.served_as(pid) == partition_status::PS_PRIMARY) { LOG_DEBUG("{}: skip gpid({}.{}) coz it is primary", _app->get_logname(), @@ -170,8 +170,8 @@ bool copy_secondary_operation::can_select(gpid pid, migration_list *result) return false; } - int id_min = *_ordered_address_ids.begin(); - const node_state &min_ns = _nodes.at(_address_vec[id_min]); + int id_min = *_ordered_host_port_ids.begin(); + const node_state &min_ns = _nodes.at(_host_port_vec[id_min]); if (min_ns.served_as(pid) != partition_status::PS_INACTIVE) { LOG_DEBUG("{}: skip gpid({}.{}) coz it is already a member on the target node", _app->get_logname(), diff --git a/src/meta/app_balance_policy.h b/src/meta/app_balance_policy.h index 595e4408fc..a97cb2a0f8 100644 --- a/src/meta/app_balance_policy.h +++ b/src/meta/app_balance_policy.h @@ -28,7 +28,7 @@ namespace dsn { class gpid; -class rpc_address; +class host_port; namespace replication { class meta_service; @@ -59,8 +59,8 @@ class copy_secondary_operation : public copy_replica_operation copy_secondary_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &address_vec, + const std::unordered_map &address_id, int replicas_low); ~copy_secondary_operation() = default; diff --git a/src/meta/backup_engine.cpp b/src/meta/backup_engine.cpp index 88791eb5e3..6090964a8a 100644 --- a/src/meta/backup_engine.cpp +++ b/src/meta/backup_engine.cpp @@ -36,8 +36,9 @@ #include "meta/meta_data.h" #include "meta/meta_service.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" @@ -169,7 +170,7 @@ error_code backup_engine::backup_app_meta() void 
backup_engine::backup_app_partition(const gpid &pid) { - dsn::rpc_address partition_primary; + dsn::host_port partition_primary; { zauto_read_lock l; _backup_service->get_state()->lock_read(l); @@ -181,7 +182,7 @@ void backup_engine::backup_app_partition(const gpid &pid) _is_backup_failed = true; return; } - partition_primary = app->partitions[pid.get_partition_index()].primary; + partition_primary = app->partitions[pid.get_partition_index()].hp_primary; } if (partition_primary.is_invalid()) { @@ -214,10 +215,11 @@ void backup_engine::backup_app_partition(const gpid &pid) pid, partition_primary); backup_rpc rpc(std::move(req), RPC_COLD_BACKUP, 10000_ms, 0, pid.thread_hash()); - rpc.call( - partition_primary, &_tracker, [this, rpc, pid, partition_primary](error_code err) mutable { - on_backup_reply(err, rpc.response(), pid, partition_primary); - }); + rpc.call(dsn::dns_resolver::instance().resolve_address(partition_primary), + &_tracker, + [this, rpc, pid, partition_primary](error_code err) mutable { + on_backup_reply(err, rpc.response(), pid, partition_primary); + }); zauto_lock l(_lock); _backup_status[pid.get_partition_index()] = backup_status::ALIVE; @@ -251,7 +253,7 @@ inline void backup_engine::retry_backup(const dsn::gpid pid) void backup_engine::on_backup_reply(const error_code err, const backup_response &response, const gpid pid, - const rpc_address &primary) + const host_port &primary) { { zauto_lock l(_lock); diff --git a/src/meta/backup_engine.h b/src/meta/backup_engine.h index 20033c5da0..a762f3cc76 100644 --- a/src/meta/backup_engine.h +++ b/src/meta/backup_engine.h @@ -31,7 +31,8 @@ namespace dsn { class blob; class gpid; -class rpc_address; +class host_port; + namespace dist { namespace block_service { class block_filesystem; @@ -96,7 +97,7 @@ class backup_engine void on_backup_reply(error_code err, const backup_response &response, gpid pid, - const rpc_address &primary); + const host_port &primary); void write_backup_info(); void complete_current_backup(); void handle_replica_backup_failed(const backup_response &response, const gpid pid); diff --git a/src/meta/cluster_balance_policy.cpp b/src/meta/cluster_balance_policy.cpp index 6a33cd30a7..8b2788e711 100644 --- a/src/meta/cluster_balance_policy.cpp +++ b/src/meta/cluster_balance_policy.cpp @@ -26,6 +26,7 @@ #include "dsn.layer2_types.h" #include "meta/load_balance_policy.h" +#include "runtime/rpc/dns_resolver.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/utils.h" @@ -64,7 +65,7 @@ uint32_t get_partition_count(const node_state &ns, balance_type type, int32_t ap return (uint32_t)count; } -uint32_t get_skew(const std::map &count_map) +uint32_t get_skew(const std::map &count_map) { uint32_t min = UINT_MAX, max = 0; for (const auto &kv : count_map) { @@ -78,11 +79,11 @@ uint32_t get_skew(const std::map &count_map) return max - min; } -void get_min_max_set(const std::map &node_count_map, - /*out*/ std::set &min_set, - /*out*/ std::set &max_set) +void get_min_max_set(const std::map &node_count_map, + /*out*/ std::set &min_set, + /*out*/ std::set &max_set) { - std::multimap count_multimap = utils::flip_map(node_count_map); + std::multimap count_multimap = utils::flip_map(node_count_map); auto range = count_multimap.equal_range(count_multimap.begin()->first); for (auto iter = range.first; iter != range.second; ++iter) { @@ -222,14 +223,14 @@ bool cluster_balance_policy::get_app_migration_info(std::shared_ptr a info.app_name = app->app_name; info.partitions.resize(app->partitions.size()); for (auto i = 
0; i < app->partitions.size(); ++i) { - std::map pstatus_map; - pstatus_map[app->partitions[i].primary] = partition_status::PS_PRIMARY; - if (app->partitions[i].secondaries.size() != app->partitions[i].max_replica_count - 1) { + std::map pstatus_map; + pstatus_map[app->partitions[i].hp_primary] = partition_status::PS_PRIMARY; + if (app->partitions[i].hp_secondaries.size() != app->partitions[i].max_replica_count - 1) { // partition is unhealthy return false; } - for (const auto &addr : app->partitions[i].secondaries) { - pstatus_map[addr] = partition_status::PS_SECONDARY; + for (const auto &hp : app->partitions[i].hp_secondaries) { + pstatus_map[hp] = partition_status::PS_SECONDARY; } info.partitions[i] = pstatus_map; } @@ -237,7 +238,7 @@ bool cluster_balance_policy::get_app_migration_info(std::shared_ptr a for (const auto &it : nodes) { const node_state &ns = it.second; auto count = get_partition_count(ns, type, app->app_id); - info.replicas_count[ns.addr()] = count; + info.replicas_count[ns.host_port()] = count; } return true; @@ -247,12 +248,12 @@ void cluster_balance_policy::get_node_migration_info(const node_state &ns, const app_mapper &apps, /*out*/ node_migration_info &info) { - info.address = ns.addr(); + info.hp = ns.host_port(); for (const auto &iter : apps) { std::shared_ptr app = iter.second; for (const auto &context : app->helpers->contexts) { std::string disk_tag; - if (!context.get_disk_tag(ns.addr(), disk_tag)) { + if (!context.get_disk_tag(ns.host_port(), disk_tag)) { continue; } auto pid = context.config_owner->pid; @@ -290,8 +291,8 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * a move that improves the app skew and the cluster skew, if possible. If * not, attempt to pick a move that improves the app skew. **/ - std::set cluster_min_count_nodes; - std::set cluster_max_count_nodes; + std::set cluster_min_count_nodes; + std::set cluster_max_count_nodes; get_min_max_set(cluster_info.replicas_count, cluster_min_count_nodes, cluster_max_count_nodes); bool found = false; @@ -303,8 +304,8 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster continue; } auto app_map = it->second.replicas_count; - std::set app_min_count_nodes; - std::set app_max_count_nodes; + std::set app_min_count_nodes; + std::set app_max_count_nodes; get_min_max_set(app_map, app_min_count_nodes, app_max_count_nodes); /** @@ -312,9 +313,9 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * with the replica servers most loaded overall, and likewise for least loaded. * These are our ideal candidates for moving from and to, respectively. **/ - std::set app_cluster_min_set = + std::set app_cluster_min_set = utils::get_intersection(app_min_count_nodes, cluster_min_count_nodes); - std::set app_cluster_max_set = + std::set app_cluster_max_set = utils::get_intersection(app_max_count_nodes, cluster_max_count_nodes); /** @@ -323,7 +324,7 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * replicas of the app. Moving a replica in that case might keep the * cluster skew the same or make it worse while keeping the app balanced. 
**/ - std::multimap app_count_multimap = utils::flip_map(app_map); + std::multimap app_count_multimap = utils::flip_map(app_map); if (app_count_multimap.rbegin()->first <= app_count_multimap.begin()->first + 1 && (app_cluster_min_set.empty() || app_cluster_max_set.empty())) { LOG_INFO("do not move replicas of a balanced app({}) if the least (most) loaded " @@ -356,8 +357,8 @@ auto select_random(const S &s, size_t n) } bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_info, - const std::set &max_nodes, - const std::set &min_nodes, + const std::set &max_nodes, + const std::set &min_nodes, const int32_t app_id, const partition_set &selected_pid, /*out*/ move_info &move_info) @@ -373,19 +374,19 @@ bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_ max_load_disk.node, max_load_disk.disk_tag, max_load_disk.partitions.size()); - for (const auto &node_addr : min_nodes) { + for (const auto &node_hp : min_nodes) { gpid picked_pid; if (pick_up_partition( - cluster_info, node_addr, max_load_disk.partitions, selected_pid, picked_pid)) { + cluster_info, node_hp, max_load_disk.partitions, selected_pid, picked_pid)) { move_info.pid = picked_pid; move_info.source_node = max_load_disk.node; move_info.source_disk_tag = max_load_disk.disk_tag; - move_info.target_node = node_addr; + move_info.target_node = node_hp; move_info.type = cluster_info.type; LOG_INFO("partition[{}] will migrate from {} to {}", picked_pid, max_load_disk.node, - node_addr); + node_hp); return true; } } @@ -398,22 +399,22 @@ bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_ void cluster_balance_policy::get_max_load_disk_set( const cluster_migration_info &cluster_info, - const std::set &max_nodes, + const std::set &max_nodes, const int32_t app_id, /*out*/ std::set &max_load_disk_set) { // key: partition count (app_disk_info.partitions.size()) // value: app_disk_info structure std::multimap app_disk_info_multimap; - for (const auto &node_addr : max_nodes) { + for (const auto &node_hp : max_nodes) { // key: disk_tag - // value: partition set for app(app id=app_id) in node(addr=node_addr) + // value: partition set for app(app id=app_id) in node(hp=node_hp) std::map disk_partitions = - get_disk_partitions_map(cluster_info, node_addr, app_id); + get_disk_partitions_map(cluster_info, node_hp, app_id); for (const auto &kv : disk_partitions) { app_disk_info info; info.app_id = app_id; - info.node = node_addr; + info.node = node_hp; info.disk_tag = kv.first; info.partitions = kv.second; app_disk_info_multimap.insert( @@ -427,11 +428,11 @@ void cluster_balance_policy::get_max_load_disk_set( } std::map cluster_balance_policy::get_disk_partitions_map( - const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id) + const cluster_migration_info &cluster_info, const host_port &hp, const int32_t app_id) { std::map disk_partitions; auto app_iter = cluster_info.apps_info.find(app_id); - auto node_iter = cluster_info.nodes_info.find(addr); + auto node_iter = cluster_info.nodes_info.find(hp); if (app_iter == cluster_info.apps_info.end() || node_iter == cluster_info.nodes_info.end()) { return disk_partitions; } @@ -447,7 +448,7 @@ std::map cluster_balance_policy::get_disk_partitions continue; } auto status_map = app_partition[pid.get_partition_index()]; - auto iter = status_map.find(addr); + auto iter = status_map.find(hp); if (iter != status_map.end() && iter->second == status) { disk_partitions[disk_tag].insert(pid); } @@ -457,7 +458,7 
@@ std::map cluster_balance_policy::get_disk_partitions } bool cluster_balance_policy::pick_up_partition(const cluster_migration_info &cluster_info, - const rpc_address &min_node_addr, + const host_port &min_node_hp, const partition_set &max_load_partitions, const partition_set &selected_pid, /*out*/ gpid &picked_pid) @@ -476,7 +477,7 @@ bool cluster_balance_policy::pick_up_partition(const cluster_migration_info &clu // partition has already been primary or secondary on min_node app_migration_info info = iter->second; - if (info.get_partition_status(pid.get_partition_index(), min_node_addr) != + if (info.get_partition_status(pid.get_partition_index(), min_node_hp) != partition_status::PS_INACTIVE) { continue; } @@ -494,7 +495,7 @@ bool cluster_balance_policy::apply_move(const move_info &move, /*out*/ cluster_migration_info &cluster_info) { int32_t app_id = move.pid.get_app_id(); - rpc_address source = move.source_node, target = move.target_node; + host_port source = move.source_node, target = move.target_node; if (cluster_info.apps_skew.find(app_id) == cluster_info.apps_skew.end() || cluster_info.replicas_count.find(source) == cluster_info.replicas_count.end() || cluster_info.replicas_count.find(target) == cluster_info.replicas_count.end() || @@ -512,10 +513,10 @@ bool cluster_balance_policy::apply_move(const move_info &move, app_info.replicas_count[target]++; auto &pmap = app_info.partitions[move.pid.get_partition_index()]; - rpc_address primary_addr; + host_port primary_hp; for (const auto &kv : pmap) { if (kv.second == partition_status::PS_PRIMARY) { - primary_addr = kv.first; + primary_hp = kv.first; } } auto status = cluster_info.type == balance_type::COPY_SECONDARY ? partition_status::PS_SECONDARY @@ -544,10 +545,15 @@ bool cluster_balance_policy::apply_move(const move_info &move, // add into migration list and selected_pid partition_configuration pc; pc.pid = move.pid; - pc.primary = primary_addr; - list[move.pid] = generate_balancer_request(*_global_view->apps, pc, move.type, source, target); + pc.hp_primary = primary_hp; + const auto &source_addr = dsn::dns_resolver::instance().resolve_address(source); + const auto &target_addr = dsn::dns_resolver::instance().resolve_address(target); + list[move.pid] = generate_balancer_request( + *_global_view->apps, pc, move.type, source_addr, target_addr, source, target); _migration_result->emplace( - move.pid, generate_balancer_request(*_global_view->apps, pc, move.type, source, target)); + move.pid, + generate_balancer_request( + *_global_view->apps, pc, move.type, source_addr, target_addr, source, target)); selected_pids.insert(move.pid); cluster_info.apps_skew[app_id] = get_skew(app_info.replicas_count); diff --git a/src/meta/cluster_balance_policy.h b/src/meta/cluster_balance_policy.h index 474935d897..196bf5a9b0 100644 --- a/src/meta/cluster_balance_policy.h +++ b/src/meta/cluster_balance_policy.h @@ -32,17 +32,17 @@ #include "load_balance_policy.h" #include "meta/meta_data.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { class meta_service; uint32_t get_partition_count(const node_state &ns, balance_type type, int32_t app_id); -uint32_t get_skew(const std::map &count_map); -void get_min_max_set(const std::map &node_count_map, - /*out*/ std::set &min_set, - /*out*/ std::set &max_set); +uint32_t get_skew(const std::map &count_map); +void get_min_max_set(const std::map &node_count_map, + /*out*/ std::set &min_set, + /*out*/ std::set &max_set); 
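[Editor's note, not part of the patch: purely to illustrate the host_port-keyed helpers declared just above. get_skew() returns the max-min spread of per-node counts and get_min_max_set() collects the nodes holding those extremes; the node names, port, and counts below are made-up sample data, and the map/set element types are assumed to match the ones used elsewhere in this change.]

#include <map>
#include <set>
#include "meta/cluster_balance_policy.h"
#include "runtime/rpc/rpc_host_port.h"

void skew_example()
{
    // Hypothetical replica counts per node after the host_port migration.
    std::map<dsn::host_port, uint32_t> counts = {
        {dsn::host_port("replica1.example.com", 34801), 3},
        {dsn::host_port("replica2.example.com", 34801), 5},
        {dsn::host_port("replica3.example.com", 34801), 3},
    };

    uint32_t skew = dsn::replication::get_skew(counts); // 5 - 3 == 2

    std::set<dsn::host_port> min_set, max_set;
    dsn::replication::get_min_max_set(counts, min_set, max_set);
    // min_set -> {replica1, replica3}, max_set -> {replica2}; the balancer
    // then tries to move replicas from max_set nodes onto min_set nodes.
}

[End of editor's note; the patch continues below.]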
class cluster_balance_policy : public load_balance_policy { @@ -79,19 +79,19 @@ class cluster_balance_policy : public load_balance_policy const partition_set &selected_pid, /*out*/ move_info &next_move); bool pick_up_move(const cluster_migration_info &cluster_info, - const std::set &max_nodes, - const std::set &min_nodes, + const std::set &max_nodes, + const std::set &min_nodes, const int32_t app_id, const partition_set &selected_pid, /*out*/ move_info &move_info); void get_max_load_disk_set(const cluster_migration_info &cluster_info, - const std::set &max_nodes, + const std::set &max_nodes, const int32_t app_id, /*out*/ std::set &max_load_disk_set); std::map get_disk_partitions_map( - const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id); + const cluster_migration_info &cluster_info, const host_port &node, const int32_t app_id); bool pick_up_partition(const cluster_migration_info &cluster_info, - const rpc_address &min_node_addr, + const host_port &min_node_hp, const partition_set &max_load_partitions, const partition_set &selected_pid, /*out*/ gpid &picked_pid); @@ -104,8 +104,8 @@ class cluster_balance_policy : public load_balance_policy { int32_t app_id; std::string app_name; - std::vector> partitions; - std::map replicas_count; + std::vector> partitions; + std::map replicas_count; bool operator<(const app_migration_info &another) const { if (app_id < another.app_id) @@ -116,10 +116,10 @@ class cluster_balance_policy : public load_balance_policy { return app_id == another.app_id; } - partition_status::type get_partition_status(int32_t pidx, rpc_address addr) + partition_status::type get_partition_status(int32_t pidx, host_port node) { for (const auto &kv : partitions[pidx]) { - if (kv.first == addr) { + if (kv.first == node) { return kv.second; } } @@ -129,18 +129,12 @@ class cluster_balance_policy : public load_balance_policy struct node_migration_info { - rpc_address address; + host_port hp; // key-disk tag, value-partition set std::map partitions; partition_set future_partitions; - bool operator<(const node_migration_info &another) const - { - return address < another.address; - } - bool operator==(const node_migration_info &another) const - { - return address == another.address; - } + bool operator<(const node_migration_info &another) const { return hp < another.hp; } + bool operator==(const node_migration_info &another) const { return hp == another.hp; } }; struct cluster_migration_info @@ -148,14 +142,14 @@ class cluster_balance_policy : public load_balance_policy balance_type type; std::map apps_skew; std::map apps_info; - std::map nodes_info; - std::map replicas_count; + std::map nodes_info; + std::map replicas_count; }; struct app_disk_info { int32_t app_id; - rpc_address node; + host_port node; std::string disk_tag; partition_set partitions; bool operator==(const app_disk_info &another) const @@ -174,9 +168,9 @@ class cluster_balance_policy : public load_balance_policy struct move_info { gpid pid; - rpc_address source_node; + host_port source_node; std::string source_disk_tag; - rpc_address target_node; + host_port target_node; balance_type type; }; diff --git a/src/meta/duplication/duplication_info.cpp b/src/meta/duplication/duplication_info.cpp index 7f9a295361..1735269157 100644 --- a/src/meta/duplication/duplication_info.cpp +++ b/src/meta/duplication/duplication_info.cpp @@ -212,7 +212,7 @@ duplication_info_s_ptr duplication_info::decode_from_blob(dupid_t dup_id, if (!json::json_forwarder::decode(json, info)) { return nullptr; } - 
std::vector<rpc_address> meta_list; + std::vector<host_port> meta_list; if (!dsn::replication::replica_helper::load_meta_servers( meta_list, duplication_constants::kClustersSectionName.c_str(), info.remote.c_str())) { return nullptr; diff --git a/src/meta/duplication/duplication_info.h b/src/meta/duplication/duplication_info.h index fc32e05826..029ca1f2f2 100644 --- a/src/meta/duplication/duplication_info.h +++ b/src/meta/duplication/duplication_info.h @@ -31,7 +31,7 @@ #include "common/json_helper.h" #include "common/replication_other_types.h" #include "duplication_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -58,7 +58,7 @@ class duplication_info int32_t partition_count, uint64_t create_now_ms, std::string follower_cluster_name, - std::vector<rpc_address> &&follower_cluster_metas, + std::vector<host_port> &&follower_cluster_metas, std::string meta_store_path) : id(dupid), app_id(appid), @@ -247,7 +247,7 @@ class duplication_info const int32_t partition_count{0}; const std::string follower_cluster_name; - const std::vector<rpc_address> follower_cluster_metas; + const std::vector<host_port> follower_cluster_metas; const std::string store_path; // store path on meta service = get_duplication_path(app, dupid) const uint64_t create_timestamp_ms{0}; // the time when this dup is created. const std::string prefix_for_log; diff --git a/src/meta/duplication/meta_duplication_service.cpp b/src/meta/duplication/meta_duplication_service.cpp index e5cf61d0dd..62f03d734b 100644 --- a/src/meta/duplication/meta_duplication_service.cpp +++ b/src/meta/duplication/meta_duplication_service.cpp @@ -34,8 +34,10 @@ #include "meta_duplication_service.h" #include "metadata_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -176,7 +178,7 @@ void meta_duplication_service::add_duplication(duplication_add_rpc rpc) return; } - std::vector<rpc_address> meta_list; + std::vector<host_port> meta_list; if (!dsn::replication::replica_helper::load_meta_servers( meta_list, duplication_constants::kClustersSectionName.c_str(), @@ -273,7 +275,7 @@ void meta_duplication_service::duplication_sync(duplication_sync_rpc rpc) auto &response = rpc.response(); response.err = ERR_OK; - node_state *ns = get_node_state(_state->_nodes, request.node, false); + node_state *ns = get_node_state(_state->_nodes, host_port::from_address(request.node), false); if (ns == nullptr) { LOG_WARNING("node({}) is not found in meta server", request.node); response.err = ERR_OBJECT_NOT_FOUND; @@ -358,14 +360,14 @@ void meta_duplication_service::create_follower_app_for_duplication( request.options.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, _meta_svc->get_meta_list_string()); - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(dup->follower_cluster_name.c_str()); - meta_servers.group_address()->add_list(dup->follower_cluster_metas); + meta_servers.group_host_port()->add_list(dup->follower_cluster_metas); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CREATE_APP); dsn::marshall(msg, request); rpc::call( - meta_servers, + dsn::dns_resolver::instance().resolve_address(meta_servers), msg, _meta_svc->tracker(), [=](error_code err, configuration_create_app_response 
&&resp) mutable { @@ -406,16 +408,16 @@ void meta_duplication_service::create_follower_app_for_duplication( void meta_duplication_service::check_follower_app_if_create_completed( const std::shared_ptr &dup) { - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(dup->follower_cluster_name.c_str()); - meta_servers.group_address()->add_list(dup->follower_cluster_metas); + meta_servers.group_host_port()->add_list(dup->follower_cluster_metas); query_cfg_request meta_config_request; meta_config_request.app_name = dup->app_name; dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX); dsn::marshall(msg, meta_config_request); - rpc::call(meta_servers, + rpc::call(dsn::dns_resolver::instance().resolve_address(meta_servers), msg, _meta_svc->tracker(), [=](error_code err, query_cfg_response &&resp) mutable { @@ -427,6 +429,10 @@ void meta_duplication_service::check_follower_app_if_create_completed( p.primary = rpc_address::from_ip_port("127.0.0.1", 34801); p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.2", 34801)); p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.3", 34801)); + p.__set_hp_primary(host_port("localhost", 34801)); + p.__set_hp_secondaries(std::vector()); + p.hp_secondaries.emplace_back(host_port("localhost", 34802)); + p.hp_secondaries.emplace_back(host_port("localhost", 34803)); resp.partitions.emplace_back(p); } }); @@ -439,17 +445,17 @@ void meta_duplication_service::check_follower_app_if_create_completed( query_err = ERR_INCONSISTENT_STATE; } else { for (const auto &partition : resp.partitions) { - if (partition.primary.is_invalid()) { + if (partition.hp_primary.is_invalid()) { query_err = ERR_INACTIVE_STATE; break; } - if (partition.secondaries.empty()) { + if (partition.hp_secondaries.empty()) { query_err = ERR_NOT_ENOUGH_MEMBER; break; } - for (const auto &secondary : partition.secondaries) { + for (const auto &secondary : partition.hp_secondaries) { if (secondary.is_invalid()) { query_err = ERR_INACTIVE_STATE; break; @@ -522,7 +528,7 @@ void meta_duplication_service::do_update_partition_confirmed( std::shared_ptr meta_duplication_service::new_dup_from_init(const std::string &follower_cluster_name, - std::vector &&follower_cluster_metas, + std::vector &&follower_cluster_metas, std::shared_ptr &app) const { duplication_info_s_ptr dup; diff --git a/src/meta/duplication/meta_duplication_service.h b/src/meta/duplication/meta_duplication_service.h index e9aa2f4c80..5317d46ec2 100644 --- a/src/meta/duplication/meta_duplication_service.h +++ b/src/meta/duplication/meta_duplication_service.h @@ -31,7 +31,7 @@ #include "utils/fmt_logging.h" namespace dsn { -class rpc_address; +class host_port; class zrwlock_nr; namespace replication { @@ -122,7 +122,7 @@ class meta_duplication_service // Thread-Safe std::shared_ptr new_dup_from_init(const std::string &follower_cluster_name, - std::vector &&follower_cluster_metas, + std::vector &&follower_cluster_metas, std::shared_ptr &app) const; // get lock to protect access of app table diff --git a/src/meta/greedy_load_balancer.cpp b/src/meta/greedy_load_balancer.cpp index 48f79258aa..6fb7aaadb0 100644 --- a/src/meta/greedy_load_balancer.cpp +++ b/src/meta/greedy_load_balancer.cpp @@ -42,7 +42,7 @@ #include "meta/table_metrics.h" #include "meta_admin_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/command_manager.h" #include "utils/flags.h" #include "utils/fmt_logging.h" @@ -144,7 
+144,7 @@ void greedy_load_balancer::score(meta_view view, double &primary_stddev, double bool greedy_load_balancer::all_replica_infos_collected(const node_state &ns) { - dsn::rpc_address n = ns.addr(); + const auto &n = ns.host_port(); return ns.for_each_partition([this, n](const dsn::gpid &pid) { config_context &cc = *get_config_context(*(t_global_view->apps), pid); if (cc.find_from_serving(n) == cc.serving.end()) { diff --git a/src/meta/greedy_load_balancer.h b/src/meta/greedy_load_balancer.h index d11801b2bd..45710963a2 100644 --- a/src/meta/greedy_load_balancer.h +++ b/src/meta/greedy_load_balancer.h @@ -33,9 +33,11 @@ #include "meta/meta_data.h" #include "meta_admin_types.h" #include "server_load_balancer.h" +#include "utils/fmt_utils.h" namespace dsn { class command_deregister; +class host_port; class rpc_address; namespace replication { @@ -83,15 +85,22 @@ class greedy_load_balancer : public server_load_balancer bool all_replica_infos_collected(const node_state &ns); }; -inline configuration_proposal_action -new_proposal_action(const rpc_address &target, const rpc_address &node, config_type::type type) +inline configuration_proposal_action new_proposal_action(const rpc_address &target, + const rpc_address &node, + const host_port &hp_target, + const host_port &hp_node, + config_type::type type) { configuration_proposal_action act; act.__set_target(target); act.__set_node(node); + act.__set_hp_target(hp_target); + act.__set_hp_node(hp_node); act.__set_type(type); return act; } } // namespace replication } // namespace dsn + +USER_DEFINED_STRUCTURE_FORMATTER(::dsn::replication::configuration_proposal_action); diff --git a/src/meta/load_balance_policy.cpp b/src/meta/load_balance_policy.cpp index 8cff520c94..f1c5864629 100644 --- a/src/meta/load_balance_policy.cpp +++ b/src/meta/load_balance_policy.cpp @@ -25,24 +25,27 @@ #include #include +#include "absl/strings/string_view.h" #include "dsn.layer2_types.h" #include "meta/greedy_load_balancer.h" #include "meta/meta_data.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" #include "utils/command_manager.h" #include "utils/fail_point.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/string_conv.h" -#include "absl/strings/string_view.h" #include "utils/strings.h" DSN_DECLARE_uint64(min_live_node_count_for_unfreeze); namespace dsn { +class rpc_address; + namespace replication { -void dump_disk_load(app_id id, const rpc_address &node, bool only_primary, const disk_load &load) +void dump_disk_load(app_id id, const host_port &node, bool only_primary, const disk_load &load) { std::ostringstream load_string; load_string << std::endl << "<<<<<<<<<<" << std::endl; @@ -63,7 +66,7 @@ void dump_disk_load(app_id id, const rpc_address &node, bool only_primary, const bool calc_disk_load(node_mapper &nodes, const app_mapper &apps, app_id id, - const rpc_address &node, + const host_port &node, bool only_primary, /*out*/ disk_load &load) { @@ -96,13 +99,12 @@ bool calc_disk_load(node_mapper &nodes, } } -std::unordered_map -get_node_loads(const std::shared_ptr &app, - const app_mapper &apps, - node_mapper &nodes, - bool only_primary) +std::unordered_map get_node_loads(const std::shared_ptr &app, + const app_mapper &apps, + node_mapper &nodes, + bool only_primary) { - std::unordered_map node_loads; + std::unordered_map node_loads; for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { if (!calc_disk_load( nodes, apps, app->app_id, iter->first, only_primary, node_loads[iter->first])) { @@ -116,7 +118,7 @@ 
get_node_loads(const std::shared_ptr &app, return node_loads; } -const std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid) +const std::string &get_disk_tag(const app_mapper &apps, const host_port &node, const gpid &pid) { const config_context &cc = *get_config_context(apps, pid); auto iter = cc.find_from_serving(node); @@ -129,7 +131,9 @@ generate_balancer_request(const app_mapper &apps, const partition_configuration &pc, const balance_type &type, const rpc_address &from, - const rpc_address &to) + const rpc_address &to, + const host_port &hp_from, + const host_port &hp_to) { FAIL_POINT_INJECT_F("generate_balancer_request", [](absl::string_view name) { return nullptr; }); @@ -142,38 +146,40 @@ generate_balancer_request(const app_mapper &apps, case balance_type::MOVE_PRIMARY: ans = "move_primary"; result.balance_type = balancer_request_type::move_primary; + result.action_list.emplace_back(new_proposal_action( + from, from, hp_from, hp_from, config_type::CT_DOWNGRADE_TO_SECONDARY)); result.action_list.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - result.action_list.emplace_back( - new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + new_proposal_action(to, to, hp_to, hp_to, config_type::CT_UPGRADE_TO_PRIMARY)); break; case balance_type::COPY_PRIMARY: ans = "copy_primary"; result.balance_type = balancer_request_type::copy_primary; result.action_list.emplace_back( - new_proposal_action(from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + new_proposal_action(from, to, hp_from, hp_to, config_type::CT_ADD_SECONDARY_FOR_LB)); + result.action_list.emplace_back(new_proposal_action( + from, from, hp_from, hp_from, config_type::CT_DOWNGRADE_TO_SECONDARY)); result.action_list.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); + new_proposal_action(to, to, hp_to, hp_to, config_type::CT_UPGRADE_TO_PRIMARY)); result.action_list.emplace_back( - new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); - result.action_list.emplace_back(new_proposal_action(to, from, config_type::CT_REMOVE)); + new_proposal_action(to, from, hp_to, hp_from, config_type::CT_REMOVE)); break; case balance_type::COPY_SECONDARY: ans = "copy_secondary"; result.balance_type = balancer_request_type::copy_secondary; + result.action_list.emplace_back(new_proposal_action( + pc.primary, to, pc.hp_primary, hp_to, config_type::CT_ADD_SECONDARY_FOR_LB)); result.action_list.emplace_back( - new_proposal_action(pc.primary, to, config_type::CT_ADD_SECONDARY_FOR_LB)); - result.action_list.emplace_back( - new_proposal_action(pc.primary, from, config_type::CT_REMOVE)); + new_proposal_action(pc.primary, from, pc.hp_primary, hp_from, config_type::CT_REMOVE)); break; default: CHECK(false, ""); } - LOG_INFO("generate balancer: {} {} from {} of disk_tag({}) to {}", + LOG_INFO("generate balancer: {} {} from {}({}) of disk_tag({}) to {}", pc.pid, ans, + hp_from, from, - get_disk_tag(apps, from, pc.pid), + get_disk_tag(apps, hp_from, pc.pid), to); return std::make_shared(std::move(result)); } @@ -212,7 +218,7 @@ bool load_balance_policy::primary_balance(const std::shared_ptr &app, "too few alive nodes will lead to freeze"); LOG_INFO("primary balancer for app({}:{})", app->app_name, app->app_id); - auto graph = ford_fulkerson::builder(app, *_global_view->nodes, address_id).build(); + auto graph = ford_fulkerson::builder(app, *_global_view->nodes, host_port_id).build(); if (nullptr == graph) { LOG_DEBUG("the 
primaries are balanced for app({}:{})", app->app_name, app->app_id); return true; @@ -240,8 +246,8 @@ bool load_balance_policy::copy_primary(const std::shared_ptr &app, const app_mapper &apps = *_global_view->apps; int replicas_low = app->partition_count / _alive_nodes; - std::unique_ptr operation = std::make_unique( - app, apps, nodes, address_vec, address_id, still_have_less_than_average, replicas_low); + auto operation = std::make_unique( + app, apps, nodes, host_port_vec, host_port_id, still_have_less_than_average, replicas_low); return operation->start(_migration_result); } @@ -258,17 +264,17 @@ bool load_balance_policy::move_primary(std::unique_ptr path) int current = path->_prev.back(); if (!calc_disk_load( - nodes, apps, path->_app->app_id, address_vec[current], true, *current_load)) { + nodes, apps, path->_app->app_id, host_port_vec[current], true, *current_load)) { LOG_WARNING("stop move primary as some replica infos aren't collected, node({}), app({})", - address_vec[current], + host_port_vec[current], path->_app->get_logname()); return false; } int plan_moving = path->_flow.back(); while (path->_prev[current] != 0) { - rpc_address from = address_vec[path->_prev[current]]; - rpc_address to = address_vec[current]; + const auto &from = host_port_vec[path->_prev[current]]; + const auto &to = host_port_vec[current]; if (!calc_disk_load(nodes, apps, path->_app->app_id, from, true, *prev_load)) { LOG_WARNING( "stop move primary as some replica infos aren't collected, node({}), app({})", @@ -286,8 +292,8 @@ bool load_balance_policy::move_primary(std::unique_ptr path) } void load_balance_policy::start_moving_primary(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to, + const host_port &from, + const host_port &to, int plan_moving, disk_load *prev_load, disk_load *current_load) @@ -308,8 +314,13 @@ void load_balance_policy::start_moving_primary(const std::shared_ptr const partition_configuration &pc = app->partitions[selected.get_partition_index()]; auto balancer_result = _migration_result->emplace( selected, - generate_balancer_request( - *_global_view->apps, pc, balance_type::MOVE_PRIMARY, from, to)); + generate_balancer_request(*_global_view->apps, + pc, + balance_type::MOVE_PRIMARY, + dsn::dns_resolver::instance().resolve_address(from), + dsn::dns_resolver::instance().resolve_address(to), + from, + to)); CHECK(balancer_result.second, "gpid({}) already inserted as an action", selected); --(*prev_load)[get_disk_tag(*_global_view->apps, from, selected)]; @@ -318,7 +329,7 @@ void load_balance_policy::start_moving_primary(const std::shared_ptr } std::list load_balance_policy::calc_potential_moving( - const std::shared_ptr &app, const rpc_address &from, const rpc_address &to) + const std::shared_ptr &app, const host_port &from, const host_port &to) { std::list potential_moving; const node_state &ns = _global_view->nodes->find(from)->second; @@ -335,8 +346,8 @@ std::list load_balance_policy::calc_potential_moving( dsn::gpid load_balance_policy::select_moving(std::list &potential_moving, disk_load *prev_load, disk_load *current_load, - rpc_address from, - rpc_address to) + host_port from, + host_port to) { std::list::iterator selected = potential_moving.end(); int max = std::numeric_limits::min(); @@ -471,27 +482,28 @@ void load_balance_policy::number_nodes(const node_mapper &nodes) { int current_id = 1; - address_id.clear(); - address_vec.resize(_alive_nodes + 2); + host_port_id.clear(); + host_port_vec.resize(_alive_nodes + 2); for (auto iter = nodes.begin(); 
iter != nodes.end(); ++iter) { - CHECK(!iter->first.is_invalid() && !iter->second.addr().is_invalid(), "invalid address"); + CHECK(!iter->first.is_invalid() && !iter->second.host_port().is_invalid(), + "invalid address"); CHECK(iter->second.alive(), "dead node"); - address_id[iter->first] = current_id; - address_vec[current_id] = iter->first; + host_port_id[iter->first] = current_id; + host_port_vec[current_id] = iter->first; ++current_id; } } ford_fulkerson::ford_fulkerson(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id, + const std::unordered_map &host_port_id, uint32_t higher_count, uint32_t lower_count, int replicas_low) : _app(app), _nodes(nodes), - _address_id(address_id), + _host_port_id(host_port_id), _higher_count(higher_count), _lower_count(lower_count), _replicas_low(replicas_low) @@ -526,7 +538,7 @@ void ford_fulkerson::make_graph() _graph_nodes = _nodes.size() + 2; _network.resize(_graph_nodes, std::vector(_graph_nodes, 0)); for (const auto &node : _nodes) { - int node_id = _address_id.at(node.first); + int node_id = _host_port_id.at(node.first); add_edge(node_id, node.second); update_decree(node_id, node.second); } @@ -547,9 +559,9 @@ void ford_fulkerson::update_decree(int node_id, const node_state &ns) { ns.for_each_primary(_app->app_id, [&, this](const gpid &pid) { const partition_configuration &pc = _app->partitions[pid.get_partition_index()]; - for (const auto &secondary : pc.secondaries) { - auto i = _address_id.find(secondary); - CHECK(i != _address_id.end(), "invalid secondary address, address = {}", secondary); + for (const auto &secondary : pc.hp_secondaries) { + auto i = _host_port_id.find(secondary); + CHECK(i != _host_port_id.end(), "invalid secondary address, address = {}", secondary); _network[node_id][i->second]++; } return true; @@ -619,15 +631,19 @@ copy_replica_operation::copy_replica_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id) - : _app(app), _apps(apps), _nodes(nodes), _address_vec(address_vec), _address_id(address_id) + const std::vector &host_port_vec, + const std::unordered_map &host_port_id) + : _app(app), + _apps(apps), + _nodes(nodes), + _host_port_vec(host_port_vec), + _host_port_id(host_port_id) { } bool copy_replica_operation::start(migration_list *result) { - init_ordered_address_ids(); + init_ordered_host_port_ids(); _node_loads = get_node_loads(_app, _apps, _nodes, only_copy_primary()); if (_node_loads.size() != _nodes.size()) { return false; @@ -641,9 +657,9 @@ bool copy_replica_operation::start(migration_list *result) gpid selected_pid = select_partition(result); if (selected_pid.get_app_id() != -1) { copy_once(selected_pid, result); - update_ordered_address_ids(); + update_ordered_host_port_ids(); } else { - _ordered_address_ids.erase(--_ordered_address_ids.end()); + _ordered_host_port_ids.erase(--_ordered_host_port_ids.end()); } } return true; @@ -651,8 +667,8 @@ bool copy_replica_operation::start(migration_list *result) const partition_set *copy_replica_operation::get_all_partitions() { - int id_max = *_ordered_address_ids.rbegin(); - const node_state &ns = _nodes.find(_address_vec[id_max])->second; + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &ns = _nodes.find(_host_port_vec[id_max])->second; const partition_set *partitions = ns.partitions(_app->app_id, only_copy_primary()); return partitions; } @@ -660,8 +676,8 @@ const partition_set 
*copy_replica_operation::get_all_partitions() gpid copy_replica_operation::select_max_load_gpid(const partition_set *partitions, migration_list *result) { - int id_max = *_ordered_address_ids.rbegin(); - const disk_load &load_on_max = _node_loads.at(_address_vec[id_max]); + int id_max = *_ordered_host_port_ids.rbegin(); + const auto &load_on_max = _node_loads.at(_host_port_vec[id_max]); gpid selected_pid(-1, -1); int max_load = -1; @@ -670,7 +686,7 @@ gpid copy_replica_operation::select_max_load_gpid(const partition_set *partition continue; } - const std::string &disk_tag = get_disk_tag(_apps, _address_vec[id_max], pid); + const std::string &disk_tag = get_disk_tag(_apps, _host_port_vec[id_max], pid); auto load = load_on_max.at(disk_tag); if (load > max_load) { selected_pid = pid; @@ -682,33 +698,39 @@ gpid copy_replica_operation::select_max_load_gpid(const partition_set *partition void copy_replica_operation::copy_once(gpid selected_pid, migration_list *result) { - auto from = _address_vec[*_ordered_address_ids.rbegin()]; - auto to = _address_vec[*_ordered_address_ids.begin()]; + const auto &from = _host_port_vec[*_ordered_host_port_ids.rbegin()]; + const auto &to = _host_port_vec[*_ordered_host_port_ids.begin()]; auto pc = _app->partitions[selected_pid.get_partition_index()]; - auto request = generate_balancer_request(_apps, pc, get_balance_type(), from, to); + auto request = generate_balancer_request(_apps, + pc, + get_balance_type(), + dsn::dns_resolver::instance().resolve_address(from), + dsn::dns_resolver::instance().resolve_address(to), + from, + to); result->emplace(selected_pid, request); } -void copy_replica_operation::update_ordered_address_ids() +void copy_replica_operation::update_ordered_host_port_ids() { - int id_min = *_ordered_address_ids.begin(); - int id_max = *_ordered_address_ids.rbegin(); + int id_min = *_ordered_host_port_ids.begin(); + int id_max = *_ordered_host_port_ids.rbegin(); --_partition_counts[id_max]; ++_partition_counts[id_min]; - _ordered_address_ids.erase(_ordered_address_ids.begin()); - _ordered_address_ids.erase(--_ordered_address_ids.end()); + _ordered_host_port_ids.erase(_ordered_host_port_ids.begin()); + _ordered_host_port_ids.erase(--_ordered_host_port_ids.end()); - _ordered_address_ids.insert(id_max); - _ordered_address_ids.insert(id_min); + _ordered_host_port_ids.insert(id_max); + _ordered_host_port_ids.insert(id_min); } -void copy_replica_operation::init_ordered_address_ids() +void copy_replica_operation::init_ordered_host_port_ids() { - _partition_counts.resize(_address_vec.size(), 0); + _partition_counts.resize(_host_port_vec.size(), 0); for (const auto &iter : _nodes) { - auto id = _address_id.at(iter.first); + auto id = _host_port_id.at(iter.first); _partition_counts[id] = get_partition_count(iter.second); } @@ -719,19 +741,21 @@ void copy_replica_operation::init_ordered_address_ids() : left < right; }); for (const auto &iter : _nodes) { - auto id = _address_id.at(iter.first); + auto id = _host_port_id.at(iter.first); ordered_queue.insert(id); } - _ordered_address_ids.swap(ordered_queue); + _ordered_host_port_ids.swap(ordered_queue); } gpid copy_replica_operation::select_partition(migration_list *result) { const partition_set *partitions = get_all_partitions(); - int id_max = *_ordered_address_ids.rbegin(); - const node_state &ns = _nodes.find(_address_vec[id_max])->second; - CHECK(partitions != nullptr && !partitions->empty(), "max load({}) shouldn't empty", ns.addr()); + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &ns 
= _nodes.find(_host_port_vec[id_max])->second; + CHECK(partitions != nullptr && !partitions->empty(), + "max load({}) shouldn't empty", + ns.host_port()); return select_max_load_gpid(partitions, result); } @@ -740,11 +764,11 @@ copy_primary_operation::copy_primary_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, bool have_lower_than_average, int replicas_low) - : copy_replica_operation(app, apps, nodes, address_vec, address_id) + : copy_replica_operation(app, apps, nodes, host_port_vec, host_port_id) { _have_lower_than_average = have_lower_than_average; _replicas_low = replicas_low; @@ -762,14 +786,14 @@ bool copy_primary_operation::can_select(gpid pid, migration_list *result) bool copy_primary_operation::can_continue() { - int id_min = *_ordered_address_ids.begin(); + int id_min = *_ordered_host_port_ids.begin(); if (_have_lower_than_average && _partition_counts[id_min] >= _replicas_low) { LOG_INFO("{}: stop the copy due to primaries on all nodes will reach low later.", _app->get_logname()); return false; } - int id_max = *_ordered_address_ids.rbegin(); + int id_max = *_ordered_host_port_ids.rbegin(); if (!_have_lower_than_average && _partition_counts[id_max] - _partition_counts[id_min] <= 1) { LOG_INFO("{}: stop the copy due to the primary will be balanced later.", _app->get_logname()); diff --git a/src/meta/load_balance_policy.h b/src/meta/load_balance_policy.h index f0e06bf0b6..5afbda587a 100644 --- a/src/meta/load_balance_policy.h +++ b/src/meta/load_balance_policy.h @@ -33,16 +33,18 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/enum_helper.h" #include "utils/zlocks.h" namespace dsn { class command_deregister; class partition_configuration; +class rpc_address; namespace replication { class configuration_balancer_request; +class meta_service; // disk_tag->primary_count/total_count_on_this_disk typedef std::map disk_load; @@ -63,18 +65,19 @@ ENUM_END(balance_type) bool calc_disk_load(node_mapper &nodes, const app_mapper &apps, app_id id, - const rpc_address &node, + const host_port &node, bool only_primary, /*out*/ disk_load &load); -const std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid); +const std::string &get_disk_tag(const app_mapper &apps, const host_port &node, const gpid &pid); std::shared_ptr generate_balancer_request(const app_mapper &apps, const partition_configuration &pc, const balance_type &type, const rpc_address &from, - const rpc_address &to); + const rpc_address &to, + const host_port &hp_from, + const host_port &hp_to); -class meta_service; struct flow_path; class load_balance_policy @@ -105,8 +108,8 @@ class load_balance_policy int _alive_nodes; // this is used to assign an integer id for every node // and these are generated from the above data, which are tempory too - std::unordered_map address_id; - std::vector address_vec; + std::unordered_map host_port_id; + std::vector host_port_vec; // the app set which won't be re-balanced dsn::zrwlock_nr _balancer_ignored_apps_lock; // { @@ -116,19 +119,19 @@ class load_balance_policy private: void start_moving_primary(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to, + const host_port &from, + const host_port &to, int plan_moving, 
disk_load *prev_load, disk_load *current_load); std::list calc_potential_moving(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to); + const host_port &from, + const host_port &to); dsn::gpid select_moving(std::list &potential_moving, disk_load *prev_load, disk_load *current_load, - rpc_address from, - rpc_address to); + host_port from, + host_port to); void number_nodes(const node_mapper &nodes); std::string remote_command_balancer_ignored_app_ids(const std::vector &args); @@ -160,7 +163,7 @@ class ford_fulkerson ford_fulkerson() = delete; ford_fulkerson(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id, + const std::unordered_map &host_port_id, uint32_t higher_count, uint32_t lower_count, int replicas_low); @@ -174,8 +177,8 @@ class ford_fulkerson public: builder(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id) - : _app(app), _nodes(nodes), _address_id(address_id) + const std::unordered_map &host_port_id) + : _app(app), _nodes(nodes), _host_port_id(host_port_id) { } @@ -199,13 +202,13 @@ class ford_fulkerson return nullptr; } return std::make_unique( - _app, _nodes, _address_id, higher_count, lower_count, replicas_low); + _app, _nodes, _host_port_id, higher_count, lower_count, replicas_low); } private: const std::shared_ptr &_app; const node_mapper &_nodes; - const std::unordered_map &_address_id; + const std::unordered_map &_host_port_id; }; private: @@ -224,7 +227,7 @@ class ford_fulkerson const std::shared_ptr &_app; const node_mapper &_nodes; - const std::unordered_map &_address_id; + const std::unordered_map &_host_port_id; uint32_t _higher_count; uint32_t _lower_count; int _replicas_low; @@ -244,33 +247,33 @@ class copy_replica_operation copy_replica_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id); + const std::vector &host_port_vec, + const std::unordered_map &host_port_id); virtual ~copy_replica_operation() = default; bool start(migration_list *result); protected: - void init_ordered_address_ids(); + void init_ordered_host_port_ids(); virtual int get_partition_count(const node_state &ns) const = 0; gpid select_partition(migration_list *result); const partition_set *get_all_partitions(); gpid select_max_load_gpid(const partition_set *partitions, migration_list *result); void copy_once(gpid selected_pid, migration_list *result); - void update_ordered_address_ids(); + void update_ordered_host_port_ids(); virtual bool only_copy_primary() = 0; virtual bool can_select(gpid pid, migration_list *result) = 0; virtual bool can_continue() = 0; virtual balance_type get_balance_type() = 0; - std::set> _ordered_address_ids; + std::set> _ordered_host_port_ids; const std::shared_ptr _app; const app_mapper &_apps; node_mapper &_nodes; - const std::vector &_address_vec; - const std::unordered_map &_address_id; - std::unordered_map _node_loads; + const std::vector &_host_port_vec; + const std::unordered_map &_host_port_id; + std::unordered_map _node_loads; std::vector _partition_counts; FRIEND_TEST(copy_primary_operation, misc); @@ -283,8 +286,8 @@ class copy_primary_operation : public copy_replica_operation copy_primary_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, bool have_lower_than_average, 
int replicas_low); ~copy_primary_operation() = default; diff --git a/src/meta/meta_backup_service.cpp b/src/meta/meta_backup_service.cpp index 5c8a941ae9..ff25050338 100644 --- a/src/meta/meta_backup_service.cpp +++ b/src/meta/meta_backup_service.cpp @@ -37,7 +37,7 @@ #include "meta_backup_service.h" #include "meta_service.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" @@ -155,7 +155,7 @@ void policy_context::start_backup_app_meta_unlocked(int32_t app_id) int total_partitions = iter->second; for (int32_t pidx = 0; pidx < total_partitions; ++pidx) { update_partition_progress_unlocked( - gpid(app_id, pidx), cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address()); + gpid(app_id, pidx), cold_backup_constant::PROGRESS_FINISHED, dsn::host_port()); } return; } @@ -458,7 +458,7 @@ void policy_context::write_backup_info_unlocked(const backup_info &b_info, bool policy_context::update_partition_progress_unlocked(gpid pid, int32_t progress, - const rpc_address &source) + const host_port &source) { int32_t &local_progress = _progress.partition_progress[pid]; if (local_progress == cold_backup_constant::PROGRESS_FINISHED) { @@ -509,7 +509,7 @@ void policy_context::record_partition_checkpoint_size_unlock(const gpid &pid, in void policy_context::start_backup_partition_unlocked(gpid pid) { - dsn::rpc_address partition_primary; + dsn::host_port partition_primary; { // check app and partition status zauto_read_lock l; @@ -521,10 +521,10 @@ void policy_context::start_backup_partition_unlocked(gpid pid) "{}: app {} is not available, skip to backup it.", _backup_sig, pid.get_app_id()); _progress.is_app_skipped[pid.get_app_id()] = true; update_partition_progress_unlocked( - pid, cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address()); + pid, cold_backup_constant::PROGRESS_FINISHED, dsn::host_port()); return; } - partition_primary = app->partitions[pid.get_partition_index()].primary; + partition_primary = app->partitions[pid.get_partition_index()].hp_primary; } if (partition_primary.is_invalid()) { LOG_WARNING("{}: partition {} doesn't have a primary now, retry to backup it later", @@ -565,7 +565,7 @@ void policy_context::start_backup_partition_unlocked(gpid pid) void policy_context::on_backup_reply(error_code err, backup_response &&response, gpid pid, - const rpc_address &primary) + const host_port &primary) { LOG_INFO( "{}: receive backup response for partition {} from server {}.", _backup_sig, pid, primary); @@ -1715,7 +1715,7 @@ void backup_service::start_backup_app(start_backup_app_rpc rpc) }); int32_t app_id = request.app_id; - std::shared_ptr engine = std::make_shared(this); + auto engine = std::make_shared(this); error_code err = engine->init_backup(app_id); if (err != ERR_OK) { response.err = err; diff --git a/src/meta/meta_backup_service.h b/src/meta/meta_backup_service.h index e382a8922c..f767ad2be3 100644 --- a/src/meta/meta_backup_service.h +++ b/src/meta/meta_backup_service.h @@ -49,7 +49,7 @@ namespace dsn { class message_ex; -class rpc_address; +class host_port; namespace dist { namespace block_service { @@ -297,7 +297,7 @@ mock_private : // mock_virtual bool - update_partition_progress_unlocked(gpid pid, int32_t progress, const rpc_address &source); + update_partition_progress_unlocked(gpid pid, int32_t progress, const host_port &source); mock_virtual void record_partition_checkpoint_size_unlock(const gpid& pid, 
int64_t size); mock_virtual void start_backup_app_meta_unlocked(int32_t app_id); @@ -326,7 +326,7 @@ mock_private : mock_virtual void on_backup_reply(dsn::error_code err, backup_response &&response, gpid pid, - const rpc_address &primary); + const host_port &primary); mock_virtual void gc_backup_info_unlocked(const backup_info &info_to_gc); mock_virtual void issue_gc_backup_info_task_unlocked(); diff --git a/src/meta/meta_bulk_load_ingestion_context.cpp b/src/meta/meta_bulk_load_ingestion_context.cpp index d6b9de80eb..30989c79f8 100644 --- a/src/meta/meta_bulk_load_ingestion_context.cpp +++ b/src/meta/meta_bulk_load_ingestion_context.cpp @@ -48,9 +48,9 @@ void ingestion_context::partition_node_info::create(const partition_configuratio const config_context &cc) { pid = config.pid; - std::unordered_set current_nodes; - current_nodes.insert(config.primary); - for (const auto &secondary : config.secondaries) { + std::unordered_set current_nodes; + current_nodes.insert(config.hp_primary); + for (const auto &secondary : config.hp_secondaries) { current_nodes.insert(secondary); } for (const auto &node : current_nodes) { @@ -139,7 +139,7 @@ bool ingestion_context::try_partition_ingestion(const partition_configuration &c return true; } -bool ingestion_context::check_node_ingestion(const rpc_address &node, const std::string &disk_tag) +bool ingestion_context::check_node_ingestion(const host_port &node, const std::string &disk_tag) { if (_nodes_context.find(node) == _nodes_context.end()) { _nodes_context[node] = node_context(node, disk_tag); diff --git a/src/meta/meta_bulk_load_ingestion_context.h b/src/meta/meta_bulk_load_ingestion_context.h index 3b18bce273..1675726d66 100644 --- a/src/meta/meta_bulk_load_ingestion_context.h +++ b/src/meta/meta_bulk_load_ingestion_context.h @@ -22,7 +22,7 @@ #include #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/flags.h" DSN_DECLARE_uint32(bulk_load_node_max_ingesting_count); @@ -46,7 +46,7 @@ class ingestion_context { gpid pid; // node address -> disk_tag - std::unordered_map node_disk; + std::unordered_map node_disk; partition_node_info() {} partition_node_info(const partition_configuration &config, const config_context &cc) @@ -58,13 +58,13 @@ class ingestion_context struct node_context { - rpc_address address; + host_port address; uint32_t node_ingesting_count; // disk tag -> ingesting partition count std::unordered_map disk_ingesting_counts; node_context() {} - node_context(const rpc_address &address, const std::string &disk_tag) + node_context(const host_port &address, const std::string &disk_tag) : address(address), node_ingesting_count(0) { init_disk(disk_tag); @@ -78,7 +78,7 @@ class ingestion_context }; bool try_partition_ingestion(const partition_configuration &config, const config_context &cc); - bool check_node_ingestion(const rpc_address &node, const std::string &disk_tag); + bool check_node_ingestion(const host_port &node, const std::string &disk_tag); void add_partition(const partition_node_info &info); void remove_partition(const gpid &pid); uint32_t get_app_ingesting_count(const uint32_t app_id) const; @@ -93,7 +93,7 @@ class ingestion_context // ingesting partitions std::unordered_map _running_partitions; // every node and every disk ingesting partition count - std::unordered_map _nodes_context; + std::unordered_map _nodes_context; }; } // namespace replication diff --git a/src/meta/meta_bulk_load_service.cpp b/src/meta/meta_bulk_load_service.cpp index 082a635b3f..26f51c5a0e 
100644 --- a/src/meta/meta_bulk_load_service.cpp +++ b/src/meta/meta_bulk_load_service.cpp @@ -37,6 +37,8 @@ #include "meta/meta_state_service.h" #include "meta/server_state.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" @@ -367,7 +369,7 @@ bool bulk_load_service::check_partition_status( } pconfig = app->partitions[pid.get_partition_index()]; - if (pconfig.primary.is_invalid()) { + if (pconfig.hp_primary.is_invalid()) { LOG_WARNING("app({}) partition({}) primary is invalid, try it later", app_name, pid); tasking::enqueue(LPC_META_STATE_NORMAL, _meta_svc->tracker(), @@ -377,7 +379,7 @@ bool bulk_load_service::check_partition_status( return false; } - if (pconfig.secondaries.size() < pconfig.max_replica_count - 1) { + if (pconfig.hp_secondaries.size() < pconfig.max_replica_count - 1) { bulk_load_status::type p_status; { zauto_read_lock l(_lock); @@ -422,14 +424,14 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g return; } - rpc_address primary_addr = pconfig.primary; auto req = std::make_unique<bulk_load_request>(); { zauto_read_lock l(_lock); const app_bulk_load_info &ainfo = _app_bulk_load_info[pid.get_app_id()]; req->pid = pid; req->app_name = app_name; - req->primary_addr = primary_addr; + req->primary_addr = pconfig.primary; + req->__set_hp_primary(pconfig.hp_primary); req->remote_provider_name = ainfo.file_provider_type; req->cluster_name = ainfo.cluster_name; req->meta_bulk_load_status = get_partition_bulk_load_status_unlocked(pid); @@ -438,9 +440,10 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g req->remote_root_path = ainfo.remote_root_path; } - LOG_INFO("send bulk load request to node({}), app({}), partition({}), partition " + LOG_INFO("send bulk load request to node({}({})), app({}), partition({}), partition " "status = {}, remote provider = {}, cluster_name = {}, remote_root_path = {}", - primary_addr, + pconfig.hp_primary, + pconfig.primary, app_name, pid, dsn::enum_to_string(req->meta_bulk_load_status), @@ -449,7 +452,18 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g req->remote_root_path); bulk_load_rpc rpc(std::move(req), RPC_BULK_LOAD, 0_ms, 0, pid.thread_hash()); - rpc.call(primary_addr, _meta_svc->tracker(), [this, rpc](error_code err) mutable { + rpc.call(pconfig.primary, _meta_svc->tracker(), [this, rpc](error_code err) mutable { + // Fill the host_port-based field if needed: the remote server may not support + // host_port yet and may only provide the rpc_address-based state. + auto &bulk_load_resp = rpc.response(); + if (!bulk_load_resp.__isset.hp_group_bulk_load_state) { + bulk_load_resp.__set_hp_group_bulk_load_state({}); + for (const auto &kv : bulk_load_resp.group_bulk_load_state) { + bulk_load_resp.hp_group_bulk_load_state[host_port::from_address(kv.first)] = + kv.second; + } + } + on_partition_bulk_load_reply(err, rpc.request(), rpc.response()); }); } @@ -461,15 +475,17 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, { const std::string &app_name = request.app_name; const gpid &pid = request.pid; - const rpc_address &primary_addr = request.primary_addr; + const auto &primary_addr = request.primary_addr; + const auto &primary_hp = request.hp_primary; if (err != ERR_OK) { - LOG_ERROR( - "app({}), partition({}) failed to receive bulk load response from node({}), error = {}", - app_name, - pid, - primary_addr, - err); + 
LOG_ERROR("app({}), partition({}) failed to receive bulk load response from node({}({})), " + "error = {}", + app_name, + pid, + primary_hp, + primary_addr, + err); try_rollback_to_downloading(app_name, pid); try_resend_bulk_load_request(app_name, pid); return; @@ -477,9 +493,10 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, if (response.err == ERR_OBJECT_NOT_FOUND || response.err == ERR_INVALID_STATE) { LOG_ERROR( - "app({}), partition({}) doesn't exist or has invalid state on node({}), error = {}", + "app({}), partition({}) doesn't exist or has invalid state on node({}({})), error = {}", app_name, pid, + primary_hp, primary_addr, response.err); try_rollback_to_downloading(app_name, pid); @@ -489,8 +506,9 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, if (response.err == ERR_BUSY) { LOG_WARNING( - "node({}) has enough replicas downloading, wait for next round to send bulk load " + "node({}({})) has enough replicas downloading, wait for next round to send bulk load " "request for app({}), partition({})", + primary_hp, primary_addr, app_name, pid); @@ -499,13 +517,15 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, } if (response.err != ERR_OK) { - LOG_ERROR("app({}), partition({}) from node({}) handle bulk load response failed, error = " - "{}, primary status = {}", - app_name, - pid, - primary_addr, - response.err, - dsn::enum_to_string(response.primary_bulk_load_status)); + LOG_ERROR( + "app({}), partition({}) from node({}({})) handle bulk load response failed, error = " + "{}, primary status = {}", + app_name, + pid, + primary_hp, + primary_addr, + response.err, + dsn::enum_to_string(response.primary_bulk_load_status)); handle_bulk_load_failed(pid.get_app_id(), response.err); try_resend_bulk_load_request(app_name, pid); return; @@ -538,7 +558,7 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, bulk_load_status::type app_status = get_app_bulk_load_status(response.pid.get_app_id()); switch (app_status) { case bulk_load_status::BLS_DOWNLOADING: - handle_app_downloading(response, primary_addr); + handle_app_downloading(response, primary_hp); break; case bulk_load_status::BLS_DOWNLOADED: update_partition_info_on_remote_storage( @@ -546,15 +566,15 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, // when app status is downloaded or ingesting, send request frequently break; case bulk_load_status::BLS_INGESTING: - handle_app_ingestion(response, primary_addr); + handle_app_ingestion(response, primary_hp); break; case bulk_load_status::BLS_SUCCEED: case bulk_load_status::BLS_FAILED: case bulk_load_status::BLS_CANCELED: - handle_bulk_load_finish(response, primary_addr); + handle_bulk_load_finish(response, primary_hp); break; case bulk_load_status::BLS_PAUSING: - handle_app_pausing(response, primary_addr); + handle_app_pausing(response, primary_hp); break; case bulk_load_status::BLS_PAUSED: // paused not send request to replica servers @@ -583,7 +603,7 @@ void bulk_load_service::try_resend_bulk_load_request(const std::string &app_name // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_downloading(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -599,7 +619,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const 
auto &kv : response.hp_group_bulk_load_state) { const auto &bulk_load_states = kv.second; if (!bulk_load_states.__isset.download_progress || !bulk_load_states.__isset.download_status) { @@ -652,7 +672,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons { zauto_write_lock l(_lock); _partitions_total_download_progress[pid] = total_progress; - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } // update partition status to `downloaded` if all replica downloaded @@ -665,7 +685,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -680,7 +700,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { const auto &bulk_load_states = kv.second; if (!bulk_load_states.__isset.ingest_status) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " @@ -711,7 +731,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, response.is_group_ingestion_finished); { zauto_write_lock l(_lock); - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (response.is_group_ingestion_finished) { @@ -723,7 +743,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -738,7 +758,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { if (!kv.second.__isset.is_cleaned_up) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) is_cleaned_up is not set", @@ -776,7 +796,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon { zauto_write_lock l(_lock); _partitions_cleaned_up[pid] = group_cleaned_up; - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (group_cleaned_up) { @@ -804,7 +824,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_pausing(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -819,7 +839,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { if (!kv.second.__isset.is_paused) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " 
"primary_status({}), but node({}) is_paused is not set", @@ -842,7 +862,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, is_group_paused); { zauto_write_lock l(_lock); - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (is_group_paused) { @@ -1013,10 +1033,10 @@ void bulk_load_service::update_partition_info_unlock(const gpid &pid, // no need to update other field of partition_bulk_load_info return; } - pinfo.addresses.clear(); + pinfo.host_ports.clear(); const auto &state = _partitions_bulk_load_state[pid]; for (const auto &kv : state) { - pinfo.addresses.emplace_back(kv.first); + pinfo.host_ports.emplace_back(kv.first); } pinfo.ever_ingest_succeed = true; } @@ -1187,15 +1207,15 @@ bool bulk_load_service::check_ever_ingestion_succeed(const partition_configurati return false; } - std::vector current_nodes; - current_nodes.emplace_back(config.primary); - for (const auto &secondary : config.secondaries) { + std::vector current_nodes; + current_nodes.emplace_back(config.hp_primary); + for (const auto &secondary : config.hp_secondaries) { current_nodes.emplace_back(secondary); } - std::sort(pinfo.addresses.begin(), pinfo.addresses.end()); + std::sort(pinfo.host_ports.begin(), pinfo.host_ports.end()); std::sort(current_nodes.begin(), current_nodes.end()); - if (current_nodes == pinfo.addresses) { + if (current_nodes == pinfo.host_ports) { LOG_INFO("app({}) partition({}) has already executed ingestion succeed", app_name, pid); update_partition_info_on_remote_storage(app_name, pid, bulk_load_status::BLS_SUCCEED); return true; @@ -1257,7 +1277,7 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g return; } - rpc_address primary_addr = pconfig.primary; + const auto &primary_addr = pconfig.hp_primary; ballot meta_ballot = pconfig.ballot; tasking::enqueue(LPC_BULK_LOAD_INGESTION, _meta_svc->tracker(), @@ -1274,7 +1294,7 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g // ThreadPool: THREAD_POOL_DEFAULT void bulk_load_service::send_ingestion_request(const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr, + const host_port &primary_addr, const ballot &meta_ballot) { ingestion_request req; @@ -1309,7 +1329,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, const ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr) + const host_port &primary_addr) { if (err != ERR_OK || resp.err != ERR_OK || resp.rocksdb_error != ERR_OK) { finish_ingestion(pid); @@ -1595,9 +1615,18 @@ void bulk_load_service::on_query_bulk_load_status(query_bulk_load_rpc rpc) } response.bulk_load_states.resize(partition_count); + response.__set_hp_bulk_load_states( + std::vector>(partition_count)); for (const auto &kv : _partitions_bulk_load_state) { if (kv.first.get_app_id() == app_id) { - response.bulk_load_states[kv.first.get_partition_index()] = kv.second; + auto pidx = kv.first.get_partition_index(); + response.hp_bulk_load_states[pidx] = kv.second; + + std::map addr_pbls; + for (const auto &bls : kv.second) { + addr_pbls[dsn::dns_resolver::instance().resolve_address(bls.first)] = bls.second; + } + response.bulk_load_states[pidx] = addr_pbls; } } diff --git a/src/meta/meta_bulk_load_service.h b/src/meta/meta_bulk_load_service.h index 216a4b9911..359b3fa085 100644 --- a/src/meta/meta_bulk_load_service.h +++ 
b/src/meta/meta_bulk_load_service.h @@ -35,7 +35,7 @@ #include "common/replication_other_types.h" #include "meta/meta_state_service_utils.h" #include "meta_bulk_load_ingestion_context.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "server_state.h" #include "utils/error_code.h" @@ -87,8 +87,8 @@ struct partition_bulk_load_info bulk_load_status::type status; bulk_load_metadata metadata; bool ever_ingest_succeed; - std::vector addresses; - DEFINE_JSON_SERIALIZATION(status, metadata, ever_ingest_succeed, addresses) + std::vector host_ports; + DEFINE_JSON_SERIALIZATION(status, metadata, ever_ingest_succeed, host_ports) }; // Used for remote file provider @@ -200,17 +200,15 @@ class bulk_load_service // if app is still in bulk load, resend bulk_load_request to primary after interval seconds void try_resend_bulk_load_request(const std::string &app_name, const gpid &pid); - void handle_app_downloading(const bulk_load_response &response, - const rpc_address &primary_addr); + void handle_app_downloading(const bulk_load_response &response, const host_port &primary_addr); - void handle_app_ingestion(const bulk_load_response &response, const rpc_address &primary_addr); + void handle_app_ingestion(const bulk_load_response &response, const host_port &primary_addr); // when app status is `succeed, `failed`, `canceled`, meta and replica should cleanup bulk load // states - void handle_bulk_load_finish(const bulk_load_response &response, - const rpc_address &primary_addr); + void handle_bulk_load_finish(const bulk_load_response &response, const host_port &primary_addr); - void handle_app_pausing(const bulk_load_response &response, const rpc_address &primary_addr); + void handle_app_pausing(const bulk_load_response &response, const host_port &primary_addr); // app not existed or not available during bulk load void handle_app_unavailable(int32_t app_id, const std::string &app_name); @@ -225,14 +223,14 @@ class bulk_load_service void send_ingestion_request(const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr, + const host_port &primary_addr, const ballot &meta_ballot); void on_partition_ingestion_reply(error_code err, const ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr); + const host_port &primary_addr); // Called by `partition_ingestion` // - true : this partition has ever executed ingestion succeed, no need to send ingestion @@ -519,7 +517,7 @@ class bulk_load_service // partition_index -> group total download progress std::unordered_map _partitions_total_download_progress; // partition_index -> group bulk load states(node address -> state) - std::unordered_map> + std::unordered_map> _partitions_bulk_load_state; std::unordered_map _partitions_cleaned_up; diff --git a/src/meta/meta_data.cpp b/src/meta/meta_data.cpp index 33af192627..610bd18987 100644 --- a/src/meta/meta_data.cpp +++ b/src/meta/meta_data.cpp @@ -31,6 +31,7 @@ #include "common/replication_enums.h" #include "meta_data.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "utils/flags.h" @@ -93,26 +94,6 @@ void when_update_replicas(config_type::type t, const std::function & } } -void maintain_drops(std::vector &drops, const rpc_address &node, config_type::type t) -{ - auto action = [&drops, &node](bool is_adding) { - auto it = std::find(drops.begin(), drops.end(), node); - if 
(is_adding) { - if (it != drops.end()) { - drops.erase(it); - } - } else { - CHECK( - it == drops.end(), "the node({}) cannot be in drops set before this update", node); - drops.push_back(node); - if (drops.size() > 3) { - drops.erase(drops.begin()); - } - } - }; - when_update_replicas(t, action); -} - bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) { partition_configuration &pc = *get_config(*view.apps, pid); @@ -133,7 +114,8 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) invalid_ballot, "the ballot of server must not be invalid_ballot, node = {}", server.node); - pc.primary = server.node; + pc.primary = dsn::dns_resolver::instance().resolve_address(server.node); + pc.__set_hp_primary(server.node); pc.ballot = server.ballot; pc.partition_flags = 0; pc.max_replica_count = max_replica_count; @@ -151,12 +133,14 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) // we put max_replica_count-1 recent replicas to last_drops, in case of the DDD-state when the // only primary dead // when add node to pc.last_drops, we don't remove it from our cc.drop_list - CHECK(pc.last_drops.empty(), "last_drops of partition({}) must be empty", pid); + CHECK(pc.hp_last_drops.empty(), "last_drops of partition({}) must be empty", pid); for (auto iter = drop_list.rbegin(); iter != drop_list.rend(); ++iter) { - if (pc.last_drops.size() + 1 >= max_replica_count) + if (pc.hp_last_drops.size() + 1 >= max_replica_count) break; // similar to cc.drop_list, pc.last_drop is also a stack structure - pc.last_drops.insert(pc.last_drops.begin(), iter->node); + pc.last_drops.insert(pc.last_drops.begin(), + dsn::dns_resolver::instance().resolve_address(iter->node)); + pc.hp_last_drops.insert(pc.hp_last_drops.begin(), iter->node); LOG_INFO("construct for ({}), select {} into last_drops, ballot({}), " "committed_decree({}), prepare_decree({})", pid, @@ -170,7 +154,7 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) return true; } -bool collect_replica(meta_view view, const rpc_address &node, const replica_info &info) +bool collect_replica(meta_view view, const host_port &node, const replica_info &info) { partition_configuration &pc = *get_config(*view.apps, info.pid); // current partition is during partition split @@ -204,12 +188,12 @@ void proposal_actions::reset_tracked_current_learner() current_learner.last_prepared_decree = invalid_decree; } -void proposal_actions::track_current_learner(const dsn::rpc_address &node, const replica_info &info) +void proposal_actions::track_current_learner(const dsn::host_port &node, const replica_info &info) { if (empty()) return; configuration_proposal_action &act = acts.front(); - if (act.node != node) + if (act.hp_node != node) return; // currently we only handle add secondary @@ -327,7 +311,7 @@ void config_context::check_size() } } -std::vector::iterator config_context::find_from_dropped(const rpc_address &node) +std::vector::iterator config_context::find_from_dropped(const host_port &node) { return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) { return r.node == node; @@ -335,14 +319,14 @@ std::vector::iterator config_context::find_from_dropped(const r } std::vector::const_iterator -config_context::find_from_dropped(const rpc_address &node) const +config_context::find_from_dropped(const host_port &node) const { return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) { return r.node == node; }); } -bool 
config_context::remove_from_dropped(const rpc_address &node) +bool config_context::remove_from_dropped(const host_port &node) { auto iter = find_from_dropped(node); if (iter != dropped.end()) { @@ -353,7 +337,7 @@ bool config_context::remove_from_dropped(const rpc_address &node) return false; } -bool config_context::record_drop_history(const rpc_address &node) +bool config_context::record_drop_history(const host_port &node) { auto iter = find_from_dropped(node); if (iter != dropped.end()) @@ -365,7 +349,7 @@ bool config_context::record_drop_history(const rpc_address &node) return true; } -int config_context::collect_drop_replica(const rpc_address &node, const replica_info &info) +int config_context::collect_drop_replica(const host_port &node, const replica_info &info) { bool in_dropped = false; auto iter = find_from_dropped(node); @@ -426,7 +410,7 @@ bool config_context::check_order() return true; } -std::vector::iterator config_context::find_from_serving(const rpc_address &node) +std::vector::iterator config_context::find_from_serving(const host_port &node) { return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) { return r.node == node; @@ -434,14 +418,14 @@ std::vector::iterator config_context::find_from_serving(const r } std::vector::const_iterator -config_context::find_from_serving(const rpc_address &node) const +config_context::find_from_serving(const host_port &node) const { return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) { return r.node == node; }); } -bool config_context::remove_from_serving(const rpc_address &node) +bool config_context::remove_from_serving(const host_port &node) { auto iter = find_from_serving(node); if (iter != serving.end()) { @@ -451,7 +435,7 @@ bool config_context::remove_from_serving(const rpc_address &node) return false; } -void config_context::collect_serving_replica(const rpc_address &node, const replica_info &info) +void config_context::collect_serving_replica(const host_port &node, const replica_info &info) { auto iter = find_from_serving(node); auto compact_status = info.__isset.manual_compact_status ? 
info.manual_compact_status @@ -465,12 +449,12 @@ void config_context::collect_serving_replica(const rpc_address &node, const repl } } -void config_context::adjust_proposal(const rpc_address &node, const replica_info &info) +void config_context::adjust_proposal(const host_port &node, const replica_info &info) { lb_actions.track_current_learner(node, info); } -bool config_context::get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const +bool config_context::get_disk_tag(const host_port &node, /*out*/ std::string &disk_tag) const { auto iter = find_from_serving(node); if (iter == serving.end()) { @@ -549,6 +533,11 @@ app_state::app_state(const app_info &info) : app_info(info), helpers(new app_sta config.max_replica_count = app_info::max_replica_count; config.primary.set_invalid(); config.secondaries.clear(); + + config.__set_hp_primary(host_port()); + config.__set_hp_secondaries({}); + config.__set_hp_last_drops({}); + partitions.assign(app_info::partition_count, config); for (int i = 0; i != app_info::partition_count; ++i) partitions[i].pid.set_partition_index(i); diff --git a/src/meta/meta_data.h b/src/meta/meta_data.h index ea236fa5cc..55384f5198 100644 --- a/src/meta/meta_data.h +++ b/src/meta/meta_data.h @@ -47,13 +47,14 @@ #include "meta_admin_types.h" #include "metadata_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/blob.h" #include "utils/enum_helper.h" #include "utils/error_code.h" #include "utils/extensible_object.h" +#include "utils/fmt_logging.h" namespace dsn { class message_ex; @@ -112,7 +113,7 @@ class proposal_actions public: proposal_actions(); void reset_tracked_current_learner(); - void track_current_learner(const rpc_address &node, const replica_info &info); + void track_current_learner(const host_port &node, const replica_info &info); void clear(); // return the action in acts & whether the action is from balancer @@ -143,7 +144,7 @@ class proposal_actions // if you modify the dropped_replica::INVALID_TIMESTAMP, please modify the dropped_cmp accordingly. struct dropped_replica { - dsn::rpc_address node; + dsn::host_port node; // if a drop-replica is generated by the update-cfg-req, then we can // record the drop time (milliseconds) @@ -184,7 +185,7 @@ inline int dropped_cmp(const dropped_replica &d1, const dropped_replica &d2) // Load balancer may use this to do balance decisions. 
struct serving_replica { - dsn::rpc_address node; + dsn::host_port node; // TODO: report the storage size of replica int64_t storage_mb; std::string disk_tag; @@ -222,37 +223,37 @@ class config_context void check_size(); void cancel_sync(); - std::vector::iterator find_from_dropped(const dsn::rpc_address &node); - std::vector::const_iterator find_from_dropped(const rpc_address &node) const; + std::vector::iterator find_from_dropped(const dsn::host_port &node); + std::vector::const_iterator find_from_dropped(const host_port &node) const; // return true if remove ok, false if node doesn't in dropped - bool remove_from_dropped(const dsn::rpc_address &node); + bool remove_from_dropped(const dsn::host_port &node); // put recently downgraded node to dropped // return true if put ok, false if the node has been in dropped - bool record_drop_history(const dsn::rpc_address &node); + bool record_drop_history(const dsn::host_port &node); // Notice: please make sure whether node is actually an inactive or a serving replica // ret: // 1 => node has been in the dropped // 0 => insert the info to the dropped // -1 => info is too staled to insert - int collect_drop_replica(const dsn::rpc_address &node, const replica_info &info); + int collect_drop_replica(const dsn::host_port &node, const replica_info &info); // check if dropped vector satisfied the order bool check_order(); - std::vector::iterator find_from_serving(const dsn::rpc_address &node); - std::vector::const_iterator find_from_serving(const rpc_address &node) const; + std::vector::iterator find_from_serving(const dsn::host_port &node); + std::vector::const_iterator find_from_serving(const host_port &node) const; // return true if remove ok, false if node doesn't in serving - bool remove_from_serving(const dsn::rpc_address &node); + bool remove_from_serving(const dsn::host_port &node); - void collect_serving_replica(const dsn::rpc_address &node, const replica_info &info); + void collect_serving_replica(const dsn::host_port &node, const replica_info &info); - void adjust_proposal(const dsn::rpc_address &node, const replica_info &info); + void adjust_proposal(const dsn::host_port &node, const replica_info &info); - bool get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const; + bool get_disk_tag(const host_port &node, /*out*/ std::string &disk_tag) const; public: // intialize to 4 statically. 
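// A minimal illustrative sketch (not code from this patch) of the dual-field convention
// applied throughout this change: each structure keeps its wire-level rpc_address field and
// gains an optional host_port twin (hp_*); writers fill both, resolving the address from the
// host_port, while readers prefer the hp_* field. The names below reuse identifiers that
// already appear in this diff (partition_configuration, hp_primary, dns_resolver); the helper
// itself is hypothetical.
void assign_primary_sketch(dsn::partition_configuration &pc, const dsn::host_port &node)
{
    // legacy wire field: resolve host:port -> ip:port so old readers keep working
    pc.primary = dsn::dns_resolver::instance().resolve_address(node);
    // new optional field: record the host_port itself
    pc.__set_hp_primary(node);
}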
@@ -264,19 +265,19 @@ struct partition_configuration_stateless { partition_configuration &config; partition_configuration_stateless(partition_configuration &pc) : config(pc) {} - std::vector &workers() { return config.last_drops; } - std::vector &hosts() { return config.secondaries; } - bool is_host(const rpc_address &node) const + std::vector &workers() { return config.hp_last_drops; } + std::vector &hosts() { return config.hp_secondaries; } + bool is_host(const host_port &node) const { - return std::find(config.secondaries.begin(), config.secondaries.end(), node) != - config.secondaries.end(); + return std::find(config.hp_secondaries.begin(), config.hp_secondaries.end(), node) != + config.hp_secondaries.end(); } - bool is_worker(const rpc_address &node) const + bool is_worker(const host_port &node) const { - return std::find(config.last_drops.begin(), config.last_drops.end(), node) != - config.last_drops.end(); + return std::find(config.hp_last_drops.begin(), config.hp_last_drops.end(), node) != + config.hp_last_drops.end(); } - bool is_member(const rpc_address &node) const { return is_host(node) || is_worker(node); } + bool is_member(const host_port &node) const { return is_host(node) || is_worker(node); } }; struct restore_state @@ -392,7 +393,7 @@ class node_state : public extensible_object // status bool is_alive; bool has_collected_replicas; - dsn::rpc_address address; + dsn::host_port hp; const partition_set *get_partitions(app_id id, bool only_primary) const; partition_set *get_partitions(app_id id, bool only_primary, bool create_new); @@ -416,8 +417,8 @@ class node_state : public extensible_object void set_alive(bool alive) { is_alive = alive; } bool has_collected() { return has_collected_replicas; } void set_replicas_collect_flag(bool has_collected) { has_collected_replicas = has_collected; } - dsn::rpc_address addr() const { return address; } - void set_addr(const dsn::rpc_address &addr) { address = addr; } + dsn::host_port host_port() const { return hp; } + void set_hp(const dsn::host_port &val) { hp = val; } void put_partition(const dsn::gpid &pid, bool is_primary); void remove_partition(const dsn::gpid &pid, bool only_primary); @@ -427,7 +428,7 @@ class node_state : public extensible_object bool for_each_primary(app_id id, const std::function &f) const; }; -typedef std::unordered_map node_mapper; +typedef std::unordered_map node_mapper; typedef std::map> migration_list; struct meta_view @@ -436,22 +437,22 @@ struct meta_view node_mapper *nodes; }; -inline node_state *get_node_state(node_mapper &nodes, rpc_address addr, bool create_new) +inline node_state *get_node_state(node_mapper &nodes, host_port hp, bool create_new) { node_state *ns; - if (nodes.find(addr) == nodes.end()) { + if (nodes.find(hp) == nodes.end()) { if (!create_new) return nullptr; - ns = &nodes[addr]; - ns->set_addr(addr); + ns = &nodes[hp]; + ns->set_hp(hp); } - ns = &nodes[addr]; + ns = &nodes[hp]; return ns; } -inline bool is_node_alive(const node_mapper &nodes, rpc_address addr) +inline bool is_node_alive(const node_mapper &nodes, host_port hp) { - auto iter = nodes.find(addr); + auto iter = nodes.find(hp); if (iter == nodes.end()) return false; return iter->second.alive(); @@ -491,8 +492,8 @@ inline config_context *get_config_context(app_mapper &apps, const dsn::gpid &gpi inline int replica_count(const partition_configuration &pc) { - int ans = (pc.primary.is_invalid()) ? 0 : 1; - return ans + pc.secondaries.size(); + int ans = (pc.hp_primary.is_invalid()) ? 
0 : 1; + return ans + pc.hp_secondaries.size(); } enum health_status @@ -509,13 +510,13 @@ enum health_status inline health_status partition_health_status(const partition_configuration &pc, int mutation_2pc_min_replica_count) { - if (pc.primary.is_invalid()) { - if (pc.secondaries.empty()) + if (pc.hp_primary.is_invalid()) { + if (pc.hp_secondaries.empty()) return HS_DEAD; else return HS_UNREADABLE; } else { // !pc.primary.is_invalid() - int n = pc.secondaries.size() + 1; + int n = pc.hp_secondaries.size() + 1; if (n < mutation_2pc_min_replica_count) return HS_UNWRITABLE; else if (n < pc.max_replica_count) @@ -547,9 +548,27 @@ inline int count_partitions(const app_mapper &apps) } void when_update_replicas(config_type::type t, const std::function &func); -void maintain_drops(/*inout*/ std::vector &drops, - const dsn::rpc_address &node, - config_type::type t); + +template +void maintain_drops(/*inout*/ std::vector &drops, const T &node, config_type::type t) +{ + auto action = [&drops, &node](bool is_adding) { + auto it = std::find(drops.begin(), drops.end(), node); + if (is_adding) { + if (it != drops.end()) { + drops.erase(it); + } + } else { + CHECK( + it == drops.end(), "the node({}) cannot be in drops set before this update", node); + drops.push_back(node); + if (drops.size() > 3) { + drops.erase(drops.begin()); + } + } + }; + when_update_replicas(t, action); +} // Try to construct a replica-group by current replica-infos of a gpid // ret: @@ -566,7 +585,7 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count); // ret: // return true if the replica is accepted as an useful replica. Or-else false. // WARNING: if false is returned, the replica on node may be garbage-collected -bool collect_replica(meta_view view, const rpc_address &node, const replica_info &info); +bool collect_replica(meta_view view, const host_port &node, const replica_info &info); inline bool has_seconds_expired(uint64_t second_ts) { return second_ts * 1000 < dsn_now_ms(); } diff --git a/src/meta/meta_http_service.cpp b/src/meta/meta_http_service.cpp index f93d869c33..ba5bc9b7e8 100644 --- a/src/meta/meta_http_service.cpp +++ b/src/meta/meta_http_service.cpp @@ -47,7 +47,7 @@ #include "meta_http_service.h" #include "meta_server_failure_detector.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "server_load_balancer.h" #include "server_state.h" #include "utils/error_code.h" @@ -128,7 +128,7 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & tp_details.add_column("replica_count"); tp_details.add_column("primary"); tp_details.add_column("secondaries"); - std::map> node_stat; + std::map> node_stat; int total_prim_count = 0; int total_sec_count = 0; @@ -137,14 +137,14 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & int read_unhealthy = 0; for (const auto &p : response.partitions) { int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; - node_stat[p.primary].first++; + node_stat[p.hp_primary].first++; total_prim_count++; } - replica_count += p.secondaries.size(); - total_sec_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + total_sec_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -158,14 +158,14 @@ void 
meta_http_service::get_app_handler(const http_request &req, http_response & std::stringstream oss; oss << replica_count << "/" << p.max_replica_count; tp_details.append_data(oss.str()); - tp_details.append_data((p.primary.is_invalid() ? "-" : p.primary.to_string())); + tp_details.append_data((p.hp_primary.is_invalid() ? "-" : p.hp_primary.to_string())); oss.str(""); oss << "["; - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; - oss << p.secondaries[j]; - node_stat[p.secondaries[j]].second++; + oss << p.hp_secondaries[j]; + node_stat[p.hp_secondaries[j]].second++; } oss << "]"; tp_details.append_data(oss.str()); @@ -319,11 +319,11 @@ void meta_http_service::list_app_handler(const http_request &req, http_response for (int i = 0; i < response.partitions.size(); i++) { const dsn::partition_configuration &p = response.partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -384,7 +384,7 @@ void meta_http_service::list_node_handler(const http_request &req, http_response if (!redirect_if_not_primary(req, resp)) return; - std::map tmp_map; + std::map tmp_map; for (const auto &node : _service->_alive_set) { tmp_map.emplace(node, list_nodes_helper(node.to_string(), "ALIVE")); } @@ -409,14 +409,14 @@ void meta_http_service::list_node_handler(const http_request &req, http_response for (int i = 0; i < response_app.partitions.size(); i++) { const dsn::partition_configuration &p = response_app.partitions[i]; - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j = 0; j < p.secondaries.size(); j++) { - auto find = tmp_map.find(p.secondaries[j]); + for (int j = 0; j < p.hp_secondaries.size(); j++) { + auto find = tmp_map.find(p.hp_secondaries[j]); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -475,7 +475,7 @@ void meta_http_service::get_cluster_info_handler(const http_request &req, http_r } } tp.add_row_name_and_data("meta_servers", meta_servers_str); - tp.add_row_name_and_data("primary_meta_server", dsn_primary_address().to_string()); + tp.add_row_name_and_data("primary_meta_server", dsn_primary_host_port().to_string()); tp.add_row_name_and_data("zookeeper_hosts", FLAGS_hosts_list); tp.add_row_name_and_data("zookeeper_root", _service->_cluster_root); tp.add_row_name_and_data( @@ -846,7 +846,7 @@ bool meta_http_service::redirect_if_not_primary(const http_request &req, http_re } #endif - rpc_address leader; + host_port leader; if (_service->_failure_detector->get_leader(&leader)) { return true; } diff --git a/src/meta/meta_server_failure_detector.cpp b/src/meta/meta_server_failure_detector.cpp index 36fa56818c..1da8e3a999 100644 --- a/src/meta/meta_server_failure_detector.cpp +++ b/src/meta/meta_server_failure_detector.cpp @@ -30,10 +30,13 @@ #include #include +#include "absl/strings/string_view.h" #include "fd_types.h" #include "meta/meta_options.h" #include "meta/meta_service.h" #include "runtime/app_model.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/serverlet.h" #include 
"runtime/task/task_code.h" #include "utils/autoref_ptr.h" @@ -44,7 +47,6 @@ #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/string_conv.h" -#include "absl/strings/string_view.h" DSN_DEFINE_int32(meta_server, max_succssive_unstable_restart, @@ -94,24 +96,24 @@ meta_server_failure_detector::~meta_server_failure_detector() } } -void meta_server_failure_detector::on_worker_disconnected(const std::vector &nodes) +void meta_server_failure_detector::on_worker_disconnected(const std::vector &nodes) { _svc->set_node_state(nodes, false); } -void meta_server_failure_detector::on_worker_connected(rpc_address node) +void meta_server_failure_detector::on_worker_connected(host_port node) { - _svc->set_node_state(std::vector{node}, true); + _svc->set_node_state({node}, true); } -bool meta_server_failure_detector::get_leader(rpc_address *leader) +bool meta_server_failure_detector::get_leader(host_port *leader) { FAIL_POINT_INJECT_F("meta_server_failure_detector_get_leader", [leader](absl::string_view str) { /// the format of str is : true#{ip}:{port} or false#{ip}:{port} auto pos = str.find("#"); - // get leader addr + // get leader host_port auto addr_part = str.substr(pos + 1, str.length() - pos - 1); - *leader = dsn::rpc_address::from_host_port(addr_part); + *leader = host_port::from_string(addr_part.data()); CHECK(*leader, "parse {} to rpc_address failed", addr_part); // get the return value which implies whether the current node is primary or not @@ -123,18 +125,18 @@ bool meta_server_failure_detector::get_leader(rpc_address *leader) return is_leader; }); - dsn::rpc_address holder; + dsn::host_port holder; if (leader == nullptr) { leader = &holder; } if (_is_leader.load()) { - *leader = dsn_primary_address(); + *leader = dsn_primary_host_port(); return true; } if (_lock_svc == nullptr) { - leader->set_invalid(); + leader->reset(); return false; } @@ -143,17 +145,17 @@ bool meta_server_failure_detector::get_leader(rpc_address *leader) error_code err = _lock_svc->query_cache(_primary_lock_id, lock_owner, version); if (err != dsn::ERR_OK) { LOG_WARNING("query leader from cache got error({})", err); - leader->set_invalid(); + leader->reset(); return false; } - *leader = rpc_address::from_host_port(lock_owner); + *leader = host_port::from_string(lock_owner); if (!(*leader)) { - leader->set_invalid(); + leader->reset(); return false; } - return (*leader) == dsn_primary_address(); + return (*leader) == dsn_primary_host_port(); } DEFINE_TASK_CODE(LPC_META_SERVER_LEADER_LOCK_CALLBACK, TASK_PRIORITY_COMMON, fd::THREAD_POOL_FD) @@ -167,7 +169,7 @@ void meta_server_failure_detector::acquire_leader_lock() error_code err; auto tasks = _lock_svc->lock( _primary_lock_id, - dsn_primary_address().to_string(), + dsn_primary_host_port().to_string(), // lock granted LPC_META_SERVER_LEADER_LOCK_CALLBACK, [this, &err](error_code ec, const std::string &owner, uint64_t version) { @@ -206,7 +208,7 @@ void meta_server_failure_detector::acquire_leader_lock() } } -void meta_server_failure_detector::reset_stability_stat(const rpc_address &node) +void meta_server_failure_detector::reset_stability_stat(const host_port &node) { zauto_lock l(_map_lock); auto iter = _stablity.find(node); @@ -224,9 +226,9 @@ void meta_server_failure_detector::reset_stability_stat(const rpc_address &node) void meta_server_failure_detector::leader_initialize(const std::string &lock_service_owner) { - const auto addr = rpc_address::from_host_port(lock_service_owner); - CHECK(addr, "parse {} to rpc_address failed", 
lock_service_owner); - CHECK_EQ_MSG(addr, dsn_primary_address(), "acquire leader return success, but owner not match"); + const auto hp = dsn::host_port::from_string(lock_service_owner); + CHECK(hp, "parse {} to host_port failed", lock_service_owner); + CHECK_EQ_MSG(hp, dsn_primary_host_port(), "acquire leader return success, but owner not match"); _is_leader.store(true); _election_moment.store(dsn_now_ms()); } @@ -234,37 +236,47 @@ void meta_server_failure_detector::leader_initialize(const std::string &lock_ser bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &beacon) { zauto_lock l(_map_lock); - auto iter = _stablity.find(beacon.from_addr); + + host_port hp_from_node; + GET_HOST_PORT(beacon, from_node, hp_from_node); + + auto iter = _stablity.find(hp_from_node); if (iter == _stablity.end()) { - _stablity.emplace(beacon.from_addr, worker_stability{beacon.start_time, 0}); + _stablity.emplace(hp_from_node, worker_stability{beacon.start_time, 0}); return true; } else { worker_stability &w = iter->second; if (beacon.start_time == w.last_start_time_ms) { - LOG_DEBUG( - "{} isn't restarted, last_start_time({})", beacon.from_addr, w.last_start_time_ms); + LOG_DEBUG("{}({}) isn't restarted, last_start_time({})", + hp_from_node, + beacon.from_node, + w.last_start_time_ms); if (dsn_now_ms() - w.last_start_time_ms >= FLAGS_stable_rs_min_running_seconds * 1000 && w.unstable_restart_count > 0) { - LOG_INFO("{} has stably run for a while, reset it's unstable count({}) to 0", - beacon.from_addr, + LOG_INFO("{}({}) has stably run for a while, reset it's unstable count({}) to 0", + hp_from_node, + beacon.from_node, w.unstable_restart_count); w.unstable_restart_count = 0; } } else if (beacon.start_time > w.last_start_time_ms) { - LOG_INFO("check {} restarted, last_time({}), this_time({})", - beacon.from_addr, + LOG_INFO("check {}({}) restarted, last_time({}), this_time({})", + hp_from_node, + beacon.from_node, w.last_start_time_ms, beacon.start_time); if (beacon.start_time - w.last_start_time_ms < FLAGS_stable_rs_min_running_seconds * 1000) { w.unstable_restart_count++; - LOG_WARNING("{} encounter an unstable restart, total_count({})", - beacon.from_addr, + LOG_WARNING("{}({}) encounter an unstable restart, total_count({})", + hp_from_node, + beacon.from_node, w.unstable_restart_count); } else if (w.unstable_restart_count > 0) { - LOG_INFO("{} restart in {} ms after last restart, may recover ok, reset " + LOG_INFO("{}({}) restart in {} ms after last restart, may recover ok, reset " "it's unstable count({}) to 0", - beacon.from_addr, + hp_from_node, + beacon.from_node, beacon.start_time - w.last_start_time_ms, w.unstable_restart_count); w.unstable_restart_count = 0; @@ -272,7 +284,9 @@ bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &b w.last_start_time_ms = beacon.start_time; } else { - LOG_WARNING("{}: possible encounter a staled message, ignore it", beacon.from_addr); + LOG_WARNING("{}({}): possible encounter a staled message, ignore it", + hp_from_node, + beacon.from_node); } return w.unstable_restart_count < FLAGS_max_succssive_unstable_restart; } @@ -281,50 +295,61 @@ bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &b void meta_server_failure_detector::on_ping(const fd::beacon_msg &beacon, rpc_replier &reply) { - fd::beacon_ack ack; - ack.time = beacon.time; - ack.this_node = beacon.to_addr; - ack.allowed = true; + host_port hp_from_node, hp_to_node; + GET_HOST_PORT(beacon, from_node, hp_from_node); + 
GET_HOST_PORT(beacon, to_node, hp_to_node); if (beacon.__isset.start_time && !update_stability_stat(beacon)) { - LOG_WARNING("{} is unstable, don't response to it's beacon", beacon.from_addr); + LOG_WARNING( + "{}({}) is unstable, don't response to it's beacon", beacon.from_node, hp_from_node); return; } - dsn::rpc_address leader; + fd::beacon_ack ack; + ack.time = beacon.time; + ack.this_node = beacon.to_node; + ack.allowed = true; + ack.__set_hp_this_node(hp_to_node); + + dsn::host_port leader; if (!get_leader(&leader)) { ack.is_master = false; - ack.primary_node = leader; + ack.primary_node = dsn::dns_resolver::instance().resolve_address(leader); + ack.__set_hp_primary_node(leader); } else { ack.is_master = true; - ack.primary_node = beacon.to_addr; + ack.primary_node = beacon.to_node; + ack.__set_hp_primary_node(hp_to_node); failure_detector::on_ping_internal(beacon, ack); } - LOG_INFO("on_ping, beacon send time[{}], is_master({}), from_node({}), this_node({}), " - "primary_node({})", + LOG_INFO("on_ping, beacon send time[{}], is_master({}), from_node({}({})), this_node({}({})), " + "primary_node({}({}))", ack.time, ack.is_master ? "true" : "false", - beacon.from_addr, - ack.this_node, + hp_from_node, + beacon.from_node, + hp_to_node, + beacon.to_node, + ack.hp_primary_node, ack.primary_node); reply(ack); } /*the following functions are only for test*/ -meta_server_failure_detector::meta_server_failure_detector(rpc_address leader_address, +meta_server_failure_detector::meta_server_failure_detector(host_port leader_host_port, bool is_myself_leader) { - LOG_INFO("set {} as leader", leader_address); + LOG_INFO("set {} as leader", leader_host_port); _lock_svc = nullptr; _is_leader.store(is_myself_leader); } -void meta_server_failure_detector::set_leader_for_test(rpc_address leader_address, +void meta_server_failure_detector::set_leader_for_test(host_port leader_host_port, bool is_myself_leader) { - LOG_INFO("set {} as leader", leader_address); + LOG_INFO("set {} as leader", leader_host_port); _is_leader.store(is_myself_leader); } diff --git a/src/meta/meta_server_failure_detector.h b/src/meta/meta_server_failure_detector.h index e6cf1e3b51..2b9d780a15 100644 --- a/src/meta/meta_server_failure_detector.h +++ b/src/meta/meta_server_failure_detector.h @@ -34,7 +34,7 @@ #include "failure_detector/failure_detector.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/fmt_logging.h" #include "utils/zlocks.h" @@ -70,22 +70,22 @@ class meta_server_failure_detector : public fd::failure_detector { } }; - typedef std::map stability_map; + typedef std::map stability_map; public: meta_server_failure_detector(meta_service *svc); virtual ~meta_server_failure_detector(); // get the meta-server's leader - // leader: the leader's address. Invalid if no leader selected + // leader: the leader's host_port. Invalid if no leader selected // if leader==nullptr, then the new leader won't be returned // ret true if i'm the current leader; false if not. - bool get_leader(/*output*/ dsn::rpc_address *leader); + bool get_leader(/*output*/ dsn::host_port *leader); // return if acquire the leader lock, or-else blocked forever void acquire_leader_lock(); - void reset_stability_stat(const dsn::rpc_address &node); + void reset_stability_stat(const dsn::host_port &node); // _fd_opts is initialized in constructor with a fd_suboption stored in meta_service. // so usually you don't need to call this. 
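// A condensed, illustrative restatement (not the committed code) of the per-worker stability
// rule that update_stability_stat() enforces in the .cpp above; the two thresholds correspond
// to FLAGS_stable_rs_min_running_seconds (converted to milliseconds) and
// FLAGS_max_succssive_unstable_restart. Struct and function names here are hypothetical.
#include <cstdint>

struct worker_stability_sketch
{
    int64_t last_start_time_ms = 0;
    int32_t unstable_restart_count = 0;
};

bool beacon_from_stable_worker(worker_stability_sketch &w,
                               int64_t beacon_start_ms,
                               int64_t now_ms,
                               int64_t stable_min_running_ms,
                               int32_t max_unstable_restarts)
{
    if (beacon_start_ms == w.last_start_time_ms) {
        // not restarted: a long enough stable run forgives earlier unstable restarts
        if (now_ms - w.last_start_time_ms >= stable_min_running_ms &&
            w.unstable_restart_count > 0) {
            w.unstable_restart_count = 0;
        }
    } else if (beacon_start_ms > w.last_start_time_ms) {
        // restarted: coming back within the stable-running window counts as unstable
        if (beacon_start_ms - w.last_start_time_ms < stable_min_running_ms) {
            w.unstable_restart_count++;
        } else if (w.unstable_restart_count > 0) {
            w.unstable_restart_count = 0;
        }
        w.last_start_time_ms = beacon_start_ms;
    }
    // an older start_time is a stale beacon and leaves the recorded state unchanged
    return w.unstable_restart_count < max_unstable_restarts;
}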
@@ -94,18 +94,18 @@ class meta_server_failure_detector : public fd::failure_detector void set_options(fd_suboptions *options) { _fd_opts = options; } // client side - virtual void on_master_disconnected(const std::vector &) + virtual void on_master_disconnected(const std::vector &) { CHECK(false, "unsupported method"); } - virtual void on_master_connected(rpc_address) { CHECK(false, "unsupported method"); } + virtual void on_master_connected(host_port) { CHECK(false, "unsupported method"); } // server side // it is in the protection of failure_detector::_lock - virtual void on_worker_disconnected(const std::vector &nodes) override; + virtual void on_worker_disconnected(const std::vector &nodes) override; // it is in the protection of failure_detector::_lock - virtual void on_worker_connected(rpc_address node) override; - virtual bool is_worker_connected(rpc_address node) const override + virtual void on_worker_connected(host_port node) override; + virtual bool is_worker_connected(host_port node) const override { // we treat all nodes not in the worker list alive in the first grace period. // For the reason, please consider this situation: @@ -122,7 +122,7 @@ class meta_server_failure_detector : public fd::failure_detector virtual void on_ping(const fd::beacon_msg &beacon, rpc_replier &reply) override; private: - // return value: return true if beacon.from_addr is stable; or-else, false + // return value: return true if beacon.from_node is stable; or-else, false bool update_stability_stat(const fd::beacon_msg &beacon); void leader_initialize(const std::string &lock_service_owner); @@ -150,8 +150,8 @@ class meta_server_failure_detector : public fd::failure_detector public: /* these two functions are for test */ - meta_server_failure_detector(rpc_address leader_address, bool is_myself_leader); - void set_leader_for_test(rpc_address leader_address, bool is_myself_leader); + meta_server_failure_detector(host_port leader_host_port, bool is_myself_leader); + void set_leader_for_test(host_port leader_host_port, bool is_myself_leader); stability_map *get_stability_map_for_test(); }; } diff --git a/src/meta/meta_service.cpp b/src/meta/meta_service.cpp index 3f556a4cf1..8c2b38b9ca 100644 --- a/src/meta/meta_service.cpp +++ b/src/meta/meta_service.cpp @@ -57,6 +57,7 @@ #include "partition_split_types.h" #include "ranger/ranger_resource_policy_manager.h" #include "remote_cmd/remote_command.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/task/async_calls.h" #include "server_load_balancer.h" @@ -249,7 +250,7 @@ error_code meta_service::remote_storage_initialize() } // visited in protection of failure_detector::_lock -void meta_service::set_node_state(const std::vector &nodes, bool is_alive) +void meta_service::set_node_state(const std::vector &nodes, bool is_alive) { for (auto &node : nodes) { if (is_alive) { @@ -268,16 +269,15 @@ void meta_service::set_node_state(const std::vector &nodes, bool is if (!_started) { return; } - for (const rpc_address &address : nodes) { - tasking::enqueue( - LPC_META_STATE_HIGH, - nullptr, - std::bind(&server_state::on_change_node_state, _state.get(), address, is_alive), - server_state::sStateHash); + for (const auto &hp : nodes) { + tasking::enqueue(LPC_META_STATE_HIGH, + nullptr, + std::bind(&server_state::on_change_node_state, _state.get(), hp, is_alive), + server_state::sStateHash); } } -void meta_service::get_node_state(/*out*/ std::map &all_nodes) +void meta_service::get_node_state(/*out*/ std::map &all_nodes) { 
zauto_lock l(_failure_detector->_lock); for (auto &node : _alive_set) @@ -331,7 +331,7 @@ void meta_service::start_service() METRIC_VAR_SET(alive_replica_servers, _alive_set.size()); - for (const dsn::rpc_address &node : _alive_set) { + for (const auto &node : _alive_set) { // sync alive set and the failure_detector _failure_detector->unregister_worker(node); _failure_detector->register_worker(node, true); @@ -343,13 +343,13 @@ void meta_service::start_service() _access_controller = security::create_meta_access_controller(_ranger_resource_policy_manager); _started = true; - for (const dsn::rpc_address &node : _alive_set) { + for (const auto &node : _alive_set) { tasking::enqueue(LPC_META_STATE_HIGH, nullptr, std::bind(&server_state::on_change_node_state, _state.get(), node, true), server_state::sStateHash); } - for (const dsn::rpc_address &node : _dead_set) { + for (const auto &node : _dead_set) { tasking::enqueue(LPC_META_STATE_HIGH, nullptr, std::bind(&server_state::on_change_node_state, _state.get(), node, false), @@ -417,7 +417,8 @@ error_code meta_service::start() _failure_detector->acquire_leader_lock(); CHECK(_failure_detector->get_leader(nullptr), "must be primary at this point"); - LOG_INFO("{} got the primary lock, start to recover server state from remote storage", + LOG_INFO("{}({}) got the primary lock, start to recover server state from remote storage", + dsn_primary_host_port(), dsn_primary_address()); // initialize the load balancer @@ -563,10 +564,9 @@ void meta_service::register_rpc_handlers() &meta_service::on_set_max_replica_count); } -meta_leader_state meta_service::check_leader(dsn::message_ex *req, - dsn::rpc_address *forward_address) +meta_leader_state meta_service::check_leader(dsn::message_ex *req, dsn::host_port *forward_address) { - dsn::rpc_address leader; + host_port leader; if (!_failure_detector->get_leader(&leader)) { if (!req->header->context.u.is_forward_supported) { if (forward_address != nullptr) @@ -576,11 +576,11 @@ meta_leader_state meta_service::check_leader(dsn::message_ex *req, LOG_DEBUG("leader address: {}", leader); if (!leader.is_invalid()) { - dsn_rpc_forward(req, leader); + dsn_rpc_forward(req, dsn::dns_resolver::instance().resolve_address(leader)); return meta_leader_state::kNotLeaderAndCanForwardRpc; } else { if (forward_address != nullptr) - forward_address->set_invalid(); + forward_address->reset(); return meta_leader_state::kNotLeaderAndCannotForwardRpc; } } @@ -693,7 +693,8 @@ void meta_service::on_list_nodes(configuration_list_nodes_rpc rpc) if (request.status == node_status::NS_INVALID || request.status == node_status::NS_ALIVE) { info.status = node_status::NS_ALIVE; for (auto &node : _alive_set) { - info.address = node; + info.address = dsn::dns_resolver::instance().resolve_address(node); + info.__set_hp_address(node); response.infos.push_back(info); } } @@ -701,7 +702,8 @@ void meta_service::on_list_nodes(configuration_list_nodes_rpc rpc) request.status == node_status::NS_UNALIVE) { info.status = node_status::NS_UNALIVE; for (auto &node : _dead_set) { - info.address = node; + info.address = dsn::dns_resolver::instance().resolve_address(node); + info.__set_hp_address(node); response.infos.push_back(info); } } @@ -726,7 +728,7 @@ void meta_service::on_query_cluster_info(configuration_cluster_info_rpc rpc) response.values.push_back(oss.str()); response.keys.push_back("primary_meta_server"); - response.values.push_back(dsn_primary_address().to_string()); + response.values.push_back(dsn_primary_host_port().to_string()); 
response.keys.push_back("zookeeper_hosts"); response.values.push_back(FLAGS_hosts_list); response.keys.push_back("zookeeper_root"); @@ -753,11 +755,12 @@ void meta_service::on_query_cluster_info(configuration_cluster_info_rpc rpc) void meta_service::on_query_configuration_by_index(configuration_query_by_index_rpc rpc) { query_cfg_response &response = rpc.response(); - rpc_address forward_address; - if (!check_status_and_authz(rpc, &forward_address)) { - if (!forward_address.is_invalid()) { + host_port forward_hp; + if (!check_status_and_authz(rpc, &forward_hp)) { + if (!forward_hp.is_invalid()) { partition_configuration config; - config.primary = forward_address; + config.primary = dsn::dns_resolver::instance().resolve_address(forward_hp); + config.__set_hp_primary(forward_hp); response.partitions.push_back(std::move(config)); } return; @@ -873,7 +876,8 @@ void meta_service::on_start_recovery(configuration_recovery_rpc rpc) } else { zauto_write_lock l(_meta_lock); if (_started.load()) { - LOG_INFO("service({}) is already started, ignore the recovery request", + LOG_INFO("service({}({})) is already started, ignore the recovery request", + dsn_primary_host_port(), dsn_primary_address()); response.err = ERR_SERVICE_ALREADY_RUNNING; } else { diff --git a/src/meta/meta_service.h b/src/meta/meta_service.h index 4b531a9c64..1db53b8371 100644 --- a/src/meta/meta_service.h +++ b/src/meta/meta_service.h @@ -47,8 +47,9 @@ #include "meta_rpc_types.h" #include "meta_server_failure_detector.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/network.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "security/access_controller.h" @@ -172,20 +173,20 @@ class meta_service : public serverlet { dsn_rpc_reply(response); } - virtual void send_message(const rpc_address &target, dsn::message_ex *request) + virtual void send_message(const host_port &target, dsn::message_ex *request) { - dsn_rpc_call_one_way(target, request); + dsn_rpc_call_one_way(dsn::dns_resolver::instance().resolve_address(target), request); } virtual void send_request(dsn::message_ex * /*req*/, - const rpc_address &target, + const host_port &target, const rpc_response_task_ptr &callback) { - dsn_rpc_call(target, callback); + dsn_rpc_call(dsn::dns_resolver::instance().resolve_address(target), callback); } // these two callbacks are running in fd's thread_pool, and in fd's lock - void set_node_state(const std::vector &nodes_list, bool is_alive); - void get_node_state(/*out*/ std::map &all_nodes); + void set_node_state(const std::vector &nodes_list, bool is_alive); + void get_node_state(/*out*/ std::map &all_nodes); void start_service(); void balancer_run(); @@ -287,9 +288,9 @@ class meta_service : public serverlet // if return 'kNotLeaderAndCannotForwardRpc' and 'forward_address' != nullptr, then return // leader by 'forward_address'. 
- meta_leader_state check_leader(dsn::message_ex *req, dsn::rpc_address *forward_address); + meta_leader_state check_leader(dsn::message_ex *req, dsn::host_port *forward_address); template - meta_leader_state check_leader(TRpcHolder rpc, /*out*/ rpc_address *forward_address); + meta_leader_state check_leader(TRpcHolder rpc, /*out*/ host_port *forward_address); // app_name: when the Ranger ACL is enabled, some rpc requests need to verify the app_name // ret: @@ -297,7 +298,7 @@ class meta_service : public serverlet // true: rpc request check and authentication succeed template bool check_status_and_authz(TRpcHolder rpc, - /*out*/ rpc_address *forward_address = nullptr, + /*out*/ host_port *forward_address = nullptr, const std::string &app_name = ""); // app_name: when the Ranger ACL is enabled, some rpc requests need to verify the app_name @@ -312,7 +313,7 @@ class meta_service : public serverlet bool check_status_and_authz_with_reply(message_ex *msg); template - bool check_leader_status(TRpcHolder rpc, rpc_address *forward_address = nullptr); + bool check_leader_status(TRpcHolder rpc, host_port *forward_address = nullptr); error_code remote_storage_initialize(); bool check_freeze() const; @@ -363,8 +364,8 @@ class meta_service : public serverlet // [ // this is protected by failure_detector::_lock - std::set _alive_set; - std::set _dead_set; + std::set _alive_set; + std::set _dead_set; // ] mutable zrwlock_nr _meta_lock; @@ -391,9 +392,9 @@ class meta_service : public serverlet }; template -meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forward_address) +meta_leader_state meta_service::check_leader(TRpcHolder rpc, host_port *forward_address) { - dsn::rpc_address leader; + host_port leader; if (!_failure_detector->get_leader(&leader)) { if (!rpc.dsn_request()->header->context.u.is_forward_supported) { if (forward_address != nullptr) @@ -401,13 +402,12 @@ meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forwar return meta_leader_state::kNotLeaderAndCannotForwardRpc; } - LOG_DEBUG("leader address: {}", leader); if (!leader.is_invalid()) { - rpc.forward(leader); + rpc.forward(dsn::dns_resolver::instance().resolve_address(leader)); return meta_leader_state::kNotLeaderAndCanForwardRpc; } else { if (forward_address != nullptr) - forward_address->set_invalid(); + forward_address->reset(); return meta_leader_state::kNotLeaderAndCannotForwardRpc; } } @@ -415,7 +415,7 @@ meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forwar } template -bool meta_service::check_leader_status(TRpcHolder rpc, rpc_address *forward_address) +bool meta_service::check_leader_status(TRpcHolder rpc, host_port *forward_address) { auto result = check_leader(rpc, forward_address); if (result == meta_leader_state::kNotLeaderAndCanForwardRpc) @@ -439,7 +439,7 @@ bool meta_service::check_leader_status(TRpcHolder rpc, rpc_address *forward_addr // above policy information may be out of date. 
template bool meta_service::check_status_and_authz(TRpcHolder rpc, - rpc_address *forward_address, + host_port *forward_address, const std::string &app_name) { if (!check_leader_status(rpc, forward_address)) { diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index 33610fc6a4..1fc770ae99 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -304,10 +304,12 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, update_child_request->config = request.child_config; update_child_request->info = *app; update_child_request->type = config_type::CT_REGISTER_CHILD; - update_child_request->node = request.primary_address; + update_child_request->node = request.primary; + update_child_request->__set_hp_node(request.hp_primary); partition_configuration child_config = app->partitions[child_gpid.get_partition_index()]; child_config.secondaries = request.child_config.secondaries; + child_config.__set_hp_secondaries(request.child_config.hp_secondaries); _state->update_configuration_locally(*app, update_child_request); if (parent_context.msg) { diff --git a/src/meta/partition_guardian.cpp b/src/meta/partition_guardian.cpp index 92a4c167da..7ca8210c50 100644 --- a/src/meta/partition_guardian.cpp +++ b/src/meta/partition_guardian.cpp @@ -28,11 +28,14 @@ #include "common/replication_common.h" #include "common/replication_other_types.h" +#include "meta/greedy_load_balancer.h" // IWYU pragma: keep #include "meta/meta_data.h" #include "meta/meta_service.h" #include "meta/server_load_balancer.h" #include "meta/server_state.h" #include "meta/table_metrics.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/metrics.h" @@ -80,11 +83,11 @@ pc_status partition_guardian::cure(meta_view view, CHECK(acts.empty(), ""); pc_status status; - if (pc.primary.is_invalid()) + if (pc.hp_primary.is_invalid()) status = on_missing_primary(view, gpid); - else if (static_cast(pc.secondaries.size()) + 1 < pc.max_replica_count) + else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) status = on_missing_secondary(view, gpid); - else if (static_cast(pc.secondaries.size()) >= pc.max_replica_count) + else if (static_cast(pc.hp_secondaries.size()) >= pc.max_replica_count) status = on_redundant_secondary(view, gpid); else status = pc_status::healthy; @@ -119,22 +122,23 @@ void partition_guardian::reconfig(meta_view view, const configuration_update_req if (request.type == config_type::CT_DROP_PARTITION) { cc->serving.clear(); - const std::vector &config_dropped = request.config.last_drops; - for (const rpc_address &drop_node : config_dropped) { + const std::vector &config_dropped = request.config.hp_last_drops; + for (const auto &drop_node : config_dropped) { cc->record_drop_history(drop_node); } } else { when_update_replicas(request.type, [cc, &request](bool is_adding) { if (is_adding) { - cc->remove_from_dropped(request.node); + cc->remove_from_dropped(request.hp_node); // when some replicas are added to partition_config // we should try to adjust the size of drop_list cc->check_size(); } else { - cc->remove_from_serving(request.node); + cc->remove_from_serving(request.hp_node); - CHECK(cc->record_drop_history(request.node), - "node({}) has been in the dropped", + CHECK(cc->record_drop_history(request.hp_node), + "node({}({})) has been in the dropped", + request.hp_node, request.node); } }); @@ -154,64 +158,63 @@ bool 
partition_guardian::from_proposals(meta_view &view, return false; } action = *(cc.lb_actions.front()); - char reason[1024]; - if (action.target.is_invalid()) { - sprintf(reason, "action target is invalid"); + std::string reason; + if (action.target.is_invalid() || action.hp_target.is_invalid()) { + reason = "action target is invalid"; goto invalid_action; } - if (action.node.is_invalid()) { - sprintf(reason, "action node is invalid"); + if (action.node.is_invalid() || action.hp_node.is_invalid()) { + reason = "action node is invalid"; goto invalid_action; } - if (!is_node_alive(*(view.nodes), action.target)) { - sprintf(reason, "action target(%s) is not alive", action.target.to_string()); + if (!is_node_alive(*(view.nodes), action.hp_target)) { + reason = fmt::format("action target({}) is not alive", action.hp_target); goto invalid_action; } - if (!is_node_alive(*(view.nodes), action.node)) { - sprintf(reason, "action node(%s) is not alive", action.node.to_string()); + if (!is_node_alive(*(view.nodes), action.hp_node)) { + reason = fmt::format("action node({}) is not alive", action.hp_node); goto invalid_action; } if (cc.lb_actions.is_abnormal_learning_proposal()) { - sprintf(reason, "learning process abnormal"); + reason = "learning process abnormal"; goto invalid_action; } switch (action.type) { case config_type::CT_ASSIGN_PRIMARY: - is_action_valid = (action.node == action.target && pc.primary.is_invalid() && - !is_secondary(pc, action.node)); + is_action_valid = (action.hp_node == action.hp_target && pc.primary.is_invalid() && + !is_secondary(pc, action.hp_node)); break; case config_type::CT_UPGRADE_TO_PRIMARY: - is_action_valid = (action.node == action.target && pc.primary.is_invalid() && - is_secondary(pc, action.node)); + is_action_valid = (action.hp_node == action.hp_target && pc.primary.is_invalid() && + is_secondary(pc, action.hp_node)); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: - is_action_valid = (is_primary(pc, action.target) && !is_secondary(pc, action.node)); - is_action_valid = (is_action_valid && is_node_alive(*(view.nodes), action.node)); + is_action_valid = (is_primary(pc, action.hp_target) && !is_secondary(pc, action.hp_node)); + is_action_valid = (is_action_valid && is_node_alive(*(view.nodes), action.hp_node)); break; case config_type::CT_DOWNGRADE_TO_INACTIVE: case config_type::CT_REMOVE: - is_action_valid = (is_primary(pc, action.target) && is_member(pc, action.node)); + is_action_valid = (is_primary(pc, action.hp_target) && is_member(pc, action.hp_node)); break; case config_type::CT_DOWNGRADE_TO_SECONDARY: - is_action_valid = (action.target == action.node && is_primary(pc, action.target)); + is_action_valid = (action.hp_target == action.hp_node && is_primary(pc, action.hp_target)); break; default: is_action_valid = false; break; } - if (is_action_valid) + if (is_action_valid) { return true; - else - sprintf(reason, "action is invalid"); + } else { + reason = "action is invalid"; + } invalid_action: - std::stringstream ss; - ss << action; LOG_INFO("proposal action({}) for gpid({}) is invalid, clear all proposal actions: {}", - ss.str(), + action, gpid, reason); action.type = config_type::CT_INVALID; @@ -239,42 +242,45 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi action.type = config_type::CT_INVALID; // try to upgrade a secondary to primary if the primary is missing - if (pc.secondaries.size() > 0) { + if (pc.hp_secondaries.size() > 0) { action.node.set_invalid(); + 
action.__set_hp_node(host_port()); - for (int i = 0; i < pc.secondaries.size(); ++i) { - node_state *ns = get_node_state(*(view.nodes), pc.secondaries[i], false); - CHECK_NOTNULL(ns, "invalid secondary address, address = {}", pc.secondaries[i]); + for (int i = 0; i < pc.hp_secondaries.size(); ++i) { + auto ns = get_node_state(*(view.nodes), pc.hp_secondaries[i], false); + CHECK_NOTNULL(ns, "invalid secondary address, address = {}", pc.hp_secondaries[i]); if (!ns->alive()) continue; // find a node with minimal primaries newly_partitions *np = newly_partitions_ext::get_inited(ns); - if (action.node.is_invalid() || - np->less_primaries(*get_newly_partitions(*(view.nodes), action.node), + if (action.hp_node.is_invalid() || + np->less_primaries(*get_newly_partitions(*(view.nodes), action.hp_node), gpid.get_app_id())) { - action.node = ns->addr(); + action.node = dsn::dns_resolver::instance().resolve_address(ns->host_port()); + action.__set_hp_node(ns->host_port()); } } - if (action.node.is_invalid()) { + if (action.hp_node.is_invalid()) { LOG_ERROR( "all nodes for gpid({}) are dead, waiting for some secondary to come back....", gpid_name); result = pc_status::dead; } else { action.type = config_type::CT_UPGRADE_TO_PRIMARY; - newly_partitions *np = get_newly_partitions(*(view.nodes), action.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), action.hp_node); np->newly_add_primary(gpid.get_app_id(), true); action.target = action.node; + action.hp_target = action.hp_node; result = pc_status::ill; } } // if nothing in the last_drops, it means that this is a newly created partition, so let's // just find a node and assign primary for it. - else if (pc.last_drops.empty()) { - dsn::rpc_address min_primary_server; + else if (pc.hp_last_drops.empty()) { + dsn::host_port min_primary_server; newly_partitions *min_primary_server_np = nullptr; for (auto &pairs : *view.nodes) { @@ -285,14 +291,16 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi // find a node which has minimal primaries if (min_primary_server_np == nullptr || np->less_primaries(*min_primary_server_np, gpid.get_app_id())) { - min_primary_server = ns.addr(); + min_primary_server = ns.host_port(); min_primary_server_np = np; } } if (min_primary_server_np != nullptr) { - action.node = min_primary_server; + action.node = dsn::dns_resolver::instance().resolve_address(min_primary_server); + action.__set_hp_node(min_primary_server); action.target = action.node; + action.__set_hp_target(action.hp_node); action.type = config_type::CT_ASSIGN_PRIMARY; min_primary_server_np->newly_add_primary(gpid.get_app_id(), false); } @@ -310,6 +318,7 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi std::string reason; config_context &cc = *get_config_context(*view.apps, gpid); action.node.set_invalid(); + action.__set_hp_node(host_port()); for (int i = 0; i < cc.dropped.size(); ++i) { const dropped_replica &dr = cc.dropped[i]; char time_buf[30] = {0}; @@ -327,28 +336,31 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi dr.last_prepared_decree); } - for (int i = 0; i < pc.last_drops.size(); ++i) { + for (int i = 0; i < pc.hp_last_drops.size(); ++i) { int dropped_index = -1; for (int k = 0; k < cc.dropped.size(); k++) { - if (cc.dropped[k].node == pc.last_drops[i]) { + if (cc.dropped[k].node == pc.hp_last_drops[i]) { dropped_index = k; break; } } - LOG_INFO("{}: config_context.last_drops[{}]: node({}), dropped_index({})", + LOG_INFO("{}: 
config_context.last_drops[{}({})]: node({}), dropped_index({})", gpid_name, i, + pc.hp_last_drops[i], pc.last_drops[i], dropped_index); } - if (pc.last_drops.size() == 1) { - LOG_WARNING("{}: the only node({}) is dead, waiting it to come back", + if (pc.hp_last_drops.size() == 1) { + LOG_WARNING("{}: the only node({}({})) is dead, waiting it to come back", gpid_name, + pc.hp_last_drops.back(), pc.last_drops.back()); + action.hp_node = pc.hp_last_drops.back(); action.node = pc.last_drops.back(); } else { - std::vector nodes(pc.last_drops.end() - 2, pc.last_drops.end()); + std::vector nodes(pc.hp_last_drops.end() - 2, pc.hp_last_drops.end()); std::vector collected_info(2); bool ready = true; @@ -365,7 +377,7 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi fmt::format("the last dropped node({}) hasn't come back yet", nodes[i]); LOG_WARNING("{}: don't select primary: {}", gpid_name, reason); } else { - std::vector::iterator it = cc.find_from_dropped(nodes[i]); + const auto &it = cc.find_from_dropped(nodes[i]); if (it == cc.dropped.end() || it->ballot == invalid_ballot) { if (ns->has_collected()) { LOG_INFO("{}: ignore {}'s replica info as it doesn't exist on " @@ -413,18 +425,22 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi if (larger_pd >= pc.last_committed_decree && larger_pd >= larger_cd) { if (gap1 != 0) { // 1. choose node with larger ballot - action.node = gap1 < 0 ? recent_dead.node : previous_dead.node; + action.hp_node = gap1 < 0 ? recent_dead.node : previous_dead.node; } else if (gap2 != 0) { // 2. choose node with larger last_committed_decree - action.node = gap2 < 0 ? recent_dead.node : previous_dead.node; + action.hp_node = gap2 < 0 ? recent_dead.node : previous_dead.node; } else { // 3. choose node with larger last_prepared_decree - action.node = previous_dead.last_prepared_decree > - recent_dead.last_prepared_decree - ? previous_dead.node - : recent_dead.node; + action.hp_node = previous_dead.last_prepared_decree > + recent_dead.last_prepared_decree + ? 
previous_dead.node + : recent_dead.node; } - LOG_INFO("{}: select {} as a new primary", gpid_name, action.node); + action.node = dsn::dns_resolver::instance().resolve_address(action.hp_node); + LOG_INFO("{}: select {}({}) as a new primary", + gpid_name, + action.hp_node, + action.node); } else { char buf[1000]; sprintf(buf, @@ -444,11 +460,12 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { + action.__set_hp_target(action.hp_node); action.target = action.node; action.type = config_type::CT_ASSIGN_PRIMARY; - get_newly_partitions(*view.nodes, action.node) + get_newly_partitions(*view.nodes, action.hp_node) ->newly_add_primary(gpid.get_app_id(), false); } else { LOG_WARNING("{}: don't select any node for security reason, administrator can select " @@ -462,7 +479,8 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi for (int i = 0; i < cc.dropped.size(); ++i) { const dropped_replica &dr = cc.dropped[i]; ddd_node_info ninfo; - ninfo.node = dr.node; + ninfo.node = dsn::dns_resolver::instance().resolve_address(dr.node); + ninfo.__set_hp_node(dr.node); ninfo.drop_time_ms = dr.time; ninfo.ballot = invalid_ballot; ninfo.last_committed_decree = invalid_decree; @@ -529,6 +547,7 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g is_emergency = true; } action.node.set_invalid(); + action.__set_hp_node(host_port()); if (is_emergency) { std::ostringstream oss; @@ -562,7 +581,8 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g cc.prefered_dropped, cc.prefered_dropped, cc.prefered_dropped - 1); - action.node = server.node; + action.hp_node = server.node; + action.node = dsn::dns_resolver::instance().resolve_address(server.node); cc.prefered_dropped--; break; } else { @@ -577,30 +597,31 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g } } - if (action.node.is_invalid() || in_black_list(action.node)) { - if (!action.node.is_invalid()) { + if (action.hp_node.is_invalid() || in_black_list(action.hp_node)) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}) refuse to use selected node({}) as it is in black list", gpid, - action.node); + action.hp_node); } newly_partitions *min_server_np = nullptr; for (auto &pairs : *view.nodes) { node_state &ns = pairs.second; - if (!ns.alive() || is_member(pc, ns.addr()) || in_black_list(ns.addr())) + if (!ns.alive() || is_member(pc, ns.host_port()) || in_black_list(ns.host_port())) continue; newly_partitions *np = newly_partitions_ext::get_inited(&ns); if (min_server_np == nullptr || np->less_partitions(*min_server_np, gpid.get_app_id())) { - action.node = ns.addr(); + action.__set_hp_node(ns.host_port()); + action.node = dsn::dns_resolver::instance().resolve_address(ns.host_port()); min_server_np = np; } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}): can't find valid node in dropped list to add as secondary, " "choose new node({}) with minimal partitions serving", gpid, - action.node); + action.hp_node); } else { LOG_INFO("gpid({}): can't find valid node in dropped list to add as secondary, " "but also we can't find a new node to add as secondary", @@ -612,10 +633,11 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g const dropped_replica &server = cc.dropped.back(); if (is_node_alive(*view.nodes, server.node)) { CHECK(!server.node.is_invalid(), 
"invalid server address, address = {}", server.node); - action.node = server.node; + action.hp_node = server.node; + action.node = dsn::dns_resolver::instance().resolve_address(server.node); } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}): choose node({}) as secondary coz it is last_dropped_node and is " "alive now", gpid, @@ -628,11 +650,12 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { action.type = config_type::CT_ADD_SECONDARY; action.target = pc.primary; + action.__set_hp_target(pc.hp_primary); - newly_partitions *np = get_newly_partitions(*(view.nodes), action.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), action.hp_node); CHECK_NOTNULL(np, ""); np->newly_add_partition(gpid.get_app_id()); @@ -647,9 +670,9 @@ pc_status partition_guardian::on_redundant_secondary(meta_view &view, const dsn: const node_mapper &nodes = *(view.nodes); const partition_configuration &pc = *get_config(*(view.apps), gpid); int target = 0; - int load = nodes.find(pc.secondaries.front())->second.partition_count(); - for (int i = 0; i != pc.secondaries.size(); ++i) { - int l = nodes.find(pc.secondaries[i])->second.partition_count(); + int load = nodes.find(pc.hp_secondaries.front())->second.partition_count(); + for (int i = 0; i != pc.hp_secondaries.size(); ++i) { + int l = nodes.find(pc.hp_secondaries[i])->second.partition_count(); if (l > load) { load = l; target = i; @@ -660,6 +683,8 @@ pc_status partition_guardian::on_redundant_secondary(meta_view &view, const dsn: action.type = config_type::CT_REMOVE; action.node = pc.secondaries[target]; action.target = pc.primary; + action.hp_node = pc.hp_secondaries[target]; + action.hp_target = pc.hp_primary; // TODO: treat remove as cure proposals too get_config_context(*view.apps, gpid)->lb_actions.assign_balancer_proposals({action}); @@ -670,11 +695,11 @@ void partition_guardian::finish_cure_proposal(meta_view &view, const dsn::gpid &gpid, const configuration_proposal_action &act) { - newly_partitions *np = get_newly_partitions(*(view.nodes), act.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), act.hp_node); if (np == nullptr) { LOG_INFO("can't get the newly_partitions extension structure for node({}), " "the node may be dead and removed", - act.node); + act.hp_node); } else { if (act.type == config_type::CT_ASSIGN_PRIMARY) { np->newly_remove_primary(gpid.get_app_id(), false); @@ -738,15 +763,15 @@ partition_guardian::ctrl_assign_secondary_black_list(const std::vector addr_list; + std::set hp_list; for (const std::string &s : ip_ports) { - const auto addr = rpc_address::from_host_port(s); - if (!addr) { + const auto hp = host_port::from_string(s); + if (!hp) { return invalid_arguments; } - addr_list.insert(addr); + hp_list.insert(hp); } - _assign_secondary_black_list = std::move(addr_list); + _assign_secondary_black_list = std::move(hp_list); return "set ok"; } diff --git a/src/meta/partition_guardian.h b/src/meta/partition_guardian.h index 612a56a9c1..ea387b6cbe 100644 --- a/src/meta/partition_guardian.h +++ b/src/meta/partition_guardian.h @@ -29,7 +29,7 @@ #include "dsn.layer2_types.h" #include "meta_admin_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/command_manager.h" #include "utils/zlocks.h" @@ -82,7 +82,7 @@ class partition_guardian _ddd_partitions[partition.config.pid] = 
std::move(partition); } - bool in_black_list(dsn::rpc_address addr) + bool in_black_list(dsn::host_port addr) { dsn::zauto_read_lock l(_black_list_lock); return _assign_secondary_black_list.count(addr) != 0; @@ -98,7 +98,7 @@ class partition_guardian // but when adding secondary, the black list is accessed in THREADPOOL_META_STATE // so we need a lock to protect it dsn::zrwlock_nr _black_list_lock; // [ - std::set _assign_secondary_black_list; + std::set _assign_secondary_black_list; // ] std::vector> _cmds; diff --git a/src/meta/server_load_balancer.cpp b/src/meta/server_load_balancer.cpp index 04b53f6720..219dc33f0a 100644 --- a/src/meta/server_load_balancer.cpp +++ b/src/meta/server_load_balancer.cpp @@ -32,6 +32,7 @@ #include "dsn.layer2_types.h" #include "meta/meta_data.h" #include "meta_admin_types.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -130,7 +131,7 @@ void newly_partitions::newly_remove_partition(int32_t app_id) --total_partitions; } -newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr) +newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::host_port &addr) { node_state *ns = get_node_state(mapper, addr, false); if (ns == nullptr) @@ -174,9 +175,10 @@ void server_load_balancer::register_proposals(meta_view view, // for these proposals, they should keep the target empty and // the meta-server will fill primary as target. if (act.target.is_invalid()) { - if (!pc.primary.is_invalid()) + if (!pc.hp_primary.is_invalid()) { act.target = pc.primary; - else { + act.__set_hp_target(pc.hp_primary); + } else { resp.err = ERR_INVALID_PARAMETERS; return; } diff --git a/src/meta/server_load_balancer.h b/src/meta/server_load_balancer.h index 774b7bb86a..7d03be5b9f 100644 --- a/src/meta/server_load_balancer.h +++ b/src/meta/server_load_balancer.h @@ -35,7 +35,7 @@ #include #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/extensible_object.h" namespace dsn { @@ -83,7 +83,7 @@ class newly_partitions static void s_delete(void *_this); }; typedef dsn::object_extension_helper newly_partitions_ext; -newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr); +newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::host_port &addr); // The interface of the server load balancer which defines the scheduling policy of how to // place the partition replica to the nodes. 
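// [Editor's note, not part of the patch] The hunks above and below all follow the same
// convention when a cure or balance proposal picks a node: the dsn::host_port is now the
// authoritative key for meta-state lookups (node_mapper, get_newly_partitions()), and the
// legacy dsn::rpc_address twin is derived from it through the DNS resolver so the
// address-typed wire fields stay populated for old peers. The sketch below is only an
// illustration of that convention, assuming the helpers visible elsewhere in this diff
// (__set_hp_node/__set_hp_target, dns_resolver::instance().resolve_address) and thrift
// types generated into the dsn::replication namespace; `chosen` is a hypothetical
// caller-supplied host_port standing in for the selection logic in partition_guardian.

#include "meta_admin_types.h"
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_host_port.h"

void fill_assign_primary_action(dsn::replication::configuration_proposal_action &action,
                                const dsn::host_port &chosen)
{
    // host_port fields first: they are what the meta state machine keys on after this change.
    action.__set_hp_node(chosen);
    action.__set_hp_target(chosen);
    // Then resolve the matching rpc_address for components that still read the address fields.
    action.node = dsn::dns_resolver::instance().resolve_address(chosen);
    action.target = action.node;
    action.type = dsn::replication::config_type::CT_ASSIGN_PRIMARY;
}

// Design note: keeping both fields in sync (rather than dropping rpc_address outright)
// matches the optional hp_* additions in the thrift structs, so mixed-version clusters can
// still parse the messages during an upgrade.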
@@ -165,10 +165,10 @@ class server_load_balancer virtual std::string get_balance_operation_count(const std::vector &args) = 0; public: - typedef std::function node_comparator; + typedef std::function node_comparator; static node_comparator primary_comparator(const node_mapper &nodes) { - return [&nodes](const rpc_address &r1, const rpc_address &r2) { + return [&nodes](const host_port &r1, const host_port &r2) { int p1 = nodes.find(r1)->second.primary_count(); int p2 = nodes.find(r2)->second.primary_count(); if (p1 != p2) @@ -179,7 +179,7 @@ class server_load_balancer static node_comparator partition_comparator(const node_mapper &nodes) { - return [&nodes](const rpc_address &r1, const rpc_address &r2) { + return [&nodes](const host_port &r1, const host_port &r2) { int p1 = nodes.find(r1)->second.partition_count(); int p2 = nodes.find(r2)->second.partition_count(); if (p1 != p2) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index d5cb2294b1..0a5e4ccf69 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -60,7 +60,9 @@ #include "metadata_types.h" #include "replica_admin_types.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -540,6 +542,7 @@ error_code server_state::sync_apps_to_remote_storage() init_app_partition_node(app, i, init_callback); } } + tracker.wait_outstanding_tasks(); t = _meta_svc->get_remote_storage()->set_data(_apps_root, blob(unlock_state, 0, strlen(unlock_state)), @@ -570,6 +573,9 @@ dsn::error_code server_state::sync_apps_from_remote_storage() const blob &value) mutable { if (ec == ERR_OK) { partition_configuration pc; + pc.__isset.hp_secondaries = true; + pc.__isset.hp_last_drops = true; + pc.__isset.hp_primary = true; dsn::json::json_forwarder::decode(value, pc); CHECK(pc.pid.get_app_id() == app->app_id && @@ -578,8 +584,8 @@ dsn::error_code server_state::sync_apps_from_remote_storage() { zauto_write_lock l(_lock); app->partitions[partition_id] = pc; - for (const dsn::rpc_address &addr : pc.last_drops) { - app->helpers->contexts[partition_id].record_drop_history(addr); + for (const auto &hp : pc.hp_last_drops) { + app->helpers->contexts[partition_id].record_drop_history(hp); } if (app->status == app_status::AS_CREATING && @@ -723,11 +729,12 @@ void server_state::initialize_node_state() for (auto &app_pair : _all_apps) { app_state &app = *(app_pair.second); for (partition_configuration &pc : app.partitions) { - if (!pc.primary.is_invalid()) { - node_state *ns = get_node_state(_nodes, pc.primary, true); + if (!pc.hp_primary.is_invalid()) { + node_state *ns = get_node_state(_nodes, pc.hp_primary, true); ns->put_partition(pc.pid, true); } - for (auto &ep : pc.secondaries) { + + for (auto &ep : pc.hp_secondaries) { CHECK(!ep.is_invalid(), "invalid secondary address, addr = {}", ep); node_state *ns = get_node_state(_nodes, ep, true); ns->put_partition(pc.pid, false); @@ -790,7 +797,12 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) bool reject_this_request = false; response.__isset.gc_replicas = false; - LOG_INFO("got config sync request from {}, stored_replicas_count({})", + + host_port hp_node; + GET_HOST_PORT(request, node, hp_node); + + LOG_INFO("got config sync request from {}({}), stored_replicas_count({})", + hp_node, request.node, request.stored_replicas.size()); @@ -798,9 +810,9 @@ void 
server_state::on_config_sync(configuration_query_by_node_rpc rpc) zauto_read_lock l(_lock); // sync the partitions to the replica server - node_state *ns = get_node_state(_nodes, request.node, false); + node_state *ns = get_node_state(_nodes, hp_node, false); if (ns == nullptr) { - LOG_INFO("node({}) not found in meta server", request.node); + LOG_INFO("node({}({})) not found in meta server", hp_node, request.node); response.err = ERR_OBJECT_NOT_FOUND; } else { response.err = ERR_OK; @@ -819,7 +831,10 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) // when register child partition, stage is config_status::pending_remote_sync, // but cc.pending_sync_request is not set, see more in function // 'register_child_on_meta' - if (req == nullptr || req->node == request.node) + if (req == nullptr) + return false; + if ((req->__isset.hp_node && req->hp_node == hp_node) || + req->node == request.node) return false; } @@ -854,7 +869,8 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) // the app is deleted but not expired, we need to ignore it // if the app is deleted and expired, we need to gc it for (const replica_info &rep : replicas) { - LOG_DEBUG("receive stored replica from {}, pid({})", request.node, rep.pid); + LOG_DEBUG( + "receive stored replica from {}({}), pid({})", hp_node, request.node, rep.pid); std::shared_ptr app = get_app(rep.pid.get_app_id()); if (app == nullptr || rep.pid.get_partition_index() >= app->partition_count) { // This app has garbage partition after cancel split, the canceled child @@ -864,55 +880,63 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) rep.status == partition_status::PS_ERROR) { response.gc_replicas.push_back(rep); LOG_WARNING( - "notify node({}) to gc replica({}) because it is useless partition " + "notify node({}({})) to gc replica({}) because it is useless partition " "which is caused by cancel split", + hp_node, request.node, rep.pid); } else { // app is not recognized or partition is not recognized CHECK(false, - "gpid({}) on node({}) is not exist on meta server, administrator " + "gpid({}) on node({}({})) is not exist on meta server, administrator " "should check consistency of meta data", rep.pid, + hp_node, request.node); } } else if (app->status == app_status::AS_DROPPED) { if (app->expire_second == 0) { - LOG_INFO("gpid({}) on node({}) is of dropped table, but expire second is " - "not specified, do not delete it for safety reason", - rep.pid, - request.node); + LOG_INFO( + "gpid({}) on node({}({})) is of dropped table, but expire second is " + "not specified, do not delete it for safety reason", + rep.pid, + hp_node, + request.node); } else if (has_seconds_expired(app->expire_second)) { // can delete replica only when expire second is explicitely specified and // expired. 
if (level <= meta_function_level::fl_steady) { - LOG_INFO("gpid({}) on node({}) is of dropped and expired table, but " - "current function level is {}, do not delete it for safety " - "reason", - rep.pid, - request.node, - _meta_function_level_VALUES_TO_NAMES.find(level)->second); + LOG_INFO( + "gpid({}) on node({}({})) is of dropped and expired table, but " + "current function level is {}, do not delete it for safety " + "reason", + rep.pid, + hp_node, + request.node, + _meta_function_level_VALUES_TO_NAMES.find(level)->second); } else { response.gc_replicas.push_back(rep); - LOG_WARNING("notify node({}) to gc replica({}) coz the app is " + LOG_WARNING("notify node({}({})) to gc replica({}) coz the app is " "dropped and expired", + hp_node, request.node, rep.pid); } } } else if (app->status == app_status::AS_AVAILABLE) { - bool is_useful_replica = - collect_replica({&_all_apps, &_nodes}, request.node, rep); + bool is_useful_replica = collect_replica({&_all_apps, &_nodes}, hp_node, rep); if (!is_useful_replica) { if (level <= meta_function_level::fl_steady) { - LOG_INFO("gpid({}) on node({}) is useless, but current function " + LOG_INFO("gpid({}) on node({}({})) is useless, but current function " "level is {}, do not delete it for safety reason", rep.pid, + hp_node, request.node, _meta_function_level_VALUES_TO_NAMES.find(level)->second); } else { response.gc_replicas.push_back(rep); - LOG_WARNING("notify node({}) to gc replica({}) coz it is useless", + LOG_WARNING("notify node({}({})) to gc replica({}) coz it is useless", + hp_node, request.node, rep.pid); } @@ -930,8 +954,9 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) response.err = ERR_BUSY; response.partitions.clear(); } - LOG_INFO("send config sync response to {}, err({}), partitions_count({}), " + LOG_INFO("send config sync response to {}({}), err({}), partitions_count({}), " "gc_replicas_count({})", + hp_node, request.node, response.err, response.partitions.size(), @@ -1410,13 +1435,16 @@ void server_state::list_apps(const configuration_list_apps_request &request, response.err = dsn::ERR_OK; } -void server_state::send_proposal(rpc_address target, const configuration_update_request &proposal) +void server_state::send_proposal(host_port target, const configuration_update_request &proposal) { - LOG_INFO("send proposal {} for gpid({}), ballot = {}, target = {}, node = {}", + host_port hp_node; + GET_HOST_PORT(proposal, node, hp_node); + LOG_INFO("send proposal {} for gpid({}), ballot = {}, target = {}, node = {}({})", ::dsn::enum_to_string(proposal.type), proposal.config.pid, proposal.config.ballot, target, + hp_node, proposal.node); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CONFIG_PROPOSAL, 0, proposal.config.pid.thread_hash()); @@ -1432,8 +1460,9 @@ void server_state::send_proposal(const configuration_proposal_action &action, request.info = app; request.type = action.type; request.node = action.node; + request.__set_hp_node(action.hp_node); request.config = pc; - send_proposal(action.target, request); + send_proposal(action.hp_target, request); } void server_state::request_check(const partition_configuration &old, @@ -1443,39 +1472,84 @@ void server_state::request_check(const partition_configuration &old, switch (request.type) { case config_type::CT_ASSIGN_PRIMARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, 
request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_UPGRADE_TO_PRIMARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) != old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != + old.secondaries.end(), + ""); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: - CHECK_EQ(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_EQ(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_EQ(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_DOWNGRADE_TO_INACTIVE: case config_type::CT_REMOVE: - CHECK(old.primary == request.node || - std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK(old.hp_primary == request.hp_node || + std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) != old.hp_secondaries.end(), + ""); + } else { + CHECK(old.primary == request.node || + std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != + old.secondaries.end(), + ""); + } break; case config_type::CT_UPGRADE_TO_SECONDARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT: - CHECK_EQ(old.primary, new_config.primary); - CHECK(old.secondaries == new_config.secondaries, ""); + if (request.__isset.hp_node) { + CHECK_EQ(old.hp_primary, new_config.hp_primary); + CHECK(old.hp_secondaries == new_config.hp_secondaries, ""); + } else { + CHECK_EQ(old.primary, new_config.primary); + CHECK(old.secondaries == new_config.secondaries, ""); + } break; default: break; @@ -1494,6 +1568,9 @@ void server_state::update_configuration_locally( health_status old_health_status = partition_health_status(old_cfg, min_2pc_count); health_status new_health_status = partition_health_status(new_cfg, min_2pc_count); + host_port hp_node; + GET_HOST_PORT(*config_request, node, hp_node); + if (app.is_stateful) { CHECK(old_cfg.ballot == invalid_ballot || old_cfg.ballot + 1 == new_cfg.ballot, "invalid configuration update request, old ballot 
{}, new ballot {}", @@ -1502,8 +1579,9 @@ void server_state::update_configuration_locally( node_state *ns = nullptr; if (config_request->type != config_type::CT_DROP_PARTITION) { - ns = get_node_state(_nodes, config_request->node, false); - CHECK_NOTNULL(ns, "invalid node address, address = {}", config_request->node); + ns = get_node_state(_nodes, hp_node, false); + CHECK_NOTNULL( + ns, "invalid node address, address = {}({})", hp_node, config_request->node); } #ifndef NDEBUG request_check(old_cfg, *config_request); @@ -1531,7 +1609,7 @@ void server_state::update_configuration_locally( break; case config_type::CT_DROP_PARTITION: - for (const rpc_address &node : new_cfg.last_drops) { + for (const auto &node : new_cfg.hp_last_drops) { ns = get_node_state(_nodes, node, false); if (ns != nullptr) ns->remove_partition(gpid, false); @@ -1544,9 +1622,17 @@ void server_state::update_configuration_locally( break; case config_type::CT_REGISTER_CHILD: { ns->put_partition(gpid, true); - for (auto &secondary : config_request->config.secondaries) { - auto secondary_node = get_node_state(_nodes, secondary, false); - secondary_node->put_partition(gpid, false); + if (config_request->config.__isset.hp_secondaries) { + for (const auto &secondary : config_request->config.hp_secondaries) { + auto secondary_node = get_node_state(_nodes, secondary, false); + secondary_node->put_partition(gpid, false); + } + } else { + for (const auto &secondary : config_request->config.secondaries) { + auto secondary_node = + get_node_state(_nodes, host_port::from_address(secondary), false); + secondary_node->put_partition(gpid, false); + } } break; } @@ -1557,22 +1643,25 @@ void server_state::update_configuration_locally( } else { CHECK_EQ(old_cfg.ballot, new_cfg.ballot); + auto host_node = host_port::from_address(config_request->host_node); new_cfg = old_cfg; partition_configuration_stateless pcs(new_cfg); if (config_request->type == config_type::type::CT_ADD_SECONDARY) { - pcs.hosts().emplace_back(config_request->host_node); - pcs.workers().emplace_back(config_request->node); + pcs.hosts().emplace_back(host_node); + pcs.workers().emplace_back(hp_node); } else { - auto it = - std::remove(pcs.hosts().begin(), pcs.hosts().end(), config_request->host_node); + auto it = std::remove(pcs.hosts().begin(), pcs.hosts().end(), host_node); pcs.hosts().erase(it); - it = std::remove(pcs.workers().begin(), pcs.workers().end(), config_request->node); + it = std::remove(pcs.workers().begin(), pcs.workers().end(), hp_node); pcs.workers().erase(it); } - auto it = _nodes.find(config_request->host_node); - CHECK(it != _nodes.end(), "invalid node address, address = {}", config_request->host_node); + auto it = _nodes.find(host_node); + CHECK(it != _nodes.end(), + "invalid node address, address = {}({})", + host_node, + config_request->host_node); if (config_type::CT_REMOVE == config_request->type) { it->second.remove_partition(gpid, false); } else { @@ -1698,8 +1787,9 @@ void server_state::on_update_configuration_on_remote_reply( } else { config_request->type = action.type; config_request->node = action.node; + config_request->__set_hp_node(action.hp_node); config_request->info = *app; - send_proposal(action.target, *config_request); + send_proposal(action.hp_target, *config_request); } } } @@ -1748,16 +1838,25 @@ void server_state::drop_partition(std::shared_ptr &app, int pidx) request.info = *app; request.type = config_type::CT_DROP_PARTITION; request.node = pc.primary; + request.__set_hp_node(pc.hp_primary); request.config = pc; + for (auto 
&node : pc.hp_secondaries) { + maintain_drops(request.config.hp_last_drops, node, request.type); + } for (auto &node : pc.secondaries) { maintain_drops(request.config.last_drops, node, request.type); } + if (!pc.hp_primary.is_invalid()) { + maintain_drops(request.config.hp_last_drops, pc.hp_primary, request.type); + } if (!pc.primary.is_invalid()) { maintain_drops(request.config.last_drops, pc.primary, request.type); } request.config.primary.set_invalid(); request.config.secondaries.clear(); + request.config.hp_primary.reset(); + request.config.hp_secondaries.clear(); CHECK_EQ((pc.partition_flags & pc_flags::dropped), 0); request.config.partition_flags |= pc_flags::dropped; @@ -1800,8 +1899,9 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app return; } else { LOG_WARNING("gpid({}) is syncing another request with remote, cancel it due to the " - "primary({}) is down", + "primary({}({})) is down", pc.pid, + pc.hp_primary, pc.primary); cc.cancel_sync(); } @@ -1814,8 +1914,11 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app request.config = pc; request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; request.node = pc.primary; + request.__set_hp_node(pc.hp_primary); request.config.ballot++; request.config.primary.set_invalid(); + request.config.__set_hp_primary(host_port()); + maintain_drops(request.config.hp_last_drops, pc.hp_primary, request.type); maintain_drops(request.config.last_drops, pc.primary, request.type); cc.stage = config_status::pending_remote_sync; @@ -1827,19 +1930,20 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app void server_state::downgrade_secondary_to_inactive(std::shared_ptr &app, int pidx, - const rpc_address &node) + const host_port &node) { partition_configuration &pc = app->partitions[pidx]; config_context &cc = app->helpers->contexts[pidx]; - CHECK(!pc.primary.is_invalid(), "this shouldn't be called if the primary is invalid"); + CHECK(!pc.hp_primary.is_invalid(), "this shouldn't be called if the primary is invalid"); if (config_status::pending_remote_sync != cc.stage) { configuration_update_request request; request.info = *app; request.config = pc; request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; - request.node = node; - send_proposal(pc.primary, request); + request.node = dsn::dns_resolver::instance().resolve_address(node); + request.__set_hp_node(node); + send_proposal(pc.hp_primary, request); } else { LOG_INFO("gpid({}.{}) is syncing with remote storage, ignore the remove seconary({})", app->app_id, @@ -1850,34 +1954,40 @@ void server_state::downgrade_secondary_to_inactive(std::shared_ptr &a void server_state::downgrade_stateless_nodes(std::shared_ptr &app, int pidx, - const rpc_address &address) + const host_port &address) { std::shared_ptr req = std::make_shared(); req->info = *app; req->type = config_type::CT_REMOVE; - req->host_node = address; + req->host_node = dsn::dns_resolver::instance().resolve_address(address); req->node.set_invalid(); + req->hp_node.reset(); req->config = app->partitions[pidx]; config_context &cc = app->helpers->contexts[pidx]; partition_configuration &pc = req->config; unsigned i = 0; - for (; i < pc.secondaries.size(); ++i) { - if (pc.secondaries[i] == address) { + for (; i < pc.hp_secondaries.size(); ++i) { + if (pc.hp_secondaries[i] == address) { req->node = pc.last_drops[i]; + req->__set_hp_node(pc.hp_last_drops[i]); break; } } CHECK(!req->node.is_invalid(), "invalid node address, address = {}", req->node); // remove host_node & node from 
secondaries/last_drops, as it will be sync to remote storage - for (++i; i < pc.secondaries.size(); ++i) { + for (++i; i < pc.hp_secondaries.size(); ++i) { pc.secondaries[i - 1] = pc.secondaries[i]; pc.last_drops[i - 1] = pc.last_drops[i]; + pc.hp_secondaries[i - 1] = pc.hp_secondaries[i]; + pc.hp_last_drops[i - 1] = pc.hp_last_drops[i]; } pc.secondaries.pop_back(); pc.last_drops.pop_back(); + pc.hp_secondaries.pop_back(); + pc.hp_last_drops.pop_back(); if (config_status::pending_remote_sync == cc.stage) { LOG_WARNING("gpid({}) is syncing another request with remote, cancel it due to meta is " @@ -1944,6 +2054,7 @@ void server_state::on_update_configuration( msg->release_ref(); return; } else { + maintain_drops(cfg_request->config.hp_last_drops, cfg_request->hp_node, cfg_request->type); maintain_drops(cfg_request->config.last_drops, cfg_request->node, cfg_request->type); } @@ -1963,14 +2074,14 @@ void server_state::on_update_configuration( void server_state::on_partition_node_dead(std::shared_ptr &app, int pidx, - const dsn::rpc_address &address) + const dsn::host_port &address) { partition_configuration &pc = app->partitions[pidx]; if (app->is_stateful) { if (is_primary(pc, address)) downgrade_primary_to_inactive(app, pidx); else if (is_secondary(pc, address)) { - if (!pc.primary.is_invalid()) + if (!pc.hp_primary.is_invalid()) downgrade_secondary_to_inactive(app, pidx, address); else if (is_secondary(pc, address)) { LOG_INFO("gpid({}): secondary({}) is down, ignored it due to no primary for this " @@ -1986,7 +2097,7 @@ void server_state::on_partition_node_dead(std::shared_ptr &app, } } -void server_state::on_change_node_state(rpc_address node, bool is_alive) +void server_state::on_change_node_state(host_port node, bool is_alive) { LOG_DEBUG("change node({}) state to {}", node, is_alive ? "alive" : "dead"); zauto_write_lock l(_lock); @@ -2036,7 +2147,7 @@ void server_state::on_propose_balancer(const configuration_balancer_request &req error_code server_state::construct_apps(const std::vector &query_app_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, std::string &hint_message) { int max_app_id = 0; @@ -2130,7 +2241,7 @@ server_state::construct_apps(const std::vector &query_a error_code server_state::construct_partitions( const std::vector &query_replica_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, bool skip_lost_partitions, std::string &hint_message) { @@ -2172,11 +2283,11 @@ error_code server_state::construct_partitions( app->app_id, pc.pid.get_partition_index(), boost::lexical_cast(pc)); - if (pc.last_drops.size() + 1 < pc.max_replica_count) { + if (pc.hp_last_drops.size() + 1 < pc.max_replica_count) { std::ostringstream oss; oss << "WARNING: partition(" << app->app_id << "." 
<< pc.pid.get_partition_index() << ") only collects " - << (pc.last_drops.size() + 1) << "/" << pc.max_replica_count + << (pc.hp_last_drops.size() + 1) << "/" << pc.max_replica_count << " of replicas, may lost data" << std::endl; hint_message += oss.str(); } @@ -2219,7 +2330,7 @@ error_code server_state::construct_partitions( } dsn::error_code -server_state::sync_apps_from_replica_nodes(const std::vector &replica_nodes, +server_state::sync_apps_from_replica_nodes(const std::vector &replica_nodes, bool skip_bad_nodes, bool skip_lost_partitions, std::string &hint_message) @@ -2236,8 +2347,10 @@ server_state::sync_apps_from_replica_nodes(const std::vector & auto app_query_req = std::make_unique(); app_query_req->meta_server = dsn_primary_address(); + app_query_req->__set_hp_meta_server(dsn_primary_host_port()); query_app_info_rpc app_rpc(std::move(app_query_req), RPC_QUERY_APP_INFO); - app_rpc.call(replica_nodes[i], + const auto &addr = dsn::dns_resolver::instance().resolve_address(replica_nodes[i]); + app_rpc.call(addr, &tracker, [app_rpc, i, &replica_nodes, &query_app_errors, &query_app_responses]( error_code err) mutable { @@ -2254,10 +2367,10 @@ server_state::sync_apps_from_replica_nodes(const std::vector & }); auto replica_query_req = std::make_unique(); - replica_query_req->node = replica_nodes[i]; + replica_query_req->node = addr; query_replica_info_rpc replica_rpc(std::move(replica_query_req), RPC_QUERY_REPLICA_INFO); replica_rpc.call( - replica_nodes[i], + addr, &tracker, [replica_rpc, i, &replica_nodes, &query_replica_errors, &query_replica_responses]( error_code err) mutable { @@ -2295,16 +2408,18 @@ server_state::sync_apps_from_replica_nodes(const std::vector & failed_count++; query_app_errors[i] = err; query_replica_errors[i] = err; - std::ostringstream oss; if (skip_bad_nodes) { - oss << "WARNING: collect app and replica info from node(" << replica_nodes[i] - << ") failed with err(" << err << "), skip the bad node" << std::endl; + hint_message += fmt::format("WARNING: collect app and replica info from node({}) " + "failed with err({}), skip the bad node", + replica_nodes[i], + err); } else { - oss << "ERROR: collect app and replica info from node(" << replica_nodes[i] - << ") failed with err(" << err - << "), you can skip it by set skip_bad_nodes option" << std::endl; + hint_message += + fmt::format("ERROR: collect app and replica info from node({}) failed with " + "err({}), you can skip it by set skip_bad_nodes option", + replica_nodes[i], + err); } - hint_message += oss.str(); } else { succeed_count++; } @@ -2346,8 +2461,16 @@ void server_state::on_start_recovery(const configuration_recovery_request &req, req.skip_bad_nodes ? "true" : "false", req.skip_lost_partitions ? 
"true" : "false"); - resp.err = sync_apps_from_replica_nodes( - req.recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + if (req.__isset.hp_recovery_set) { + resp.err = sync_apps_from_replica_nodes( + req.hp_recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + } else { + auto hp_recovery_set = std::vector(); + host_port::fill_host_ports_from_addresses(req.recovery_set, hp_recovery_set); + resp.err = sync_apps_from_replica_nodes( + hp_recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + } + if (resp.err != dsn::ERR_OK) { LOG_ERROR("sync apps from replica nodes failed when do recovery, err = {}", resp.err); _all_apps.clear(); @@ -2382,7 +2505,7 @@ bool server_state::can_run_balancer() if (iter->second.partition_count() != 0) { LOG_INFO( "don't do replica migration coz dead node({}) has {} partitions not removed", - iter->second.addr(), + iter->second.host_port(), iter->second.partition_count()); return false; } @@ -2451,7 +2574,7 @@ bool server_state::check_all_partitions() std::vector add_secondary_actions; std::vector add_secondary_gpids; std::vector add_secondary_proposed; - std::map add_secondary_running_nodes; // node --> running_count + std::map add_secondary_running_nodes; // node --> running_count for (auto &app_pair : _exist_apps) { std::shared_ptr &app = app_pair.second; if (app->status == app_status::AS_CREATING || app->status == app_status::AS_DROPPING) { @@ -2496,10 +2619,11 @@ bool server_state::check_all_partitions() for (int i = 0; i < add_secondary_actions.size(); ++i) { gpid &pid = add_secondary_gpids[i]; partition_configuration &pc = *get_config(_all_apps, pid); - if (!add_secondary_proposed[i] && pc.secondaries.empty()) { + if (!add_secondary_proposed[i] && pc.hp_secondaries.empty()) { configuration_proposal_action &action = add_secondary_actions[i]; if (_add_secondary_enable_flow_control && - add_secondary_running_nodes[action.node] >= _add_secondary_max_count_for_one_node) { + add_secondary_running_nodes[action.hp_node] >= + _add_secondary_max_count_for_one_node) { // ignore continue; } @@ -2507,7 +2631,7 @@ bool server_state::check_all_partitions() send_proposal(action, pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; - add_secondary_running_nodes[action.node]++; + add_secondary_running_nodes[action.hp_node]++; } } @@ -2518,7 +2642,8 @@ bool server_state::check_all_partitions() gpid pid = add_secondary_gpids[i]; partition_configuration &pc = *get_config(_all_apps, pid); if (_add_secondary_enable_flow_control && - add_secondary_running_nodes[action.node] >= _add_secondary_max_count_for_one_node) { + add_secondary_running_nodes[action.hp_node] >= + _add_secondary_max_count_for_one_node) { LOG_INFO("do not send {} proposal for gpid({}) for flow control reason, target = " "{}, node = {}", ::dsn::enum_to_string(action.type), @@ -2531,7 +2656,7 @@ bool server_state::check_all_partitions() send_proposal(action, pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; - add_secondary_running_nodes[action.node]++; + add_secondary_running_nodes[action.hp_node]++; } } @@ -2617,22 +2742,23 @@ void server_state::check_consistency(const dsn::gpid &gpid) partition_configuration &config = app.partitions[gpid.get_partition_index()]; if (app.is_stateful) { - if (config.primary.is_invalid() == false) { - auto it = _nodes.find(config.primary); - CHECK(it != _nodes.end(), "invalid primary address, address = {}", config.primary); + if (config.hp_primary.is_invalid() == false) 
{ + auto it = _nodes.find(config.hp_primary); + CHECK(it != _nodes.end(), "invalid primary address, address = {}", config.hp_primary); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_PRIMARY); - CHECK(std::find(config.last_drops.begin(), config.last_drops.end(), config.primary) == - config.last_drops.end(), + CHECK(std::find(config.hp_last_drops.begin(), + config.hp_last_drops.end(), + config.hp_primary) == config.hp_last_drops.end(), "primary shouldn't appear in last_drops, address = {}", - config.primary); + config.hp_primary); } - for (auto &ep : config.secondaries) { + for (auto &ep : config.hp_secondaries) { auto it = _nodes.find(ep); CHECK(it != _nodes.end(), "invalid secondary address, address = {}", ep); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_SECONDARY); - CHECK(std::find(config.last_drops.begin(), config.last_drops.end(), ep) == - config.last_drops.end(), + CHECK(std::find(config.hp_last_drops.begin(), config.hp_last_drops.end(), ep) == + config.hp_last_drops.end(), "secondary shouldn't appear in last_drops, address = {}", ep); } diff --git a/src/meta/server_state.h b/src/meta/server_state.h index e8c1c4f070..b6b20a790c 100644 --- a/src/meta/server_state.h +++ b/src/meta/server_state.h @@ -53,7 +53,7 @@ namespace dsn { class blob; class command_deregister; class message_ex; -class rpc_address; +class host_port; namespace replication { class configuration_balancer_request; @@ -182,7 +182,7 @@ class server_state error_code dump_from_remote_storage(const char *local_path, bool sync_immediately); error_code restore_from_local_storage(const char *local_path); - void on_change_node_state(rpc_address node, bool is_alive); + void on_change_node_state(host_port node, bool is_alive); void on_propose_balancer(const configuration_balancer_request &request, configuration_balancer_response &response); void on_start_recovery(const configuration_recovery_request &request, @@ -233,7 +233,7 @@ class server_state // else indicate error that remote storage responses error_code sync_apps_to_remote_storage(); - error_code sync_apps_from_replica_nodes(const std::vector &node_list, + error_code sync_apps_from_replica_nodes(const std::vector &node_list, bool skip_bad_nodes, bool skip_lost_partitions, std::string &hint_message); @@ -249,11 +249,11 @@ class server_state void check_consistency(const dsn::gpid &gpid); error_code construct_apps(const std::vector &query_app_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, std::string &hint_message); error_code construct_partitions( const std::vector &query_replica_info_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, bool skip_lost_partitions, std::string &hint_message); @@ -282,15 +282,14 @@ class server_state void downgrade_primary_to_inactive(std::shared_ptr &app, int pidx); void downgrade_secondary_to_inactive(std::shared_ptr &app, int pidx, - const rpc_address &node); - void downgrade_stateless_nodes(std::shared_ptr &app, - int pidx, - const rpc_address &address); + const host_port &node); + void + downgrade_stateless_nodes(std::shared_ptr &app, int pidx, const host_port &address); void on_partition_node_dead(std::shared_ptr &app, int pidx, - const dsn::rpc_address &address); - void send_proposal(rpc_address target, const configuration_update_request &proposal); + const dsn::host_port &address); + void send_proposal(host_port target, const configuration_update_request &proposal); void send_proposal(const configuration_proposal_action &action, const 
partition_configuration &pc, const app_state &app); diff --git a/src/meta/server_state_restore.cpp b/src/meta/server_state_restore.cpp index b2dada4a25..7dac1b424f 100644 --- a/src/meta/server_state_restore.cpp +++ b/src/meta/server_state_restore.cpp @@ -40,7 +40,7 @@ #include "meta/table_metrics.h" #include "meta_admin_types.h" #include "meta_service.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/task.h" @@ -251,7 +251,7 @@ void server_state::on_query_restore_status(configuration_query_restore_rpc rpc) for (int32_t i = 0; i < app->partition_count; i++) { const auto &r_state = app->helpers->restore_states[i]; const auto &p = app->partitions[i]; - if (!p.primary.is_invalid() || !p.secondaries.empty()) { + if (!p.hp_primary.is_invalid() || !p.hp_secondaries.empty()) { // already have primary, restore succeed continue; } diff --git a/src/meta/test/backup_test.cpp b/src/meta/test/backup_test.cpp index e2ff7d574d..21af00ef68 100644 --- a/src/meta/test/backup_test.cpp +++ b/src/meta/test/backup_test.cpp @@ -45,6 +45,7 @@ #include "runtime/api_layer1.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -187,7 +188,7 @@ class progress_liar : public meta_service public: // req is held by callback, we don't need to handle the life-time of it virtual void send_request(dsn::message_ex *req, - const rpc_address &target, + const host_port &target, const rpc_response_task_ptr &callback) { // need to handle life-time manually @@ -498,14 +499,16 @@ TEST_F(policy_context_test, test_app_dropped_during_backup) int64_t cur_start_time_ms = static_cast(dsn_now_ms()); { zauto_lock l(_mp._lock); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 3, 3); app_state *app = state->_all_apps[3].get(); app->status = dsn::app_status::AS_AVAILABLE; for (partition_configuration &pc : app->partitions) { - pc.primary = node_list[0]; - pc.secondaries = {node_list[1], node_list[2]}; + pc.primary = node_list[0].second; + pc.secondaries = {node_list[1].second, node_list[2].second}; + pc.__set_hp_primary(node_list[0].first); + pc.__set_hp_secondaries({node_list[1].first, node_list[2].first}); } _mp._backup_history.clear(); diff --git a/src/meta/test/balancer_simulator/balancer_simulator.cpp b/src/meta/test/balancer_simulator/balancer_simulator.cpp index 6c3e25feba..e23bbdb834 100644 --- a/src/meta/test/balancer_simulator/balancer_simulator.cpp +++ b/src/meta/test/balancer_simulator/balancer_simulator.cpp @@ -43,6 +43,7 @@ #include "meta_admin_types.h" #include "runtime/app_model.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" using namespace dsn::replication; @@ -50,34 +51,34 @@ using namespace dsn::replication; class simple_priority_queue { public: - simple_priority_queue(const std::vector &nl, + simple_priority_queue(const std::vector &nl, server_load_balancer::node_comparator &&compare) : container(nl), cmp(std::move(compare)) { std::make_heap(container.begin(), container.end(), cmp); } - void push(const dsn::rpc_address &addr) + void push(const dsn::host_port &addr) { container.push_back(addr); std::push_heap(container.begin(), container.end(), cmp); } - dsn::rpc_address pop() + dsn::host_port pop() { 
std::pop_heap(container.begin(), container.end(), cmp); - dsn::rpc_address result = container.back(); + dsn::host_port result = container.back(); container.pop_back(); return result; } - dsn::rpc_address top() const { return container.front(); } + dsn::host_port top() const { return container.front(); } bool empty() const { return container.empty(); } private: - std::vector container; + std::vector container; server_load_balancer::node_comparator cmp; }; void generate_balanced_apps(/*out*/ app_mapper &apps, node_mapper &nodes, - const std::vector &node_list) + const std::vector &node_list) { nodes.clear(); for (const auto &node : node_list) @@ -98,22 +99,22 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, simple_priority_queue pq1(node_list, server_load_balancer::primary_comparator(nodes)); // generate balanced primary for (dsn::partition_configuration &pc : the_app->partitions) { - dsn::rpc_address n = pq1.pop(); + const auto &n = pq1.pop(); nodes[n].put_partition(pc.pid, true); - pc.primary = n; + pc.hp_primary = n; pq1.push(n); } // generate balanced secondary simple_priority_queue pq2(node_list, server_load_balancer::partition_comparator(nodes)); - std::vector temp; + std::vector temp; for (dsn::partition_configuration &pc : the_app->partitions) { temp.clear(); - while (pc.secondaries.size() + 1 < pc.max_replica_count) { - dsn::rpc_address n = pq2.pop(); + while (pc.hp_secondaries.size() + 1 < pc.max_replica_count) { + const auto &n = pq2.pop(); if (!is_member(pc, n)) { - pc.secondaries.push_back(n); + pc.hp_secondaries.push_back(n); nodes[n].put_partition(pc.pid, false); } temp.push_back(n); @@ -153,9 +154,10 @@ void random_move_primary(app_mapper &apps, node_mapper &nodes, int primary_move_ int n = random32(1, space_size) / 100; if (n < primary_move_ratio) { int indice = random32(0, 1); - nodes[pc.primary].remove_partition(pc.pid, true); + nodes[pc.hp_primary].remove_partition(pc.pid, true); std::swap(pc.primary, pc.secondaries[indice]); - nodes[pc.primary].put_partition(pc.pid, true); + std::swap(pc.hp_primary, pc.hp_secondaries[indice]); + nodes[pc.hp_primary].put_partition(pc.pid, true); } } } @@ -164,9 +166,13 @@ void greedy_balancer_perfect_move_primary() { app_mapper apps; node_mapper nodes; - std::vector node_list; + std::vector> node_pairs; + std::vector node_list; + generate_node_list(node_pairs, 19, 100); + for (const auto &p : node_pairs) { + node_list.emplace_back(p.first); + } - generate_node_list(node_list, 20, 100); generate_balanced_apps(apps, nodes, node_list); random_move_primary(apps, nodes, 70); diff --git a/src/meta/test/balancer_validator.cpp b/src/meta/test/balancer_validator.cpp index 81091c6be0..804f9c61d2 100644 --- a/src/meta/test/balancer_validator.cpp +++ b/src/meta/test/balancer_validator.cpp @@ -48,6 +48,7 @@ #include "meta_service_test_app.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" namespace dsn { @@ -67,22 +68,23 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co break; switch (act.type) { case config_type::CT_ASSIGN_PRIMARY: - CHECK(pc.primary.is_invalid(), ""); - CHECK(pc.secondaries.empty(), ""); - CHECK_EQ(act.node, act.target); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(pc.hp_primary.is_invalid(), ""); + CHECK(pc.hp_secondaries.empty(), ""); + CHECK_EQ(act.hp_node, act.hp_target); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); - CHECK_EQ(nodes[act.node].served_as(pc.pid), 
partition_status::PS_INACTIVE); - nodes[act.node].put_partition(pc.pid, true); + CHECK_EQ(nodes[act.hp_node].served_as(pc.pid), partition_status::PS_INACTIVE); + nodes[act.hp_node].put_partition(pc.pid, true); pc.primary = act.node; + pc.hp_primary = act.hp_node; break; case config_type::CT_ADD_SECONDARY: - CHECK(!is_member(pc, act.node), ""); - CHECK_EQ(pc.primary, act.target); - CHECK(nodes.find(act.node) != nodes.end(), ""); - pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + CHECK(!is_member(pc, act.hp_node), ""); + CHECK_EQ(pc.hp_primary, act.hp_target); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); + pc.hp_secondaries.push_back(act.hp_node); + ns = &nodes[act.hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, false); break; @@ -94,20 +96,23 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co } // test upgrade to primary - CHECK_EQ(nodes[pc.primary].served_as(pc.pid), partition_status::PS_PRIMARY); - nodes[pc.primary].remove_partition(pc.pid, true); + CHECK_EQ(nodes[pc.hp_primary].served_as(pc.pid), partition_status::PS_PRIMARY); + nodes[pc.hp_primary].remove_partition(pc.pid, true); pc.primary.set_invalid(); + pc.hp_primary.reset(); ps = guardian.cure({&apps, &nodes}, pc.pid, act); CHECK_EQ(act.type, config_type::CT_UPGRADE_TO_PRIMARY); - CHECK(pc.primary.is_invalid(), ""); - CHECK_EQ(act.node, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(pc.hp_primary.is_invalid(), ""); + CHECK_EQ(act.hp_node, act.hp_target); + CHECK(is_secondary(pc, act.hp_node), ""); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); - ns = &nodes[act.node]; + ns = &nodes[act.hp_node]; pc.primary = act.node; + pc.__set_hp_primary(act.hp_node); std::remove(pc.secondaries.begin(), pc.secondaries.end(), pc.primary); + std::remove(pc.hp_secondaries.begin(), pc.hp_secondaries.end(), pc.hp_primary); CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); ns->put_partition(pc.pid, true); @@ -115,8 +120,12 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co void meta_service_test_app::balancer_validator() { - std::vector node_list; - generate_node_list(node_list, 20, 100); + std::vector> node_pairs; + std::vector node_list; + generate_node_list(node_pairs, 20, 100); + for (const auto &p : node_pairs) { + node_list.emplace_back(p.first); + } app_mapper apps; node_mapper nodes; @@ -156,17 +165,19 @@ void meta_service_test_app::balancer_validator() std::shared_ptr &the_app = apps[1]; for (::dsn::partition_configuration &pc : the_app->partitions) { - CHECK(!pc.primary.is_invalid(), ""); + CHECK(!pc.hp_primary.is_invalid(), ""); CHECK_GE(pc.secondaries.size(), pc.max_replica_count - 1); } // now test the cure ::dsn::partition_configuration &pc = the_app->partitions[0]; - nodes[pc.primary].remove_partition(pc.pid, false); - for (const dsn::rpc_address &addr : pc.secondaries) - nodes[addr].remove_partition(pc.pid, false); + nodes[pc.hp_primary].remove_partition(pc.pid, false); + for (const dsn::host_port &hp : pc.hp_secondaries) + nodes[hp].remove_partition(pc.pid, false); pc.primary.set_invalid(); pc.secondaries.clear(); + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); // cure test check_cure(apps, nodes, pc); @@ -182,10 +193,10 @@ static void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper infile >> total_nodes; std::string ip_port; - std::vector node_list; + std::vector node_list; for (int i = 
0; i < total_nodes; ++i) { infile >> ip_port; - node_list.push_back(rpc_address::from_ip_port(ip_port)); + node_list.push_back(host_port::from_string(ip_port)); } int total_apps; @@ -205,10 +216,10 @@ static void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper int n; infile >> n; infile >> ip_port; - app->partitions[j].primary = rpc_address::from_ip_port(ip_port); + app->partitions[j].hp_primary = host_port::from_string(ip_port); for (int k = 1; k < n; ++k) { infile >> ip_port; - app->partitions[j].secondaries.push_back(rpc_address::from_ip_port(ip_port)); + app->partitions[j].hp_secondaries.push_back(host_port::from_string(ip_port)); } } } diff --git a/src/meta/test/cluster_balance_policy_test.cpp b/src/meta/test/cluster_balance_policy_test.cpp index 0ade35a6c9..8b4b8f6c74 100644 --- a/src/meta/test/cluster_balance_policy_test.cpp +++ b/src/meta/test/cluster_balance_policy_test.cpp @@ -33,9 +33,10 @@ #include "meta/cluster_balance_policy.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" +#include "meta/meta_service.h" #include "meta_admin_types.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/defer.h" #include "utils/fail_point.h" @@ -65,33 +66,27 @@ TEST(cluster_balance_policy, node_migration_info) { { cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10086); + info1.hp = host_port("localhost", 10000); cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(2, 10086); + info2.hp = host_port("localhost", 10086); ASSERT_LT(info1, info2); } { cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10000); + info1.hp = host_port("localhost", 10086); cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(1, 10086); - ASSERT_LT(info1, info2); - } - - { - cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10086); - cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(1, 10086); + info2.hp = host_port("localhost", 10086); ASSERT_EQ(info1, info2); } } TEST(cluster_balance_policy, get_skew) { - std::map count_map = { - {rpc_address(1, 10086), 1}, {rpc_address(2, 10086), 3}, {rpc_address(3, 10086), 5}, + std::map count_map = { + {host_port("localhost", 10085), 1}, + {host_port("localhost", 10086), 3}, + {host_port("localhost", 10087), 5}, }; ASSERT_EQ(get_skew(count_map), count_map.rbegin()->second - count_map.begin()->second); @@ -112,23 +107,24 @@ TEST(cluster_balance_policy, get_partition_count) TEST(cluster_balance_policy, get_app_migration_info) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); int appid = 1; std::string appname = "test"; - auto address = rpc_address(1, 10086); + const auto &hp = host_port("localhost", 10086); app_info info; info.app_id = appid; info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - app->partitions[0].primary = address; + app->partitions[0].hp_primary = hp; node_state ns; - ns.set_addr(address); + ns.set_hp(hp); ns.put_partition(gpid(appid, 0), true); node_mapper nodes; - nodes[address] = ns; + nodes[hp] = ns; cluster_balance_policy::app_migration_info migration_info; { @@ -145,28 +141,29 @@ TEST(cluster_balance_policy, get_app_migration_info) ASSERT_TRUE(res); ASSERT_EQ(migration_info.app_id, appid); ASSERT_EQ(migration_info.app_name, appname); - std::map pstatus_map; - pstatus_map[address] 
= partition_status::type::PS_PRIMARY; + std::map pstatus_map; + pstatus_map[hp] = partition_status::type::PS_PRIMARY; ASSERT_EQ(migration_info.partitions[0], pstatus_map); - ASSERT_EQ(migration_info.replicas_count[address], 1); + ASSERT_EQ(migration_info.replicas_count[hp], 1); } } TEST(cluster_balance_policy, get_node_migration_info) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); int appid = 1; std::string appname = "test"; - auto address = rpc_address(1, 10086); + const auto &hp = host_port("localhost", 10086); app_info info; info.app_id = appid; info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - app->partitions[0].primary = address; + app->partitions[0].hp_primary = hp; serving_replica sr; - sr.node = address; + sr.node = hp; std::string disk_tag = "disk1"; sr.disk_tag = disk_tag; config_context context; @@ -180,14 +177,14 @@ TEST(cluster_balance_policy, get_node_migration_info) all_apps[appid] = app; node_state ns; - ns.set_addr(address); + ns.set_hp(hp); gpid pid = gpid(appid, 0); ns.put_partition(pid, true); cluster_balance_policy::node_migration_info migration_info; policy.get_node_migration_info(ns, all_apps, migration_info); - ASSERT_EQ(migration_info.address, address); + ASSERT_EQ(migration_info.hp, hp); ASSERT_NE(migration_info.partitions.find(disk_tag), migration_info.partitions.end()); ASSERT_EQ(migration_info.partitions.at(disk_tag).size(), 1); ASSERT_EQ(*migration_info.partitions.at(disk_tag).begin(), pid); @@ -195,34 +192,35 @@ TEST(cluster_balance_policy, get_node_migration_info) TEST(cluster_balance_policy, get_min_max_set) { - std::map node_count_map; - node_count_map.emplace(rpc_address(1, 10086), 1); - node_count_map.emplace(rpc_address(2, 10086), 3); - node_count_map.emplace(rpc_address(3, 10086), 5); - node_count_map.emplace(rpc_address(4, 10086), 5); + std::map node_count_map; + node_count_map.emplace(host_port("localhost", 10081), 1); + node_count_map.emplace(host_port("localhost", 10082), 3); + node_count_map.emplace(host_port("localhost", 10083), 5); + node_count_map.emplace(host_port("localhost", 10084), 5); - std::set min_set, max_set; + std::set min_set, max_set; get_min_max_set(node_count_map, min_set, max_set); ASSERT_EQ(min_set.size(), 1); - ASSERT_EQ(*min_set.begin(), rpc_address(1, 10086)); + ASSERT_EQ(*min_set.begin(), host_port("localhost", 10081)); ASSERT_EQ(max_set.size(), 2); - ASSERT_EQ(*max_set.begin(), rpc_address(3, 10086)); - ASSERT_EQ(*max_set.rbegin(), rpc_address(4, 10086)); + ASSERT_EQ(*max_set.begin(), host_port("localhost", 10083)); + ASSERT_EQ(*max_set.rbegin(), host_port("localhost", 10084)); } TEST(cluster_balance_policy, get_disk_partitions_map) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); cluster_balance_policy::cluster_migration_info cluster_info; - rpc_address addr(1, 10086); + const auto &hp = host_port("localhost", 10086); int32_t app_id = 1; - auto disk_partitions = policy.get_disk_partitions_map(cluster_info, addr, app_id); + auto disk_partitions = policy.get_disk_partitions_map(cluster_info, hp, app_id); ASSERT_TRUE(disk_partitions.empty()); - std::map partition; - partition[addr] = partition_status::PS_SECONDARY; + std::map partition; + partition[hp] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; app_info.partitions.push_back(partition); cluster_info.apps_info[app_id] = app_info; @@ -233,10 +231,10 @@ TEST(cluster_balance_policy, 
get_disk_partitions_map) cluster_balance_policy::node_migration_info node_info; std::string disk_tag = "disk1"; node_info.partitions[disk_tag] = partitions; - cluster_info.nodes_info[addr] = node_info; + cluster_info.nodes_info[hp] = node_info; cluster_info.type = balance_type::COPY_SECONDARY; - disk_partitions = policy.get_disk_partitions_map(cluster_info, addr, app_id); + disk_partitions = policy.get_disk_partitions_map(cluster_info, hp, app_id); ASSERT_EQ(disk_partitions.size(), 1); ASSERT_EQ(disk_partitions.count(disk_tag), 1); ASSERT_EQ(disk_partitions[disk_tag].size(), 1); @@ -249,13 +247,13 @@ TEST(cluster_balance_policy, get_max_load_disk_set) cluster_info.type = balance_type::COPY_SECONDARY; int32_t app_id = 1; - rpc_address addr(1, 10086); - rpc_address addr2(2, 10086); - std::map partition; - partition[addr] = partition_status::PS_SECONDARY; - std::map partition2; - partition2[addr] = partition_status::PS_SECONDARY; - partition2[addr2] = partition_status::PS_SECONDARY; + const auto &hp = host_port("localhost", 10086); + const auto &hp2 = host_port("localhost", 10087); + std::map partition; + partition[hp] = partition_status::PS_SECONDARY; + std::map partition2; + partition2[hp] = partition_status::PS_SECONDARY; + partition2[hp2] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; app_info.partitions.push_back(partition); app_info.partitions.push_back(partition2); @@ -272,7 +270,7 @@ TEST(cluster_balance_policy, get_max_load_disk_set) partitions2.insert(pid2); std::string disk_tag2 = "disk2"; node_info.partitions[disk_tag2] = partitions2; - cluster_info.nodes_info[addr] = node_info; + cluster_info.nodes_info[hp] = node_info; cluster_balance_policy::node_migration_info node_info2; partition_set partitions3; @@ -280,12 +278,13 @@ TEST(cluster_balance_policy, get_max_load_disk_set) partitions3.insert(pid3); std::string disk_tag3 = "disk3"; node_info2.partitions[disk_tag3] = partitions3; - cluster_info.nodes_info[addr2] = node_info2; + cluster_info.nodes_info[hp2] = node_info2; - cluster_balance_policy policy(nullptr); - std::set max_nodes; - max_nodes.insert(addr); - max_nodes.insert(addr2); + meta_service svc; + cluster_balance_policy policy(&svc); + std::set max_nodes; + max_nodes.insert(hp); + max_nodes.insert(hp2); std::set max_load_disk_set; policy.get_max_load_disk_set(cluster_info, max_nodes, app_id, max_load_disk_set); @@ -299,11 +298,11 @@ TEST(cluster_balance_policy, apply_move) int32_t app_id = 1; int32_t partition_index = 1; minfo.pid = gpid(app_id, partition_index); - rpc_address source_node(1, 10086); + host_port source_node("localhost", 10086); minfo.source_node = source_node; std::string disk_tag = "disk1"; minfo.source_disk_tag = disk_tag; - rpc_address target_node(2, 10086); + host_port target_node("localhost", 10087); minfo.target_node = target_node; minfo.type = balance_type::MOVE_PRIMARY; @@ -313,7 +312,8 @@ TEST(cluster_balance_policy, apply_move) view.apps = &apps; view.nodes = &nodes; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); policy._global_view = &view; cluster_balance_policy::cluster_migration_info cluster_info; cluster_info.type = balance_type::COPY_SECONDARY; @@ -354,7 +354,7 @@ TEST(cluster_balance_policy, apply_move) ASSERT_FALSE(res); // all of the partition status are not PS_SECONDARY - std::map partition_status; + std::map partition_status; partition_status[source_node] = partition_status::type::PS_PRIMARY; 
cluster_info.apps_info[app_id].partitions.push_back(partition_status); cluster_info.apps_info[app_id].partitions.push_back(partition_status); @@ -389,15 +389,16 @@ TEST(cluster_balance_policy, apply_move) TEST(cluster_balance_policy, pick_up_partition) { cluster_balance_policy::cluster_migration_info cluster_info; - rpc_address addr(1, 10086); + host_port hp("localhost", 10086); int32_t app_id = 1; - std::map partition; - partition[addr] = partition_status::PS_SECONDARY; + std::map partition; + partition[hp] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; app_info.partitions.push_back(partition); cluster_info.apps_info[app_id] = app_info; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); { // all of the partitions in max_load_partitions are not found in cluster_info partition_set max_load_partitions; @@ -407,7 +408,7 @@ TEST(cluster_balance_policy, pick_up_partition) partition_set selected_pid; gpid picked_pid; auto found = policy.pick_up_partition( - cluster_info, addr, max_load_partitions, selected_pid, picked_pid); + cluster_info, hp, max_load_partitions, selected_pid, picked_pid); ASSERT_FALSE(found); } @@ -420,7 +421,7 @@ TEST(cluster_balance_policy, pick_up_partition) gpid picked_pid; auto found = policy.pick_up_partition( - cluster_info, addr, max_load_partitions, selected_pid, picked_pid); + cluster_info, hp, max_load_partitions, selected_pid, picked_pid); ASSERT_FALSE(found); } @@ -432,7 +433,7 @@ TEST(cluster_balance_policy, pick_up_partition) gpid picked_pid; auto found = policy.pick_up_partition( - cluster_info, addr, max_load_partitions, selected_pid, picked_pid); + cluster_info, hp, max_load_partitions, selected_pid, picked_pid); ASSERT_FALSE(found); } @@ -441,11 +442,11 @@ TEST(cluster_balance_policy, pick_up_partition) gpid pid(app_id, 0); max_load_partitions.insert(pid); partition_set selected_pid; - rpc_address not_exist_addr(3, 12345); + const auto ¬_exist_hp = host_port("localhost", 12345); gpid picked_pid; auto found = policy.pick_up_partition( - cluster_info, not_exist_addr, max_load_partitions, selected_pid, picked_pid); + cluster_info, not_exist_hp, max_load_partitions, selected_pid, picked_pid); ASSERT_TRUE(found); ASSERT_EQ(pid, picked_pid); } @@ -470,7 +471,8 @@ TEST(cluster_balance_policy, execute_balance) app->helpers->split_states.splitting_count = 0; app_mapper apps; apps[app_id] = app; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); app->status = app_status::AS_DROPPED; auto res = policy.execute_balance(apps, false, false, true, balance_func); @@ -502,9 +504,9 @@ TEST(cluster_balance_policy, execute_balance) TEST(cluster_balance_policy, calc_potential_moving) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + const auto &hp1 = host_port("localhost", 1); + const auto &hp2 = host_port("localhost", 2); + const auto &hp3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -512,9 +514,9 @@ TEST(cluster_balance_policy, calc_potential_moving) info.partition_count = 4; std::shared_ptr app = app_state::create(info); partition_configuration pc; - pc.primary = addr1; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_primary = hp1; + pc.hp_secondaries.push_back(hp2); + pc.hp_secondaries.push_back(hp3); app->partitions[0] = pc; app->partitions[1] = pc; @@ -525,31 +527,32 @@ TEST(cluster_balance_policy, calc_potential_moving) 
node_state ns1; ns1.put_partition(gpid(app_id, 0), true); ns1.put_partition(gpid(app_id, 1), true); - nodes[addr1] = ns1; + nodes[hp1] = ns1; node_state ns2; ns2.put_partition(gpid(app_id, 0), false); ns2.put_partition(gpid(app_id, 1), false); - nodes[addr2] = ns2; - nodes[addr3] = ns2; + nodes[hp2] = ns2; + nodes[hp3] = ns2; struct meta_view view; view.nodes = &nodes; view.apps = &apps; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); policy._global_view = &view; - auto gpids = policy.calc_potential_moving(app, addr1, addr2); + auto gpids = policy.calc_potential_moving(app, hp1, hp2); ASSERT_EQ(gpids.size(), 2); ASSERT_EQ(*gpids.begin(), gpid(app_id, 0)); ASSERT_EQ(*gpids.rbegin(), gpid(app_id, 1)); - gpids = policy.calc_potential_moving(app, addr1, addr3); + gpids = policy.calc_potential_moving(app, hp1, hp3); ASSERT_EQ(gpids.size(), 2); ASSERT_EQ(*gpids.begin(), gpid(app_id, 0)); ASSERT_EQ(*gpids.rbegin(), gpid(app_id, 1)); - gpids = policy.calc_potential_moving(app, addr2, addr3); + gpids = policy.calc_potential_moving(app, hp2, hp3); ASSERT_EQ(gpids.size(), 0); } } // namespace replication diff --git a/src/meta/test/copy_replica_operation_test.cpp b/src/meta/test/copy_replica_operation_test.cpp index c2f681fd32..2c0045cdf9 100644 --- a/src/meta/test/copy_replica_operation_test.cpp +++ b/src/meta/test/copy_replica_operation_test.cpp @@ -30,7 +30,7 @@ #include "meta/app_balance_policy.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fail_point.h" namespace dsn { @@ -46,38 +46,38 @@ TEST(copy_primary_operation, misc) app_mapper apps; apps[app_id] = app; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + const auto &hp1 = host_port("localhost", 1); + const auto &hp2 = host_port("localhost", 2); + const auto &hp3 = host_port("localhost", 3); node_mapper nodes; node_state ns1; ns1.put_partition(gpid(app_id, 2), true); ns1.put_partition(gpid(app_id, 0), false); - nodes[addr1] = ns1; + nodes[hp1] = ns1; node_state ns2; ns2.put_partition(gpid(app_id, 0), true); ns2.put_partition(gpid(app_id, 1), true); - nodes[addr2] = ns2; + nodes[hp2] = ns2; node_state ns3; ns3.put_partition(gpid(app_id, 2), false); - nodes[addr3] = ns3; + nodes[hp3] = ns3; - std::vector address_vec{addr1, addr2, addr3}; - std::unordered_map address_id; - address_id[addr1] = 0; - address_id[addr2] = 1; - address_id[addr3] = 2; - copy_primary_operation op(app, apps, nodes, address_vec, address_id, false, 0); + std::vector host_port_vec{hp1, hp2, hp3}; + std::unordered_map host_port_id; + host_port_id[hp1] = 0; + host_port_id[hp2] = 1; + host_port_id[hp3] = 2; + copy_primary_operation op(app, apps, nodes, host_port_vec, host_port_id, false, 0); /** - * Test init_ordered_address_ids + * Test init_ordered_host_port_ids */ - op.init_ordered_address_ids(); - ASSERT_EQ(op._ordered_address_ids.size(), 3); - ASSERT_EQ(*op._ordered_address_ids.begin(), 2); - ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0); - ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1); + op.init_ordered_host_port_ids(); + ASSERT_EQ(op._ordered_host_port_ids.size(), 3); + ASSERT_EQ(*op._ordered_host_port_ids.begin(), 2); + ASSERT_EQ(*(++op._ordered_host_port_ids.begin()), 0); + ASSERT_EQ(*op._ordered_host_port_ids.rbegin(), 1); ASSERT_EQ(op._partition_counts[0], 1); ASSERT_EQ(op._partition_counts[1], 2); ASSERT_EQ(op._partition_counts[2], 0); @@ 
-97,14 +97,14 @@ TEST(copy_primary_operation, misc) disk_load load; load[disk1] = 2; load[disk2] = 6; - op._node_loads[addr2] = load; + op._node_loads[hp2] = load; serving_replica serving_partition0; - serving_partition0.node = addr2; + serving_partition0.node = hp2; serving_partition0.disk_tag = disk1; app->helpers->contexts[0].serving.push_back(serving_partition0); serving_replica serving_partition1; - serving_partition1.node = addr2; + serving_partition1.node = hp2; serving_partition1.disk_tag = disk2; app->helpers->contexts[1].serving.push_back(serving_partition1); @@ -126,23 +126,23 @@ TEST(copy_primary_operation, misc) ASSERT_TRUE(op.can_continue()); op._replicas_low = 0; - nodes[addr2].remove_partition(gpid(app_id, 1), false); - op.init_ordered_address_ids(); + nodes[hp2].remove_partition(gpid(app_id, 1), false); + op.init_ordered_host_port_ids(); ASSERT_FALSE(op.can_continue()); - nodes[addr2].put_partition(gpid(app_id, 1), true); + nodes[hp2].put_partition(gpid(app_id, 1), true); /** - * Test update_ordered_address_ids + * Test update_ordered_host_port_ids */ - nodes[addr1].put_partition(gpid(app_id, 3), true); - nodes[addr2].put_partition(gpid(app_id, 4), true); - nodes[addr2].put_partition(gpid(app_id, 5), true); - op.init_ordered_address_ids(); - op.update_ordered_address_ids(); - ASSERT_EQ(op._ordered_address_ids.size(), 3); - ASSERT_EQ(*op._ordered_address_ids.begin(), 2); - ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0); - ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1); + nodes[hp1].put_partition(gpid(app_id, 3), true); + nodes[hp2].put_partition(gpid(app_id, 4), true); + nodes[hp2].put_partition(gpid(app_id, 5), true); + op.init_ordered_host_port_ids(); + op.update_ordered_host_port_ids(); + ASSERT_EQ(op._ordered_host_port_ids.size(), 3); + ASSERT_EQ(*op._ordered_host_port_ids.begin(), 2); + ASSERT_EQ(*(++op._ordered_host_port_ids.begin()), 0); + ASSERT_EQ(*op._ordered_host_port_ids.rbegin(), 1); ASSERT_EQ(op._partition_counts[0], 2); ASSERT_EQ(op._partition_counts[1], 3); ASSERT_EQ(op._partition_counts[2], 1); @@ -166,9 +166,9 @@ TEST(copy_primary_operation, can_select) { app_mapper apps; node_mapper nodes; - std::vector address_vec; - std::unordered_map address_id; - copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false); + std::vector host_port_vec; + std::unordered_map host_port_id; + copy_primary_operation op(nullptr, apps, nodes, host_port_vec, host_port_id, false, false); gpid cannot_select_gpid(1, 1); gpid can_select_gpid(1, 2); @@ -183,9 +183,9 @@ TEST(copy_primary_operation, only_copy_primary) { app_mapper apps; node_mapper nodes; - std::vector address_vec; - std::unordered_map address_id; - copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false); + std::vector host_port_vec; + std::unordered_map host_port_id; + copy_primary_operation op(nullptr, apps, nodes, host_port_vec, host_port_id, false, false); ASSERT_TRUE(op.only_copy_primary()); } @@ -200,29 +200,29 @@ TEST(copy_secondary_operation, misc) app_mapper apps; apps[app_id] = app; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + const auto &hp1 = host_port("localhost", 1); + const auto &hp2 = host_port("localhost", 2); + const auto &hp3 = host_port("localhost", 3); node_mapper nodes; node_state ns1; ns1.put_partition(gpid(app_id, 2), true); ns1.put_partition(gpid(app_id, 0), false); - nodes[addr1] = ns1; + nodes[hp1] = ns1; node_state ns2; ns2.put_partition(gpid(app_id, 0), true); 
ns2.put_partition(gpid(app_id, 1), true); - nodes[addr2] = ns2; + nodes[hp2] = ns2; node_state ns3; - nodes[addr3] = ns3; + nodes[hp3] = ns3; - std::vector address_vec{addr1, addr2, addr3}; - std::unordered_map address_id; - address_id[addr1] = 0; - address_id[addr2] = 1; - address_id[addr3] = 2; - copy_secondary_operation op(app, apps, nodes, address_vec, address_id, 0); - op.init_ordered_address_ids(); + std::vector host_port_vec{hp1, hp2, hp3}; + std::unordered_map host_port_id; + host_port_id[hp1] = 0; + host_port_id[hp2] = 1; + host_port_id[hp3] = 2; + copy_secondary_operation op(app, apps, nodes, host_port_vec, host_port_id, 0); + op.init_ordered_host_port_ids(); /** * Test copy_secondary_operation::get_partition_count @@ -242,17 +242,17 @@ TEST(copy_secondary_operation, misc) ASSERT_FALSE(res); op._replicas_low = 0; - nodes[addr3].put_partition(gpid(app_id, 2), false); - op.init_ordered_address_ids(); + nodes[hp3].put_partition(gpid(app_id, 2), false); + op.init_ordered_host_port_ids(); res = op.can_continue(); ASSERT_FALSE(res); - nodes[addr3].remove_partition(gpid(app_id, 2), false); + nodes[hp3].remove_partition(gpid(app_id, 2), false); /** * Test copy_secondary_operation::can_select */ - nodes[addr1].put_partition(gpid(app_id, 3), true); - op.init_ordered_address_ids(); + nodes[hp1].put_partition(gpid(app_id, 3), true); + op.init_ordered_host_port_ids(); migration_list list; res = op.can_select(gpid(app_id, 3), &list); ASSERT_FALSE(res); @@ -263,13 +263,13 @@ TEST(copy_secondary_operation, misc) ASSERT_FALSE(res); list.clear(); - nodes[addr3].put_partition(secondary_gpid, true); - op.init_ordered_address_ids(); + nodes[hp3].put_partition(secondary_gpid, true); + op.init_ordered_host_port_ids(); res = op.can_select(secondary_gpid, &list); ASSERT_FALSE(res); - nodes[addr3].remove_partition(secondary_gpid, false); - op.init_ordered_address_ids(); + nodes[hp3].remove_partition(secondary_gpid, false); + op.init_ordered_host_port_ids(); res = op.can_select(secondary_gpid, &list); ASSERT_TRUE(res); diff --git a/src/meta/test/duplication_info_test.cpp b/src/meta/test/duplication_info_test.cpp index 9383958e47..9e11be6370 100644 --- a/src/meta/test/duplication_info_test.cpp +++ b/src/meta/test/duplication_info_test.cpp @@ -51,7 +51,7 @@ class duplication_info_test : public testing::Test 2, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); duplication_confirm_entry entry; ASSERT_FALSE(dup.alter_progress(0, entry)); @@ -104,7 +104,7 @@ class duplication_info_test : public testing::Test 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); ASSERT_FALSE(dup.is_altering()); ASSERT_EQ(dup._status, duplication_status::DS_INIT); @@ -134,7 +134,7 @@ class duplication_info_test : public testing::Test 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); @@ -153,7 +153,7 @@ class duplication_info_test : public testing::Test 4, 0, "slave-cluster", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); dup.persist_status(); @@ -187,7 +187,7 @@ TEST_F(duplication_info_test, alter_status_when_busy) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); @@ -259,7 +259,7 @@ TEST_F(duplication_info_test, alter_status) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); for (const auto from : tt.from_list) { 
force_update_status(dup, from); @@ -289,7 +289,7 @@ TEST_F(duplication_info_test, is_valid) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); ASSERT_TRUE(dup.is_invalid_status()); diff --git a/src/meta/test/ford_fulkerson_test.cpp b/src/meta/test/ford_fulkerson_test.cpp index 00b49d9bee..1d3084e3f3 100644 --- a/src/meta/test/ford_fulkerson_test.cpp +++ b/src/meta/test/ford_fulkerson_test.cpp @@ -18,6 +18,7 @@ // IWYU pragma: no_include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include "gtest/gtest.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { @@ -41,12 +42,12 @@ TEST(ford_fulkerson, build_failure) node_mapper nodes; node_state ns; ns.put_partition(gpid(app_id, 0), true); - nodes[rpc_address(1, 1)] = ns; - nodes[rpc_address(2, 2)] = ns; - nodes[rpc_address(3, 3)] = ns; + nodes[host_port("localhost", 1)] = ns; + nodes[host_port("localhost", 2)] = ns; + nodes[host_port("localhost", 3)] = ns; - std::unordered_map address_id; - auto ff = ford_fulkerson::builder(app, nodes, address_id).build(); + std::unordered_map host_port_id; + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); ASSERT_EQ(ff, nullptr); } @@ -58,21 +59,21 @@ TEST(ford_fulkerson, add_edge) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); - address_id[addr1] = 1; - address_id[addr2] = 2; - address_id[addr3] = 3; + std::unordered_map host_port_id; + const auto &hp1 = host_port("localhost", 1); + const auto &hp2 = host_port("localhost", 2); + const auto &hp3 = host_port("localhost", 3); + host_port_id[hp1] = 1; + host_port_id[hp2] = 2; + host_port_id[hp3] = 3; node_mapper nodes; node_state ns; - nodes[addr1] = ns; - nodes[addr2] = ns; - nodes[addr3] = ns; + nodes[hp1] = ns; + nodes[hp2] = ns; + nodes[hp3] = ns; - auto ff = ford_fulkerson::builder(app, nodes, address_id).build(); + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); ff->add_edge(1, ns); ASSERT_EQ(ff->_network[1].back(), 1); @@ -85,9 +86,9 @@ TEST(ford_fulkerson, add_edge) TEST(ford_fulkerson, update_decree) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(2, 2); - auto addr3 = rpc_address(3, 3); + const auto &hp1 = host_port("localhost", 1); + const auto &hp2 = host_port("localhost", 2); + const auto &hp3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -95,8 +96,8 @@ TEST(ford_fulkerson, update_decree) info.partition_count = 1; std::shared_ptr app = app_state::create(info); partition_configuration pc; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_secondaries.push_back(hp2); + pc.hp_secondaries.push_back(hp3); app->partitions.push_back(pc); app->partitions.push_back(pc); @@ -104,17 +105,17 @@ TEST(ford_fulkerson, update_decree) node_state ns; ns.put_partition(gpid(app_id, 0), true); ns.put_partition(gpid(app_id, 1), true); - nodes[addr1] = ns; - nodes[addr2] = ns; - nodes[addr3] = ns; + nodes[hp1] = ns; + nodes[hp2] = ns; + nodes[hp3] = ns; - std::unordered_map address_id; - address_id[addr1] = 1; - address_id[addr2] = 2; - address_id[addr3] = 3; + std::unordered_map host_port_id; + host_port_id[hp1] = 1; + host_port_id[hp2] = 2; + host_port_id[hp3] = 3; auto node_id = 1; - auto ff = 
ford_fulkerson::builder(app, nodes, address_id).build(); + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); ff->update_decree(node_id, ns); ASSERT_EQ(ff->_network[1][2], 2); ASSERT_EQ(ff->_network[1][3], 2); @@ -122,9 +123,9 @@ TEST(ford_fulkerson, update_decree) TEST(ford_fulkerson, find_shortest_path) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(2, 2); - auto addr3 = rpc_address(3, 3); + auto hp1 = host_port("localhost", 1); + auto hp2 = host_port("localhost", 2); + auto hp3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -133,9 +134,9 @@ TEST(ford_fulkerson, find_shortest_path) std::shared_ptr app = app_state::create(info); partition_configuration pc; - pc.primary = addr1; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_primary = hp1; + pc.hp_secondaries.push_back(hp2); + pc.hp_secondaries.push_back(hp3); app->partitions[0] = pc; app->partitions[1] = pc; @@ -143,18 +144,18 @@ TEST(ford_fulkerson, find_shortest_path) node_state ns1; ns1.put_partition(gpid(app_id, 0), true); ns1.put_partition(gpid(app_id, 1), true); - nodes[addr1] = ns1; + nodes[hp1] = ns1; node_state ns2; ns2.put_partition(gpid(app_id, 0), false); ns2.put_partition(gpid(app_id, 1), false); - nodes[addr2] = ns2; - nodes[addr3] = ns2; + nodes[hp2] = ns2; + nodes[hp3] = ns2; - std::unordered_map address_id; - address_id[addr1] = 1; - address_id[addr2] = 2; - address_id[addr3] = 3; + std::unordered_map host_port_id; + host_port_id[hp1] = 1; + host_port_id[hp2] = 2; + host_port_id[hp3] = 3; /** * ford fulkerson graph: @@ -165,7 +166,7 @@ TEST(ford_fulkerson, find_shortest_path) * 2 --------> 4 (sink) * 1 */ - auto ff = ford_fulkerson::builder(app, nodes, address_id).build(); + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); ASSERT_EQ(ff->_network[0][0], 0); ASSERT_EQ(ff->_network[0][1], 1); ASSERT_EQ(ff->_network[0][2], 0); @@ -219,20 +220,20 @@ TEST(ford_fulkerson, max_value_pos) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); - address_id[addr1] = 1; - address_id[addr2] = 2; - address_id[addr3] = 3; + std::unordered_map host_port_id; + auto hp1 = host_port("localhost", 1); + auto hp2 = host_port("localhost", 2); + auto hp3 = host_port("localhost", 3); + host_port_id[hp1] = 1; + host_port_id[hp2] = 2; + host_port_id[hp3] = 3; node_mapper nodes; node_state ns; - nodes[addr1] = ns; - nodes[addr2] = ns; - nodes[addr3] = ns; - auto ff = ford_fulkerson::builder(app, nodes, address_id).build(); + nodes[hp1] = ns; + nodes[hp2] = ns; + nodes[hp3] = ns; + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); std::vector visit(5, false); std::vector flow(5, 0); @@ -257,20 +258,20 @@ TEST(ford_fulkerson, select_node) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); - address_id[addr1] = 1; - address_id[addr2] = 2; - address_id[addr3] = 3; + std::unordered_map host_port_id; + auto hp1 = host_port("localhost", 1); + auto hp2 = host_port("localhost", 2); + auto hp3 = host_port("localhost", 3); + host_port_id[hp1] = 1; + host_port_id[hp2] = 2; + host_port_id[hp3] = 3; node_mapper nodes; node_state ns; - nodes[addr1] = ns; - nodes[addr2] = ns; - nodes[addr3] = ns; - auto ff = 
ford_fulkerson::builder(app, nodes, address_id).build(); + nodes[hp1] = ns; + nodes[hp2] = ns; + nodes[hp3] = ns; + auto ff = ford_fulkerson::builder(app, nodes, host_port_id).build(); std::vector visit(5, false); std::vector flow(5, 0); diff --git a/src/meta/test/json_compacity.cpp b/src/meta/test/json_compacity.cpp index 66b870ede9..c1ca9a7511 100644 --- a/src/meta/test/json_compacity.cpp +++ b/src/meta/test/json_compacity.cpp @@ -39,6 +39,7 @@ #include "meta/meta_backup_service.h" #include "meta_service_test_app.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" namespace dsn { @@ -86,14 +87,18 @@ void meta_service_test_app::json_compacity() // 4. old pc version const char *json3 = "{\"pid\":\"1.1\",\"ballot\":234,\"max_replica_count\":3," "\"primary\":\"invalid address\",\"secondaries\":[\"127.0.0.1:6\"]," + "\"hp_primary\":\"invalid host_port\",\"hp_secondaries\":[\"localhost:6\"]," "\"last_drops\":[],\"last_committed_decree\":157}"; dsn::partition_configuration pc; dsn::json::json_forwarder::decode( dsn::blob(json3, 0, strlen(json3)), pc); ASSERT_EQ(234, pc.ballot); + ASSERT_TRUE(pc.hp_primary.is_invalid()); ASSERT_TRUE(pc.primary.is_invalid()); + ASSERT_EQ(1, pc.hp_secondaries.size()); ASSERT_EQ(1, pc.secondaries.size()); ASSERT_STREQ("127.0.0.1:6", pc.secondaries[0].to_string()); + ASSERT_EQ("localhost:6", pc.hp_secondaries[0].to_string()); ASSERT_EQ(157, pc.last_committed_decree); ASSERT_EQ(0, pc.partition_flags); diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index ae8a4da358..8cd472aae6 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -41,7 +41,7 @@ #include "meta_service_test_app.h" #include "meta_test_base.h" #include "misc/misc.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_tracker.h" #include "utils/defer.h" @@ -507,7 +507,7 @@ TEST_F(meta_app_operation_test, create_app) // keep the number of all nodes greater than that of alive nodes const int total_node_count = 10; - std::vector nodes = ensure_enough_alive_nodes(total_node_count); + auto nodes = ensure_enough_alive_nodes(total_node_count); // the meta function level will become freezed once // alive_nodes * 100 < total_nodes * _node_live_percentage_threshold_for_update diff --git a/src/meta/test/meta_backup_test.cpp b/src/meta/test/meta_backup_test.cpp index 3fdf84c5fe..5a77b13bc6 100644 --- a/src/meta/test/meta_backup_test.cpp +++ b/src/meta/test/meta_backup_test.cpp @@ -35,7 +35,7 @@ #include "meta/server_state.h" #include "meta_test_base.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/env.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -250,8 +250,7 @@ class backup_engine_test : public meta_test_base int32_t progress) { gpid pid = gpid(_app_id, partition_index); - const auto mock_primary_address = - rpc_address::from_ip_port("127.0.0.1", 10000 + partition_index); + const auto hp_mock_primary = host_port("localhost", 10000 + partition_index); backup_response resp; resp.backup_id = _backup_engine->_cur_backup.backup_id; @@ -259,16 +258,15 @@ class backup_engine_test : public meta_test_base resp.err = resp_err; resp.progress = progress; - _backup_engine->on_backup_reply(rpc_err, resp, pid, mock_primary_address); + _backup_engine->on_backup_reply(rpc_err, resp, pid, 
hp_mock_primary); } void mock_on_backup_reply_when_timeout(int32_t partition_index, error_code rpc_err) { gpid pid = gpid(_app_id, partition_index); - const auto mock_primary_address = - rpc_address::from_ip_port("127.0.0.1", 10000 + partition_index); + const auto hp_mock_primary = host_port("localhost", 10000 + partition_index); backup_response resp; - _backup_engine->on_backup_reply(rpc_err, resp, pid, mock_primary_address); + _backup_engine->on_backup_reply(rpc_err, resp, pid, hp_mock_primary); } bool is_backup_failed() const diff --git a/src/meta/test/meta_bulk_load_ingestion_test.cpp b/src/meta/test/meta_bulk_load_ingestion_test.cpp index 41a9db01b9..daed55067c 100644 --- a/src/meta/test/meta_bulk_load_ingestion_test.cpp +++ b/src/meta/test/meta_bulk_load_ingestion_test.cpp @@ -27,7 +27,7 @@ #include "meta/meta_bulk_load_ingestion_context.h" #include "meta/meta_data.h" #include "meta_test_base.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fail_point.h" namespace dsn { @@ -88,7 +88,7 @@ class node_context_test : public meta_test_base public: ingestion_context::node_context _context; - const rpc_address NODE = rpc_address::from_ip_port("127.0.0.1", 10086); + const host_port NODE = host_port("localhost", 10086); const std::string TAG = "default"; const std::string TAG2 = "tag2"; }; @@ -182,7 +182,7 @@ class ingestion_context_test : public meta_test_base } bool check_node_ingestion(const uint32_t max_node_count, - const rpc_address &node, + const host_port &node, const std::string &tag) { _context->reset_all(); @@ -223,15 +223,15 @@ class ingestion_context_test : public meta_test_base } void mock_partition(const uint32_t pidx, - std::vector nodes, + std::vector nodes, const std::vector tags, partition_configuration &config, config_context &cc) { config.pid = gpid(APP_ID, pidx); - config.primary = nodes[0]; - config.secondaries.emplace_back(nodes[1]); - config.secondaries.emplace_back(nodes[2]); + config.hp_primary = nodes[0]; + config.hp_secondaries.emplace_back(nodes[1]); + config.hp_secondaries.emplace_back(nodes[2]); auto count = nodes.size(); for (auto i = 0; i < count; i++) { @@ -242,7 +242,7 @@ class ingestion_context_test : public meta_test_base } } - void add_node_context(std::vector nodes) + void add_node_context(std::vector nodes) { for (const auto &address : nodes) { ingestion_context::node_context node(address, TAG1); @@ -276,7 +276,7 @@ class ingestion_context_test : public meta_test_base void reset_app() { return _context->reset_app(APP_ID); } - int32_t get_node_running_count(const rpc_address &node) + int32_t get_node_running_count(const host_port &node) { if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) { return 0; @@ -284,7 +284,7 @@ class ingestion_context_test : public meta_test_base return _context->_nodes_context[node].node_ingesting_count; } - uint32_t get_disk_running_count(const rpc_address &node, const std::string &disk_tag) + uint32_t get_disk_running_count(const host_port &node, const std::string &disk_tag) { if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) { return 0; @@ -296,7 +296,7 @@ class ingestion_context_test : public meta_test_base return node_cc.disk_ingesting_counts[disk_tag]; } - bool validate_count(const rpc_address &node, + bool validate_count(const host_port &node, const uint32_t expected_node_count, const uint32_t expected_disk1_count, const uint32_t expected_disk2_count) @@ -313,10 +313,10 @@ class ingestion_context_test : public meta_test_base 
const uint32_t PARTITION_COUNT = 4; const uint32_t MAX_NODE_COUNT = 2; const uint32_t MIN_DISK_COUNT = 2; - const rpc_address NODE1 = rpc_address::from_ip_port("127.0.0.1", 10086); - const rpc_address NODE2 = rpc_address::from_ip_port("127.0.0.1", 10085); - const rpc_address NODE3 = rpc_address::from_ip_port("127.0.0.1", 10087); - const rpc_address NODE4 = rpc_address::from_ip_port("127.0.0.1", 10088); + const host_port NODE1 = host_port("localhost", 10086); + const host_port NODE2 = host_port("localhost", 10085); + const host_port NODE3 = host_port("localhost", 10087); + const host_port NODE4 = host_port("localhost", 10088); const std::string TAG1 = "tag1"; const std::string TAG2 = "tag2"; }; @@ -325,7 +325,7 @@ TEST_F(ingestion_context_test, check_node_ingestion_test) { struct check_node_ingestion_test { - rpc_address node; + host_port node; std::string tag; uint32_t max_node_count; bool expected_result; diff --git a/src/meta/test/meta_bulk_load_service_test.cpp b/src/meta/test/meta_bulk_load_service_test.cpp index 41f84f866d..331a4a0d28 100644 --- a/src/meta/test/meta_bulk_load_service_test.cpp +++ b/src/meta/test/meta_bulk_load_service_test.cpp @@ -50,6 +50,7 @@ #include "meta_test_base.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -177,6 +178,10 @@ class bulk_load_service_test : public meta_test_base config.primary = PRIMARY; config.secondaries.emplace_back(SECONDARY1); config.secondaries.emplace_back(SECONDARY2); + config.hp_primary = PRIMARY_HP; + config.__set_hp_secondaries(std::vector()); + config.hp_secondaries.emplace_back(SECONDARY1_HP); + config.hp_secondaries.emplace_back(SECONDARY2_HP); app->partitions.clear(); app->partitions.emplace_back(config); mock_meta_bulk_load_context(app->app_id, app->partition_count, status); @@ -192,9 +197,11 @@ class bulk_load_service_test : public meta_test_base std::shared_ptr app = find_app(name); if (mock_primary_invalid) { app->partitions[pid.get_partition_index()].primary.set_invalid(); + app->partitions[pid.get_partition_index()].hp_primary.reset(); } if (mock_lack_secondary) { app->partitions[pid.get_partition_index()].secondaries.clear(); + app->partitions[pid.get_partition_index()].hp_secondaries.clear(); } partition_configuration pconfig; bool flag = bulk_svc().check_partition_status( @@ -213,13 +220,13 @@ class bulk_load_service_test : public meta_test_base { partition_bulk_load_info &pinfo = bulk_svc()._partition_bulk_load_info[pid]; pinfo.status = bulk_load_status::BLS_INGESTING; - pinfo.addresses.clear(); - pinfo.addresses.emplace_back(PRIMARY); - pinfo.addresses.emplace_back(SECONDARY1); + pinfo.host_ports.clear(); + pinfo.host_ports.emplace_back(PRIMARY_HP); + pinfo.host_ports.emplace_back(SECONDARY1_HP); if (use_secondary3) { - pinfo.addresses.emplace_back(SECONDARY3); + pinfo.host_ports.emplace_back(SECONDARY3_HP); } else { - pinfo.addresses.emplace_back(SECONDARY2); + pinfo.host_ports.emplace_back(SECONDARY2_HP); } pinfo.ever_ingest_succeed = ever_ingest_succeed; } @@ -233,16 +240,24 @@ class bulk_load_service_test : public meta_test_base partition_configuration config; config.pid = pid; config.primary = PRIMARY; + config.__set_hp_primary(PRIMARY_HP); + config.__set_hp_secondaries(std::vector()); if (same) { config.secondaries.emplace_back(SECONDARY1); config.secondaries.emplace_back(SECONDARY2); + config.hp_secondaries.emplace_back(SECONDARY1_HP); + 
config.hp_secondaries.emplace_back(SECONDARY2_HP); } else { config.secondaries.emplace_back(SECONDARY1); + config.hp_secondaries.emplace_back(SECONDARY1_HP); if (secondary_count == 2) { config.secondaries.emplace_back(SECONDARY3); + config.hp_secondaries.emplace_back(SECONDARY3_HP); } else if (secondary_count >= 3) { config.secondaries.emplace_back(SECONDARY2); config.secondaries.emplace_back(SECONDARY3); + config.hp_secondaries.emplace_back(SECONDARY2_HP); + config.hp_secondaries.emplace_back(SECONDARY3_HP); } } auto flag = bulk_svc().check_ever_ingestion_succeed(config, APP_NAME, pid); @@ -291,7 +306,8 @@ class bulk_load_service_test : public meta_test_base const gpid &pid, error_code rpc_err = ERR_OK) { - bulk_svc().on_partition_ingestion_reply(rpc_err, std::move(resp), APP_NAME, pid, PRIMARY); + bulk_svc().on_partition_ingestion_reply( + rpc_err, std::move(resp), APP_NAME, pid, PRIMARY_HP); wait_all(); } @@ -506,6 +522,11 @@ class bulk_load_service_test : public meta_test_base const rpc_address SECONDARY1 = rpc_address::from_ip_port("127.0.0.1", 10085); const rpc_address SECONDARY2 = rpc_address::from_ip_port("127.0.0.1", 10087); const rpc_address SECONDARY3 = rpc_address::from_ip_port("127.0.0.1", 10080); + + const host_port PRIMARY_HP = host_port("localhost", 10086); + const host_port SECONDARY1_HP = host_port("localhost", 10085); + const host_port SECONDARY2_HP = host_port("localhost", 10087); + const host_port SECONDARY3_HP = host_port("localhost", 10080); }; /// start bulk load unit tests @@ -771,6 +792,7 @@ class bulk_load_process_test : public bulk_load_service_test _req.pid = gpid(_app_id, _pidx); _req.primary_addr = PRIMARY; _req.meta_bulk_load_status = status; + _req.__set_hp_primary(PRIMARY_HP); } void create_basic_response(error_code err, bulk_load_status::type status) @@ -797,6 +819,11 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; _resp.__set_total_download_progress(total_progress); + + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; } void mock_response_bulk_load_metadata() @@ -828,6 +855,12 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; _resp.__set_is_group_ingestion_finished(secondary_istatus == ingestion_status::IS_SUCCEED); + + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; + set_app_ingesting_count(_app_id, ingestion_count); } @@ -840,8 +873,13 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[PRIMARY] = state; _resp.group_bulk_load_state[SECONDARY1] = state; + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + state2.__set_is_cleaned_up(all_cleaned_up); _resp.group_bulk_load_state[SECONDARY2] = state2; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; _resp.__set_is_group_bulk_load_context_cleaned_up(all_cleaned_up); } @@ -856,6 +894,12 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[PRIMARY] = state; 
_resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; + + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; + _resp.__set_is_group_bulk_load_paused(is_group_paused); } @@ -975,7 +1019,7 @@ TEST_F(bulk_load_process_test, ingestion_one_succeed) const auto &pinfo = get_partition_bulk_load_info(gpid(_app_id, _pidx)); ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED); ASSERT_TRUE(pinfo.ever_ingest_succeed); - ASSERT_EQ(pinfo.addresses.size(), 3); + ASSERT_EQ(pinfo.host_ports.size(), 3); } TEST_F(bulk_load_process_test, ingestion_one_succeed_update) @@ -989,9 +1033,9 @@ TEST_F(bulk_load_process_test, ingestion_one_succeed_update) const auto &pinfo = get_partition_bulk_load_info(pid); ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED); ASSERT_TRUE(pinfo.ever_ingest_succeed); - ASSERT_EQ(pinfo.addresses.size(), 3); - ASSERT_EQ(std::find(pinfo.addresses.begin(), pinfo.addresses.end(), SECONDARY3), - pinfo.addresses.end()); + ASSERT_EQ(pinfo.host_ports.size(), 3); + ASSERT_EQ(std::find(pinfo.host_ports.begin(), pinfo.host_ports.end(), SECONDARY3_HP), + pinfo.host_ports.end()); } TEST_F(bulk_load_process_test, normal_succeed) diff --git a/src/meta/test/meta_data.cpp b/src/meta/test/meta_data.cpp index 16bcf7f1c4..33a2f4954c 100644 --- a/src/meta/test/meta_data.cpp +++ b/src/meta/test/meta_data.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include "client/partition_resolver.h" @@ -36,12 +37,13 @@ #include "metadata_types.h" #include "misc/misc.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" using namespace dsn::replication; TEST(meta_data, dropped_cmp) { - dsn::rpc_address n; + dsn::host_port n; dropped_replica d1, d2; // time not equal @@ -129,11 +131,14 @@ TEST(meta_data, collect_replica) dsn::partition_configuration &pc = *get_config(app, rep.pid); config_context &cc = *get_config_context(app, rep.pid); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 10, 10); #define CLEAR_REPLICA \ do { \ + pc.__set_hp_primary(dsn::host_port()); \ + pc.__set_hp_secondaries({}); \ + pc.__set_hp_last_drops({}); \ pc.primary.set_invalid(); \ pc.secondaries.clear(); \ pc.last_drops.clear(); \ @@ -153,52 +158,54 @@ TEST(meta_data, collect_replica) CLEAR_ALL; rep.ballot = 10; pc.ballot = 9; - pc.primary = node_list[0]; - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + pc.primary = node_list[0].second; + pc.__set_hp_primary(node_list[0].first); + ASSERT_TRUE(collect_replica(view, node_list[0].first, rep)); } { // replica is secondary of partition CLEAR_ALL; - pc.secondaries.push_back(node_list[0]); - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + pc.secondaries.push_back(node_list[0].second); + pc.hp_secondaries.push_back(node_list[0].first); + ASSERT_TRUE(collect_replica(view, node_list[0].first, rep)); } { // replica has been in the drop_list CLEAR_ALL; - cc.dropped.push_back({node_list[0], 5, 0, 0}); - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + cc.dropped.push_back({node_list[0].first, 5, 0, 0}); + ASSERT_TRUE(collect_replica(view, node_list[0].first, rep)); } { // drop_list all have timestamp, full CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], 5, 1, 1, 2}, - dropped_replica{node_list[1], 6, 1, 1, 2}, - dropped_replica{node_list[2], 7, 1, 1, 2}, - dropped_replica{node_list[3], 8, 1, 1, 
2}, + dropped_replica{node_list[0].first, 5, 1, 1, 2}, + dropped_replica{node_list[1].first, 6, 1, 1, 2}, + dropped_replica{node_list[2].first, 7, 1, 1, 2}, + dropped_replica{node_list[3].first, 8, 1, 1, 2}, }; rep.ballot = 10; rep.last_prepared_decree = 10; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list all have timestamp, not full CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], 5, 1, 1, 2}, - dropped_replica{node_list[1], 6, 1, 1, 2}, - dropped_replica{node_list[2], 7, 1, 1, 2}, + dropped_replica{node_list[0].first, 5, 1, 1, 2}, + dropped_replica{node_list[1].first, 6, 1, 1, 2}, + dropped_replica{node_list[2].first, 7, 1, 1, 2}, }; rep.ballot = 10; rep.last_durable_decree = 6; rep.last_committed_decree = 8; rep.last_prepared_decree = 10; - ASSERT_TRUE(collect_replica(view, node_list[4], rep)); + ASSERT_TRUE(collect_replica(view, node_list[4].first, rep)); dropped_replica &d = cc.dropped.front(); ASSERT_EQ(d.ballot, rep.ballot); ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree); @@ -208,33 +215,33 @@ TEST(meta_data, collect_replica) // drop_list mixed, full, minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, - dropped_replica{node_list[2], 7, 1, 1, 5}, - dropped_replica{node_list[3], 8, 1, 1, 5}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, + dropped_replica{node_list[2].first, 7, 1, 1, 5}, + dropped_replica{node_list[3].first, 8, 1, 1, 5}, }; rep.ballot = 1; rep.last_committed_decree = 3; rep.last_prepared_decree = 5; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list mixed, not full, minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, - dropped_replica{node_list[2], 7, 1, 1, 6}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}, }; rep.ballot = 1; rep.last_committed_decree = 3; rep.last_prepared_decree = 5; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); dropped_replica &d = cc.dropped.front(); - ASSERT_EQ(d.node, node_list[5]); + ASSERT_EQ(d.node, node_list[5].first); ASSERT_EQ(d.ballot, rep.ballot); ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree); } @@ -243,16 +250,16 @@ TEST(meta_data, collect_replica) // drop_list mixed, full, not minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}, - dropped_replica{node_list[3], 8, 1, 1, 6}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}, + dropped_replica{node_list[3].first, 8, 1, 1, 6}, }; rep.ballot = 2; rep.last_committed_decree = 3; 
rep.last_prepared_decree = 6; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); dropped_replica &d = cc.dropped.front(); ASSERT_EQ(rep.ballot, d.ballot); ASSERT_EQ(rep.last_committed_decree, rep.last_committed_decree); @@ -263,20 +270,21 @@ TEST(meta_data, collect_replica) { // drop_list mixed, not full, not minimal position CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}}; rep.ballot = 3; rep.last_committed_decree = 1; rep.last_prepared_decree = 6; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 1, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}}; + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 3, 1, 6}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}}; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); } @@ -285,38 +293,38 @@ TEST(meta_data, collect_replica) // drop_list no timestamp, full, minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; rep.ballot = 1; rep.last_committed_decree = 7; rep.last_prepared_decree = 10; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list no timestamp, full, middle position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; rep.ballot = 3; rep.last_committed_decree = 6; rep.last_prepared_decree = 8; - ASSERT_TRUE(collect_replica(view, 
node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 3, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); @@ -325,21 +333,22 @@ TEST(meta_data, collect_replica) { // drop_list no timestamp, full, largest position CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}}; + cc.dropped = { + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}}; rep.ballot = 4; rep.last_committed_decree = 8; rep.last_prepared_decree = 8; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}, - dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 4, 8, 8}}; + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 4, 8, 8}}; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); } @@ -372,14 +381,17 @@ TEST(meta_data, construct_replica) dsn::partition_configuration &pc = *get_config(app, rep.pid); config_context &cc = *get_config_context(app, rep.pid); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 10, 10); #define CLEAR_REPLICA \ do { \ - pc.primary.set_invalid(); \ - pc.secondaries.clear(); \ - pc.last_drops.clear(); \ + pc.hp_primary.reset(); \ + pc.hp_secondaries.clear(); \ + pc.hp_last_drops.clear(); \ + pc.__set_hp_primary(dsn::host_port()); \ + pc.__set_hp_secondaries({}); \ + pc.__set_hp_last_drops({}); \ } while (false) #define CLEAR_DROP_LIST \ @@ -401,10 +413,11 @@ TEST(meta_data, construct_replica) // only have one node in drop_list { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}}; ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[0], 
pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_EQ(node_list[0].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); ASSERT_TRUE(cc.dropped.empty()); ASSERT_EQ(-1, cc.prefered_dropped); } @@ -412,16 +425,17 @@ TEST(meta_data, construct_replica) // have multiple nodes, ballots are not same { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 6, 10, 12}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 10, 12}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 8, 10, 12}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 9, 11, 12}}; + cc.dropped = { + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 6, 10, 12}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 10, 12}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 8, 10, 12}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 9, 11, 12}}; ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[4], pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_EQ(node_list[4].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); - std::vector nodes = {node_list[2], node_list[3]}; - ASSERT_EQ(nodes, pc.last_drops); + std::vector nodes = {node_list[2].first, node_list[3].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(3, cc.dropped.size()); ASSERT_EQ(2, cc.prefered_dropped); } @@ -429,16 +443,17 @@ TEST(meta_data, construct_replica) // have multiple node, two have same ballots { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 11, 12}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 12, 12}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 12}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 12, 12}}; ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[2], pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_EQ(node_list[2].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); - std::vector nodes = {node_list[0], node_list[1]}; - ASSERT_EQ(nodes, pc.last_drops); + std::vector nodes = {node_list[0].first, node_list[1].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(2, cc.dropped.size()); ASSERT_EQ(1, cc.prefered_dropped); } @@ -446,17 +461,18 @@ TEST(meta_data, construct_replica) // have multiple nodes, all have same ballots { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 12, 14}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 13, 14}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 7, 14, 14}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 7, 12, 14}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 13, 14}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 7, 14, 14}}; ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[3], pc.primary); - 
ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_EQ(node_list[3].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); - std::vector nodes = {node_list[1], node_list[2]}; - ASSERT_EQ(nodes, pc.last_drops); + std::vector nodes = {node_list[1].first, node_list[2].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(3, cc.dropped.size()); ASSERT_EQ(2, cc.prefered_dropped); diff --git a/src/meta/test/meta_duplication_service_test.cpp b/src/meta/test/meta_duplication_service_test.cpp index 644c8c17c6..f1435ed375 100644 --- a/src/meta/test/meta_duplication_service_test.cpp +++ b/src/meta/test/meta_duplication_service_test.cpp @@ -57,7 +57,9 @@ #include "meta/server_state.h" #include "meta/test/misc/misc.h" #include "meta_test_base.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -127,11 +129,13 @@ class meta_duplication_service_test : public meta_test_base } duplication_sync_response - duplication_sync(const rpc_address &node, + duplication_sync(const rpc_address &addr, + const host_port &hp, std::map> confirm_list) { auto req = std::make_unique(); - req->node = node; + req->node = addr; + req->__set_hp_node(hp); req->confirm_list = confirm_list; duplication_sync_rpc rpc(std::move(req), RPC_CM_DUPLICATION_SYNC); @@ -180,8 +184,7 @@ class meta_duplication_service_test : public meta_test_base int last_dup = 0; for (int i = 0; i < 1000; i++) { - auto dup = dup_svc().new_dup_from_init( - remote_cluster_address, std::vector(), app); + auto dup = dup_svc().new_dup_from_init(remote_cluster_address, {}, app); ASSERT_GT(dup->id, 0); ASSERT_FALSE(dup->is_altering()); @@ -537,8 +540,9 @@ TEST_F(meta_duplication_service_test, remove_dup) TEST_F(meta_duplication_service_test, duplication_sync) { - std::vector server_nodes = ensure_enough_alive_nodes(3); - rpc_address node = server_nodes[0]; + const auto &server_nodes = ensure_enough_alive_nodes(3); + const auto &node = server_nodes[0]; + const auto &addr = dsn::dns_resolver::instance().resolve_address(server_nodes[0]); std::string test_app = "test_app_0"; create_app(test_app); @@ -547,9 +551,10 @@ TEST_F(meta_duplication_service_test, duplication_sync) // generate all primaries on node[0] for (partition_configuration &pc : app->partitions) { pc.ballot = random32(1, 10000); - pc.primary = server_nodes[0]; - pc.secondaries.push_back(server_nodes[1]); - pc.secondaries.push_back(server_nodes[2]); + pc.primary = addr; + pc.__set_hp_primary(server_nodes[0]); + pc.hp_secondaries.push_back(server_nodes[1]); + pc.hp_secondaries.push_back(server_nodes[2]); } initialize_node_state(); @@ -574,7 +579,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 7; confirm_list[gpid(app->app_id, 3)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_EQ(resp.dup_map[app->app_id].size(), 1); @@ -605,7 +610,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_TRUE(resp.dup_map[app->app_id].find(dupid + 1) 
== resp.dup_map[app->app_id].end()); @@ -619,7 +624,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id + 1, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + const auto resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_TRUE(resp.dup_map.find(app->app_id + 1) == resp.dup_map.end()); @@ -635,7 +640,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + const auto resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 0); } @@ -772,13 +777,13 @@ TEST_F(meta_duplication_service_test, fail_mode) ASSERT_EQ(dup->status(), duplication_status::DS_PAUSE); // ensure dup_sync will synchronize fail_mode - std::vector server_nodes = generate_node_list(3); - rpc_address node = server_nodes[0]; + auto node = generate_node_list(3)[0]; for (partition_configuration &pc : app->partitions) { - pc.primary = server_nodes[0]; + pc.primary = node.second; + pc.__set_hp_primary(node.first); } initialize_node_state(); - duplication_sync_response sync_resp = duplication_sync(node, {}); + auto sync_resp = duplication_sync(node.second, node.first, {}); ASSERT_TRUE(sync_resp.dup_map[app->app_id][dup->id].__isset.fail_mode); ASSERT_EQ(sync_resp.dup_map[app->app_id][dup->id].fail_mode, duplication_fail_mode::FAIL_SKIP); diff --git a/src/meta/test/meta_partition_guardian_test.cpp b/src/meta/test/meta_partition_guardian_test.cpp index e4f62f1fb5..b0a7b5f73d 100644 --- a/src/meta/test/meta_partition_guardian_test.cpp +++ b/src/meta/test/meta_partition_guardian_test.cpp @@ -55,6 +55,7 @@ #include "meta_test_base.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -78,33 +79,41 @@ static void apply_update_request(/*in-out*/ configuration_update_request &update case config_type::CT_ASSIGN_PRIMARY: case config_type::CT_UPGRADE_TO_PRIMARY: pc.primary = update_req.node; + pc.__set_hp_primary(update_req.hp_node); replica_helper::remove_node(update_req.node, pc.secondaries); + replica_helper::remove_node(update_req.hp_node, pc.hp_secondaries); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: pc.secondaries.push_back(update_req.node); + pc.hp_secondaries.push_back(update_req.hp_node); update_req.type = config_type::CT_UPGRADE_TO_SECONDARY; break; case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: - if (update_req.node == pc.primary) + if (update_req.hp_node == pc.hp_primary) { pc.primary.set_invalid(); - else + pc.hp_primary.reset(); + } else { replica_helper::remove_node(update_req.node, pc.secondaries); + replica_helper::remove_node(update_req.hp_node, pc.hp_secondaries); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: pc.secondaries.push_back(pc.primary); + pc.hp_secondaries.push_back(pc.hp_primary); pc.primary.set_invalid(); + pc.hp_primary.reset(); break; default: break; } } -static auto default_filter = [](const dsn::rpc_address &target, dsn::message_ex *request) { +static auto default_filter = [](const dsn::host_port &target, dsn::message_ex *request) { dsn::message_ex *recv_request = 
create_corresponding_receive(request); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); @@ -140,7 +149,7 @@ class meta_partition_guardian_test : public meta_test_base class message_filter : public dsn::replication::meta_service { public: - typedef std::function filter; + typedef std::function filter; message_filter(meta_partition_guardian_test *app) : meta_service(), _app(app) {} void set_filter(const filter &f) { _filter = f; } virtual void reply_message(dsn::message_ex *request, dsn::message_ex *response) override @@ -148,7 +157,7 @@ class message_filter : public dsn::replication::meta_service destroy_message(response); } - virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override + virtual void send_message(const dsn::host_port &target, dsn::message_ex *request) override { // we expect this is a configuration_update_request proposal cur_ptr update_request = _filter(target, request); @@ -171,7 +180,7 @@ void meta_partition_guardian_test::cure_test() std::shared_ptr svc(new message_filter(this)); svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get())); bool proposal_sent; - dsn::rpc_address last_addr; + dsn::host_port last_addr; ec = svc->remote_storage_initialize(); ASSERT_EQ(ec, dsn::ERR_OK); @@ -195,8 +204,12 @@ void meta_partition_guardian_test::cure_test() ASSERT_TRUE(state->spin_wait_staging(20)); svc->_started = true; - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 4, 4); + std::vector nodes_list; + for (const auto &p : nodes) { + nodes_list.emplace_back(p.first); + } dsn::partition_configuration &pc = app->partitions[0]; config_context &cc = *get_config_context(state->_all_apps, dsn::gpid(1, 0)); @@ -211,24 +224,26 @@ void meta_partition_guardian_test::cure_test() // initialize state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries = {nodes[0], nodes[1]}; + pc.hp_primary.reset(); + pc.secondaries = {nodes[0].second, nodes[1].second}; + pc.__set_hp_secondaries({nodes[0].first, nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); - last_addr = update_req->node; + last_addr = update_req->hp_node; proposal_sent = true; return nullptr; }); @@ -241,15 +256,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(config_type::CT_UPGRADE_TO_PRIMARY, update_req->type); - 
EXPECT_EQ(update_req->node, last_addr); - EXPECT_EQ(target, update_req->node); + EXPECT_EQ(update_req->hp_node, last_addr); + EXPECT_EQ(target, update_req->hp_node); proposal_sent = true; apply_update_request(*update_req); @@ -264,32 +279,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == last_addr; }); + CONDITION_CHECK([&] { return pc.hp_primary == last_addr; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: upgrade secondary to primary, and the candidate died" << std::endl; // initialize state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries = {nodes[0], nodes[1]}; + pc.hp_primary.reset(); + pc.secondaries = {nodes[0].second, nodes[1].second}; + pc.__set_hp_secondaries({nodes[0].first, nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then inject a event that node[0] is dead - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); proposal_sent = true; - last_addr = update_req->node; + last_addr = update_req->hp_node; svc->set_node_state({target}, false); return nullptr; }); @@ -302,15 +319,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); EXPECT_NE(target, last_addr); proposal_sent = true; @@ -325,31 +342,33 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return !pc.primary.is_invalid() && pc.primary != last_addr; }); + CONDITION_CHECK([&] { return !pc.hp_primary.is_invalid() && pc.hp_primary != last_addr; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, and the message lost" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, 
dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); - last_addr = update_req->node; + last_addr = update_req->hp_node; proposal_sent = true; return nullptr; }); @@ -362,15 +381,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_EQ(update_req->node, last_addr); - EXPECT_EQ(target, nodes[0]); + EXPECT_EQ(update_req->hp_node, last_addr); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; apply_update_request(*update_req); @@ -384,34 +403,38 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.secondaries.size() == 2 && is_secondary(pc, last_addr); }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2 && is_secondary(pc, last_addr); }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, but the primary is removing another" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then inject another update_request - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); update_req->config.ballot++; update_req->type = config_type::CT_DOWNGRADE_TO_INACTIVE; update_req->node = update_req->config.secondaries[0]; + update_req->hp_node = update_req->config.hp_secondaries[0]; update_req->config.secondaries.clear(); + update_req->config.hp_secondaries.clear(); proposal_sent = true; @@ -425,32 +448,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.secondaries.size() == 2; }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << 
"Case: add secondary, and the added secondary is dead" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then inject the nodes[2] dead - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); - last_addr = update_req->node; - svc->set_node_state({update_req->node}, false); + last_addr = update_req->hp_node; + svc->set_node_state({update_req->hp_node}, false); proposal_sent = true; return nullptr; }); @@ -463,19 +488,19 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_NE(update_req->node, last_addr); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_NE(update_req->hp_node, last_addr); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; - last_addr = update_req->node; + last_addr = update_req->hp_node; apply_update_request(*update_req); svc->set_filter(default_filter); return update_req; @@ -487,32 +512,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.secondaries.size() == 2 && is_secondary(pc, last_addr); }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2 && is_secondary(pc, last_addr); }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, and the primary is dead" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second}; + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); 
EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, pc.primary); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, pc.hp_primary); proposal_sent = true; - svc->set_node_state({pc.primary}, false); + svc->set_node_state({pc.hp_primary}, false); svc->set_filter(default_filter); return nullptr; }); @@ -523,26 +550,28 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0], nodes[1], nodes[2]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + pc.last_drops = {nodes[0].second, nodes[1].second, nodes[2].second}; + pc.__set_hp_last_drops({nodes[0].first, nodes[1].first, nodes[2].first}); pc.ballot = 4; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[2]); - EXPECT_EQ(target, nodes[2]); + EXPECT_EQ(update_req->hp_node, nodes[2].first); + EXPECT_EQ(target, nodes[2].first); proposal_sent = true; svc->set_filter(default_filter); @@ -551,11 +580,11 @@ void meta_partition_guardian_test::cure_test() }); std::cerr << "Case: recover from DDD state, nodes[1] isn't alive" << std::endl; - svc->set_node_state({nodes[1]}, false); + svc->set_node_state({nodes[1].first}, false); cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, }; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -563,13 +592,13 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: recover from DDD state, nodes[2] is not in dropped" << std::endl; - svc->set_node_state({nodes[1]}, true); - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}}; + svc->set_node_state({nodes[1].first}, true); + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}}; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -577,15 +606,15 @@ void meta_partition_guardian_test::cure_test() 
server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, haven't collect nodes[2]'s info from replica, and " "nodes[2]'s info haven't updated" << std::endl; - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -593,21 +622,21 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, haven't collect nodes[2]'s info from replica, and " "nodes[2]'s info have updated" << std::endl; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[1]); - EXPECT_EQ(target, nodes[1]); + EXPECT_EQ(update_req->hp_node, nodes[1].first); + EXPECT_EQ(target, nodes[1].first); proposal_sent = true; svc->set_filter(default_filter); @@ -615,11 +644,11 @@ void meta_partition_guardian_test::cure_test() return update_req; }); - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; pc.last_committed_decree = 0; - get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true); + get_node_state(state->_nodes, nodes[2].first, false)->set_replicas_collect_flag(true); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, std::bind(&server_state::check_all_partitions, state), @@ -627,21 +656,23 @@ void meta_partition_guardian_test::cure_test() t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD, haven't collect nodes[1/2]'s info from replica, and " "nodes[1/2]'s info both have updated" << std::endl; - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], 500, -1, -1, -1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; - get_node_state(state->_nodes, nodes[1], false)->set_replicas_collect_flag(true); - 
get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true); + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, 500, -1, -1, -1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; + get_node_state(state->_nodes, nodes[1].first, false)->set_replicas_collect_flag(true); + get_node_state(state->_nodes, nodes[2].first, false)->set_replicas_collect_flag(true); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0], nodes[1], nodes[2]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + pc.last_drops = {nodes[0].second, nodes[1].second, nodes[2].second}; + pc.__set_hp_last_drops({nodes[0].first, nodes[1].first, nodes[2].first}); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -649,15 +680,15 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, larger ballot not match with larger decree" << std::endl; cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 0, 1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 0, 1, 1}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 0, 1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 0, 1, 1}, }; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, @@ -666,14 +697,14 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, committed decree less than meta's" << std::endl; cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 10, 15}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 15, 15}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 10, 15}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 1, 15, 15}, }; pc.last_committed_decree = 30; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, @@ -682,26 +713,26 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, select primary from config_context::dropped" << std::endl; cc.dropped = { - dropped_replica{nodes[0], 12344, -1, -1, -1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 4, 2, 4}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 4, 3, 4}, + dropped_replica{nodes[0].first, 12344, -1, -1, -1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 4}, + dropped_replica{nodes[1].first, 
dropped_replica::INVALID_TIMESTAMP, 4, 3, 4}, }; pc.last_committed_decree = 2; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[1]); - EXPECT_EQ(target, nodes[1]); + EXPECT_EQ(update_req->hp_node, nodes[1].first); + EXPECT_EQ(target, nodes[1].first); proposal_sent = true; svc->set_filter(default_filter); @@ -715,19 +746,19 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, only one primary" << std::endl; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[0]); - EXPECT_EQ(target, nodes[0]); + EXPECT_EQ(update_req->hp_node, nodes[0].first); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; svc->set_filter(default_filter); @@ -736,12 +767,14 @@ void meta_partition_guardian_test::cure_test() }); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + pc.last_drops = {nodes[0].second}; + pc.__set_hp_last_drops({nodes[0].first}); state->_nodes.clear(); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true); + svc->set_node_state({nodes[0].first, nodes[1].first, nodes[2].first}, true); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -749,7 +782,7 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[0]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[0].first; }); } static void check_nodes_loads(node_mapper &nodes) @@ -770,8 +803,12 @@ static void check_nodes_loads(node_mapper &nodes) void meta_partition_guardian_test::cure() { - std::vector node_list; - generate_node_list(node_list, 20, 100); + std::vector> nodes_pairs; + std::vector nodes_list; + generate_node_list(nodes_pairs, 20, 100); + for (const auto &p : nodes_pairs) { + nodes_list.emplace_back(p.first); + } app_mapper app; node_mapper nodes; @@ -789,8 +826,8 @@ void meta_partition_guardian_test::cure() std::shared_ptr the_app = app_state::create(info); app.emplace(the_app->app_id, the_app); - for (const auto &address : node_list) { - get_node_state(nodes, address, true)->set_alive(true); + for (const auto &hp : nodes_list) { + get_node_state(nodes, hp, true)->set_alive(true); } bool all_partitions_healthy = false; @@ -811,6 +848,7 @@ void meta_partition_guardian_test::cure() fake_request.config = the_app->partitions[i]; fake_request.type = action.type; fake_request.node = action.node; + 
fake_request.__set_hp_node(action.hp_node); fake_request.host_node = action.node; guardian.reconfig({&app, &nodes}, fake_request); @@ -822,8 +860,8 @@ void meta_partition_guardian_test::cure() void meta_partition_guardian_test::from_proposal_test() { - std::vector node_list; - generate_node_list(node_list, 3, 3); + std::vector> nodes_list; + generate_node_list(nodes_list, 3, 3); app_mapper app; node_mapper nodes; @@ -842,8 +880,8 @@ void meta_partition_guardian_test::from_proposal_test() std::shared_ptr the_app = app_state::create(info); app.emplace(the_app->app_id, the_app); - for (const dsn::rpc_address &addr : node_list) { - get_node_state(nodes, addr, true)->set_alive(true); + for (const auto &p : nodes_list) { + get_node_state(nodes, p.first, true)->set_alive(true); } meta_view mv{&app, &nodes}; @@ -859,69 +897,109 @@ void meta_partition_guardian_test::from_proposal_test() ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 2: test invalid proposal: invalid target" << std::endl; - cpa2 = - new_proposal_action(dsn::rpc_address(), node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(dsn::rpc_address(), + nodes_list[0].second, + dsn::host_port(), + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 3: test invalid proposal: invalid node" << std::endl; - cpa2 = - new_proposal_action(node_list[0], dsn::rpc_address(), config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + dsn::rpc_address(), + nodes_list[0].first, + dsn::host_port(), + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 4: test invalid proposal: dead target" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - get_node_state(nodes, node_list[0], false)->set_alive(false); + get_node_state(nodes, nodes_list[0].first, false)->set_alive(false); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); - get_node_state(nodes, node_list[0], false)->set_alive(true); + get_node_state(nodes, nodes_list[0].first, false)->set_alive(true); std::cerr << "Case 5: test invalid proposal: dead node" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - get_node_state(nodes, node_list[1], false)->set_alive(false); + get_node_state(nodes, nodes_list[1].first, false)->set_alive(false); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); - get_node_state(nodes, node_list[1], false)->set_alive(true); + get_node_state(nodes, nodes_list[1].first, false)->set_alive(true); std::cerr << "Case 6: test invalid proposal: already have priamry but assign" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_ASSIGN_PRIMARY); + cpa2 = 
new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_ASSIGN_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[1]; + pc.primary = nodes_list[1].second; + pc.__set_hp_primary(nodes_list[1].first); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 7: test invalid proposal: upgrade non-secondary" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); pc.primary.set_invalid(); + pc.hp_primary.reset(); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 8: test invalid proposal: add exist secondary" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[0]; - pc.secondaries = {node_list[1]}; + pc.primary = nodes_list[1].second; + pc.__set_hp_primary(nodes_list[1].first); + pc.secondaries = {nodes_list[1].second}; + pc.__set_hp_secondaries({nodes_list[1].first}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 9: test invalid proposal: downgrade non member" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_REMOVE); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_REMOVE); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[0]; - pc.secondaries.clear(); + pc.primary = nodes_list[0].second; + pc.__set_hp_primary(nodes_list[0].first); + pc.hp_secondaries.clear(); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 10: test abnormal learning detect" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); - pc.primary = node_list[0]; - pc.secondaries.clear(); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); + pc.primary = nodes_list[0].second; + pc.__set_hp_primary(nodes_list[0].first); + pc.hp_secondaries.clear(); cc.lb_actions.assign_balancer_proposals({cpa2}); replica_info i; @@ -932,12 +1010,12 @@ void meta_partition_guardian_test::from_proposal_test() i.last_committed_decree = 10; i.last_prepared_decree = 10; - collect_replica(mv, node_list[1], i); + collect_replica(mv, nodes_list[1].first, i); ASSERT_TRUE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_ADD_SECONDARY, cpa.type); i.status = partition_status::PS_ERROR; - collect_replica(mv, node_list[1], i); + collect_replica(mv, nodes_list[1].first, i); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); } diff --git a/src/meta/test/meta_service_test.cpp b/src/meta/test/meta_service_test.cpp index 8e47525179..802ca62bd9 100644 --- a/src/meta/test/meta_service_test.cpp +++ 
b/src/meta/test/meta_service_test.cpp @@ -29,6 +29,7 @@ #include "runtime/rpc/network.sim.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "utils/autoref_ptr.h" @@ -44,17 +45,17 @@ class meta_service_test : public meta_test_base void check_status_failure() { fail::setup(); - fail::cfg("meta_server_failure_detector_get_leader", "return(false#1.2.3.4:10086)"); + fail::cfg("meta_server_failure_detector_get_leader", "return(false#localhost:10086)"); /** can't forward to others */ RPC_MOCKING(app_env_rpc) { - rpc_address leader; + host_port leader; auto rpc = create_fake_rpc(); rpc.dsn_request()->header->context.u.is_forward_supported = false; ASSERT_FALSE(_ms->check_status_and_authz(rpc, &leader)); ASSERT_EQ(ERR_FORWARD_TO_OTHERS, rpc.response().err); - ASSERT_STREQ("1.2.3.4:10086", leader.to_string()); + ASSERT_EQ(leader.to_string(), "localhost:10086"); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0); } @@ -64,7 +65,7 @@ class meta_service_test : public meta_test_base auto rpc = create_fake_rpc(); ASSERT_FALSE(_ms->check_status_and_authz(rpc)); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 1); - ASSERT_STREQ("1.2.3.4:10086", + ASSERT_STREQ("127.0.0.1:10086", app_env_rpc::forward_mail_box()[0].remote_address().to_string()); } @@ -74,11 +75,11 @@ class meta_service_test : public meta_test_base void check_status_success() { fail::setup(); - fail::cfg("meta_server_failure_detector_get_leader", "return(true#1.2.3.4:10086)"); + fail::cfg("meta_server_failure_detector_get_leader", "return(true#localhost:10086)"); RPC_MOCKING(app_env_rpc) { - rpc_address leader; + host_port leader; auto rpc = create_fake_rpc(); ASSERT_TRUE(_ms->check_status_and_authz(rpc, &leader)); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0); diff --git a/src/meta/test/meta_split_service_test.cpp b/src/meta/test/meta_split_service_test.cpp index 07d0210b7b..420066d8ee 100644 --- a/src/meta/test/meta_split_service_test.cpp +++ b/src/meta/test/meta_split_service_test.cpp @@ -60,6 +60,7 @@ #include "metadata_types.h" #include "partition_split_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -148,7 +149,7 @@ class meta_split_service_test : public meta_test_base request->app.app_id = app->app_id; request->parent_config = parent_config; request->child_config = child_config; - request->primary_address = NODE; + request->hp_primary = NODE; register_child_rpc rpc(std::move(request), RPC_CM_REGISTER_CHILD_REPLICA); split_svc().register_child_on_meta(rpc); @@ -377,7 +378,8 @@ class meta_split_service_test : public meta_test_base const int32_t PARENT_BALLOT = 3; const int32_t PARENT_INDEX = 0; const int32_t CHILD_INDEX = 4; - const rpc_address NODE = rpc_address::from_ip_port("127.0.0.1", 10086); + const host_port NODE = host_port("localhost", 10086); + const rpc_address NODE_ADDR = rpc_address::from_host_port("127.0.0.1", 10086); std::shared_ptr app; }; @@ -505,7 +507,8 @@ TEST_F(meta_split_service_test, on_config_sync_test) info1.pid = pid1; info2.pid = pid2; configuration_query_by_node_request req; - req.node = NODE; + req.node = NODE_ADDR; + req.__set_hp_node(NODE); req.__isset.stored_replicas = true; req.stored_replicas.emplace_back(info1); req.stored_replicas.emplace_back(info2); diff --git a/src/meta/test/meta_test_base.cpp 
b/src/meta/test/meta_test_base.cpp index 0a9d440fc3..be808b7c77 100644 --- a/src/meta/test/meta_test_base.cpp +++ b/src/meta/test/meta_test_base.cpp @@ -38,7 +38,7 @@ #include "meta/server_state.h" #include "meta/test/misc/misc.h" #include "meta_service_test_app.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" @@ -121,9 +121,9 @@ void meta_test_base::set_node_live_percentage_threshold_for_update(int32_t perce _ms->_node_live_percentage_threshold_for_update = percentage_threshold; } -std::vector meta_test_base::get_alive_nodes() const +std::vector meta_test_base::get_alive_nodes() const { - std::vector nodes; + std::vector nodes; zauto_read_lock l(_ss->_lock); @@ -136,13 +136,13 @@ std::vector meta_test_base::get_alive_nodes() const return nodes; } -std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) +std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) { if (min_node_count < 1) { - return std::vector(); + return std::vector(); } - std::vector nodes(get_alive_nodes()); + std::vector nodes(get_alive_nodes()); if (!nodes.empty()) { auto node_count = static_cast(nodes.size()); CHECK_GE_MSG(node_count, @@ -161,12 +161,15 @@ std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_ return nodes; } - nodes = generate_node_list(min_node_count); + auto node_pairs = generate_node_list(min_node_count); + for (const auto &p : node_pairs) { + nodes.emplace_back(p.first); + } _ms->set_node_state(nodes, true); while (true) { { - std::vector alive_nodes(get_alive_nodes()); + std::vector alive_nodes(get_alive_nodes()); if (static_cast(alive_nodes.size()) >= min_node_count) { break; } @@ -242,7 +245,7 @@ meta_test_base::update_app_envs(const std::string &app_name, return rpc.response(); } -void meta_test_base::mock_node_state(const rpc_address &addr, const node_state &node) +void meta_test_base::mock_node_state(const host_port &addr, const node_state &node) { _ss->_nodes[addr] = node; } diff --git a/src/meta/test/meta_test_base.h b/src/meta/test/meta_test_base.h index 843ff91e0e..2f27e883a4 100644 --- a/src/meta/test/meta_test_base.h +++ b/src/meta/test/meta_test_base.h @@ -25,7 +25,7 @@ #include "meta/meta_service.h" // IWYU pragma: keep namespace dsn { -class rpc_address; +class host_port; namespace replication { @@ -55,7 +55,7 @@ class meta_test_base : public testing::Test void set_node_live_percentage_threshold_for_update(int32_t percentage_threshold); - std::vector ensure_enough_alive_nodes(int min_node_count); + std::vector ensure_enough_alive_nodes(int min_node_count); // create an app for test with specified name and specified partition count void create_app(const std::string &name, uint32_t partition_count); @@ -69,7 +69,7 @@ class meta_test_base : public testing::Test const std::vector &env_keys, const std::vector &env_vals); - void mock_node_state(const rpc_address &addr, const node_state &node); + void mock_node_state(const host_port &addr, const node_state &node); std::shared_ptr find_app(const std::string &name); @@ -84,7 +84,7 @@ class meta_test_base : public testing::Test std::string _app_root; private: - std::vector get_alive_nodes() const; + std::vector get_alive_nodes() const; }; } // namespace replication diff --git a/src/meta/test/misc/misc.cpp b/src/meta/test/misc/misc.cpp index 11570913a5..a2fa90ab54 100644 --- a/src/meta/test/misc/misc.cpp +++ b/src/meta/test/misc/misc.cpp @@ -47,6 +47,9 @@ 
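// Sketch (not from the patch; to_address is a hypothetical helper) of the
// resolution direction the misc helpers below assume: the host_port
// ("localhost", port) is the primary identity, and the legacy rpc_address is
// derived from it through the dns_resolver singleton, exactly as generate_app
// below and meta_duplication_service_test above do.
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_address.h"
#include "runtime/rpc/rpc_host_port.h"

static dsn::rpc_address to_address(const dsn::host_port &hp)
{
    // In these tests "localhost" resolves to 127.0.0.1, so the derived address
    // matches the rpc_address that generate_node_list pairs with the host_port.
    return dsn::dns_resolver::instance().resolve_address(hp);
}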
#include "duplication_types.h" #include "meta_admin_types.h" #include "metadata_types.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" #include "utils/rand.h" @@ -58,12 +61,15 @@ uint32_t random32(uint32_t min, uint32_t max) return res + min; } -void generate_node_list(std::vector &output_list, int min_count, int max_count) +void generate_node_list(std::vector> &output_list, + int min_count, + int max_count) { const auto count = random32(min_count, max_count); output_list.resize(count); - for (auto i = 0; i < count; ++i) { - output_list[i] = dsn::rpc_address::from_ip_port("127.0.0.1", i + 1); + for (int i = 0; i < count; ++i) { + output_list[i] = std::make_pair(dsn::host_port("localhost", i + 1), + dsn::rpc_address::from_ip_port("127.0.0.1", i + 1)); } } @@ -74,10 +80,10 @@ void verbose_apps(const app_mapper &input_apps) const std::shared_ptr &app = apps.second; std::cout << apps.first << " " << app->partition_count << std::endl; for (int i = 0; i < app->partition_count; ++i) { - std::cout << app->partitions[i].secondaries.size() + 1 << " " - << app->partitions[i].primary; - for (int j = 0; j < app->partitions[i].secondaries.size(); ++j) { - std::cout << " " << app->partitions[i].secondaries[j]; + std::cout << app->partitions[i].hp_secondaries.size() + 1 << " " + << app->partitions[i].hp_primary; + for (int j = 0; j < app->partitions[i].hp_secondaries.size(); ++j) { + std::cout << " " << app->partitions[i].hp_secondaries[j]; } std::cout << std::endl; } @@ -87,22 +93,22 @@ void verbose_apps(const app_mapper &input_apps) void generate_node_mapper( /*out*/ node_mapper &output_nodes, const app_mapper &input_apps, - const std::vector &input_node_list) + const std::vector &input_node_list) { output_nodes.clear(); - for (auto &addr : input_node_list) { - get_node_state(output_nodes, addr, true)->set_alive(true); + for (auto &hp : input_node_list) { + get_node_state(output_nodes, hp, true)->set_alive(true); } for (auto &kv : input_apps) { const std::shared_ptr &app = kv.second; for (const dsn::partition_configuration &pc : app->partitions) { node_state *ns; - if (!pc.primary.is_invalid()) { - ns = get_node_state(output_nodes, pc.primary, true); + if (!pc.hp_primary.is_invalid()) { + ns = get_node_state(output_nodes, pc.hp_primary, true); ns->put_partition(pc.pid, true); } - for (const dsn::rpc_address &sec : pc.secondaries) { + for (const dsn::host_port &sec : pc.hp_secondaries) { CHECK(!sec.is_invalid(), ""); ns = get_node_state(output_nodes, sec, true); ns->put_partition(pc.pid, false); @@ -112,7 +118,7 @@ void generate_node_mapper( } void generate_app(/*out*/ std::shared_ptr &app, - const std::vector &node_list) + const std::vector &node_list) { for (dsn::partition_configuration &pc : app->partitions) { pc.ballot = random32(1, 10000); @@ -122,16 +128,21 @@ void generate_app(/*out*/ std::shared_ptr &app, indices[2] = random32(indices[1] + 1, node_list.size() - 1); int p = random32(0, 2); - pc.primary = node_list[indices[p]]; - pc.secondaries.clear(); - for (unsigned int i = 0; i != indices.size(); ++i) - if (i != p) - pc.secondaries.push_back(node_list[indices[i]]); + pc.__set_hp_primary(node_list[indices[p]]); + pc.__set_hp_secondaries({}); + pc.primary = dsn::dns_resolver::instance().resolve_address(node_list[indices[p]]); + for (unsigned int i = 0; i != indices.size(); ++i) { + if (i != p) { + pc.secondaries.push_back( + dsn::dns_resolver::instance().resolve_address(node_list[indices[i]])); + 
pc.hp_secondaries.push_back(node_list[indices[i]]); + } + } - CHECK(!pc.primary.is_invalid(), ""); - CHECK(!is_secondary(pc, pc.primary), ""); - CHECK_EQ(pc.secondaries.size(), 2); - CHECK_NE(pc.secondaries[0], pc.secondaries[1]); + CHECK(!pc.hp_primary.is_invalid(), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); + CHECK_EQ(pc.hp_secondaries.size(), 2); + CHECK_NE(pc.hp_secondaries[0], pc.hp_secondaries[1]); } } @@ -146,18 +157,18 @@ void generate_app_serving_replica_info(/*out*/ std::shared_ptr &node_list, + const std::vector &node_list, int apps_count, int disks_per_node, std::pair partitions_range, @@ -203,10 +214,10 @@ void generate_node_fs_manager(const app_mapper &apps, for (const auto &kv : nodes) { const node_state &ns = kv.second; - if (nfm.find(ns.addr()) == nfm.end()) { - nfm.emplace(ns.addr(), std::make_shared()); + if (nfm.find(ns.host_port()) == nfm.end()) { + nfm.emplace(ns.host_port(), std::make_shared()); } - fs_manager &manager = *(nfm.find(ns.addr())->second); + fs_manager &manager = *(nfm.find(ns.host_port())->second); manager.initialize(data_dirs, tags); ns.for_each_partition([&](const dsn::gpid &pid) { const config_context &cc = *get_config_context(apps, pid); @@ -214,10 +225,10 @@ void generate_node_fs_manager(const app_mapper &apps, 256, "%s%s/%d.%d.test", prefix, - cc.find_from_serving(ns.addr())->disk_tag.c_str(), + cc.find_from_serving(ns.host_port())->disk_tag.c_str(), pid.get_app_id(), pid.get_partition_index()); - LOG_DEBUG("concat pid_dir({}) of node({})", pid_dir, ns.addr()); + LOG_DEBUG("concat pid_dir({}) of node({})", pid_dir, ns.host_port()); manager.add_replica(pid, pid_dir); return true; }); @@ -233,9 +244,13 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo config_context *cc = get_config_context(apps, pid); CHECK_NOTNULL(cc, ""); - fs_manager *target_manager = get_fs_manager(manager, act.target); + dsn::host_port hp_target, hp_node; + GET_HOST_PORT(act, target, hp_target); + GET_HOST_PORT(act, node, hp_node); + + fs_manager *target_manager = get_fs_manager(manager, hp_target); CHECK_NOTNULL(target_manager, ""); - fs_manager *node_manager = get_fs_manager(manager, act.node); + fs_manager *node_manager = get_fs_manager(manager, hp_node); CHECK_NOTNULL(node_manager, ""); std::string dir; @@ -245,7 +260,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo auto selected = target_manager->find_best_dir_for_new_replica(pid); CHECK_NOTNULL(selected, ""); selected->holding_replicas[pid.get_app_id()].emplace(pid); - cc->collect_serving_replica(act.target, ri); + cc->collect_serving_replica(hp_target, ri); break; } case config_type::CT_ADD_SECONDARY: @@ -253,7 +268,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo auto selected = node_manager->find_best_dir_for_new_replica(pid); CHECK_NOTNULL(selected, ""); selected->holding_replicas[pid.get_app_id()].emplace(pid); - cc->collect_serving_replica(act.node, ri); + cc->collect_serving_replica(hp_node, ri); break; } case config_type::CT_DOWNGRADE_TO_SECONDARY: @@ -263,7 +278,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: node_manager->remove_replica(pid); - cc->remove_from_serving(act.node); + cc->remove_from_serving(hp_node); break; default: @@ -290,24 +305,33 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, track_disk_info_check_and_apply(act, pid, apps, nodes, 
*manager); } + dsn::host_port hp_target, hp_node; + GET_HOST_PORT(act, target, hp_target); + GET_HOST_PORT(act, node, hp_node); + switch (act.type) { case config_type::CT_ASSIGN_PRIMARY: CHECK_EQ(act.node, act.target); + CHECK(pc.hp_primary.is_invalid(), ""); CHECK(pc.primary.is_invalid(), ""); + CHECK(pc.hp_secondaries.empty(), ""); CHECK(pc.secondaries.empty(), ""); pc.primary = act.node; - ns = &nodes[act.node]; + pc.__set_hp_primary(hp_node); + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, true); break; case config_type::CT_ADD_SECONDARY: + CHECK_EQ(hp_target, pc.hp_primary); CHECK_EQ(act.target, pc.primary); - CHECK(!is_member(pc, act.node), ""); + CHECK(!is_member(pc, hp_node), ""); + pc.hp_secondaries.push_back(hp_node); pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, false); @@ -315,33 +339,47 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, case config_type::CT_DOWNGRADE_TO_SECONDARY: CHECK_EQ(act.node, act.target); + CHECK_EQ(hp_node, hp_target); CHECK_EQ(act.node, pc.primary); - CHECK(nodes.find(act.node) != nodes.end(), ""); - CHECK(!is_secondary(pc, pc.primary), ""); - nodes[act.node].remove_partition(pc.pid, true); + CHECK_EQ(hp_node, pc.hp_primary); + CHECK(nodes.find(hp_node) != nodes.end(), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); + nodes[hp_node].remove_partition(pc.pid, true); pc.secondaries.push_back(pc.primary); + pc.hp_secondaries.push_back(pc.hp_primary); pc.primary.set_invalid(); + pc.__set_hp_primary(dsn::host_port()); break; case config_type::CT_UPGRADE_TO_PRIMARY: + CHECK(pc.hp_primary.is_invalid(), ""); CHECK(pc.primary.is_invalid(), ""); + CHECK_EQ(hp_node, hp_target); CHECK_EQ(act.node, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(is_secondary(pc, hp_node), ""); + CHECK(nodes.find(hp_node) != nodes.end(), ""); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; + pc.hp_primary = hp_node; pc.primary = act.node; + CHECK(replica_helper::remove_node(hp_node, pc.hp_secondaries), ""); CHECK(replica_helper::remove_node(act.node, pc.secondaries), ""); ns->put_partition(pc.pid, true); break; case config_type::CT_ADD_SECONDARY_FOR_LB: + CHECK_EQ(hp_target, pc.hp_primary); CHECK_EQ(act.target, pc.primary); - CHECK(!is_member(pc, act.node), ""); + CHECK(!is_member(pc, hp_node), ""); + CHECK(!act.hp_node.is_invalid(), ""); CHECK(!act.node.is_invalid(), ""); + if (!pc.__isset.hp_secondaries) { + pc.__set_hp_secondaries({}); + } + pc.hp_secondaries.push_back(hp_node); pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; ns->put_partition(pc.pid, false); CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); break; @@ -349,13 +387,16 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, // in balancer, remove primary is not allowed case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: + CHECK(!pc.hp_primary.is_invalid(), ""); CHECK(!pc.primary.is_invalid(), ""); + CHECK_EQ(pc.hp_primary, hp_target); CHECK_EQ(pc.primary, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(is_secondary(pc, hp_node), ""); + CHECK(nodes.find(hp_node) != nodes.end(), ""); + CHECK(replica_helper::remove_node(hp_node, pc.hp_secondaries), ""); 
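// Note on the mirrored updates in this switch (illustrative only, not asserted
// by the patch): each branch is expected to keep the legacy and host_port views
// of the partition in lockstep, i.e.
//   pc.secondaries.size() == pc.hp_secondaries.size(),
//   pc.primary.is_invalid() == pc.hp_primary.is_invalid(),
// and, since the test addresses are derived from host_ports via dns_resolver,
//   pc.secondaries[i] == dsn::dns_resolver::instance().resolve_address(pc.hp_secondaries[i]).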
CHECK(replica_helper::remove_node(act.node, pc.secondaries), ""); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); ns->remove_partition(pc.pid, false); break; @@ -382,19 +423,21 @@ void migration_check_and_apply(app_mapper &apps, dsn::partition_configuration &pc = the_app->partitions[proposal->gpid.get_partition_index()]; - CHECK(!pc.primary.is_invalid(), ""); - CHECK_EQ(pc.secondaries.size(), 2); - for (auto &addr : pc.secondaries) { - CHECK(!addr.is_invalid(), ""); + CHECK(!pc.hp_primary.is_invalid(), ""); + CHECK_EQ(pc.hp_secondaries.size(), 2); + for (auto &host_port : pc.hp_secondaries) { + CHECK(!host_port.is_invalid(), ""); } - CHECK(!is_secondary(pc, pc.primary), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); for (unsigned int j = 0; j < proposal->action_list.size(); ++j) { configuration_proposal_action &act = proposal->action_list[j]; - LOG_DEBUG("the {}th round of action, type: {}, node: {}, target: {}", + LOG_DEBUG("the {}th round of action, type: {}, node: {}({}), target: {}({})", j, dsn::enum_to_string(act.type), + act.hp_node, act.node, + act.hp_target, act.target); proposal_action_check_and_apply(act, proposal->gpid, apps, nodes, manager); } diff --git a/src/meta/test/misc/misc.h b/src/meta/test/misc/misc.h index 15643fbc58..867b49781f 100644 --- a/src/meta/test/misc/misc.h +++ b/src/meta/test/misc/misc.h @@ -38,6 +38,7 @@ #include "meta/meta_data.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { class gpid; @@ -48,10 +49,10 @@ class fs_manager; } // namespace replication } // namespace dsn -typedef std::map> nodes_fs_manager; +typedef std::map> nodes_fs_manager; inline dsn::replication::fs_manager *get_fs_manager(nodes_fs_manager &nfm, - const dsn::rpc_address &node) + const dsn::host_port &node) { auto iter = nfm.find(node); if (nfm.end() == iter) @@ -64,18 +65,22 @@ uint32_t random32(uint32_t min, uint32_t max); // Generates a random number [min_count, max_count] of node addresses // each node is given a random port value in range of [min_count, max_count] -void generate_node_list(/*out*/ std::vector &output_list, - int min_count, - int max_count); +void generate_node_list( + /*out*/ std::vector> &output_list, + int min_count, + int max_count); // Generates `size` of node addresses, each with port value in range [start_port, start_port + size] -inline std::vector generate_node_list(size_t size, int start_port = 12321) +inline std::vector> +generate_node_list(size_t size, int start_port = 12321) { - std::vector result; + std::vector> result; result.resize(size); - for (int i = 0; i < size; ++i) - result[i] = + for (int i = 0; i < size; ++i) { + result[i].first = dsn::host_port("localhost", static_cast(start_port + i + 1)); + result[i].second = dsn::rpc_address::from_ip_port("127.0.0.1", static_cast(start_port + i + 1)); + } return result; } @@ -84,12 +89,12 @@ inline std::vector generate_node_list(size_t size, int start_p // REQUIRES: node_list.size() >= 3 void generate_app( /*out*/ std::shared_ptr &app, - const std::vector &node_list); + const std::vector &node_list); void generate_node_mapper( /*out*/ dsn::replication::node_mapper &output_nodes, const dsn::replication::app_mapper &input_apps, - const std::vector &input_node_list); + const std::vector &input_node_list); void generate_app_serving_replica_info(/*out*/ std::shared_ptr &app, int total_disks); @@ -100,7 +105,7 @@ void generate_node_fs_manager(const dsn::replication::app_mapper &apps, int 
total_disks); void generate_apps(/*out*/ dsn::replication::app_mapper &apps, - const std::vector &node_list, + const std::vector &node_list, int apps_count, int disks_per_node, std::pair partitions_range, diff --git a/src/meta/test/state_sync_test.cpp b/src/meta/test/state_sync_test.cpp index 6ebc610d50..54f3d5a4d7 100644 --- a/src/meta/test/state_sync_test.cpp +++ b/src/meta/test/state_sync_test.cpp @@ -47,6 +47,7 @@ #include "meta_admin_types.h" #include "meta_service_test_app.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -61,13 +62,15 @@ namespace dsn { namespace replication { class meta_options; -static void random_assign_partition_config(std::shared_ptr &app, - const std::vector &server_list, - int max_replica_count) +static void random_assign_partition_config( + std::shared_ptr &app, + std::vector> &server_list, + int max_replica_count) { auto get_server = [&server_list](int indice) { - if (indice % 2 != 0) - return dsn::rpc_address(); + if (indice % 2 != 0) { + return std::make_pair(dsn::host_port(), dsn::rpc_address()); + } return server_list[indice / 2]; }; @@ -79,13 +82,21 @@ static void random_assign_partition_config(std::shared_ptr &app, indices.push_back(random32(start, max_servers)); start = indices.back() + 1; } - pc.primary = get_server(indices[0]); + const auto &server = get_server(indices[0]); + pc.primary = server.second; + pc.__set_hp_primary(server.first); + if (!pc.__isset.hp_secondaries) { + pc.__set_hp_secondaries({}); + } for (int i = 1; i < indices.size(); ++i) { - dsn::rpc_address addr = get_server(indices[i]); - if (!addr.is_invalid()) - pc.secondaries.push_back(addr); + const auto &s = get_server(indices[i]); + if (!s.first.is_invalid()) { + pc.secondaries.push_back(s.second); + pc.hp_secondaries.push_back(s.first); + } } - pc.last_drops = {server_list.back()}; + pc.__set_hp_last_drops({server_list.back().first}); + pc.last_drops = {server_list.back().second}; } } @@ -120,7 +131,7 @@ void meta_service_test_app::state_sync_test() { int apps_count = 15; int drop_ratio = 5; - std::vector server_list; + std::vector> server_list; std::vector drop_set; generate_node_list(server_list, 10, 10); @@ -187,7 +198,7 @@ void meta_service_test_app::state_sync_test() for (int j = 0; j < app->partition_count; ++j) { config_context &cc = app->helpers->contexts[j]; ASSERT_EQ(1, cc.dropped.size()); - ASSERT_NE(cc.dropped.end(), cc.find_from_dropped(server_list.back())); + ASSERT_NE(cc.dropped.end(), cc.find_from_dropped(server_list.back().first)); } } ec = ss2->dump_from_remote_storage("meta_state.dump1", false); @@ -381,10 +392,14 @@ void meta_service_test_app::construct_apps_test() std::shared_ptr svc(new meta_service()); - std::vector nodes; + std::vector> nodes; std::string hint_message; generate_node_list(nodes, 1, 1); - svc->_state->construct_apps({resp}, nodes, hint_message); + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } + svc->_state->construct_apps({resp}, hps, hint_message); meta_view mv = svc->_state->get_meta_view(); const app_mapper &mapper = *(mv.apps); diff --git a/src/meta/test/update_configuration_test.cpp b/src/meta/test/update_configuration_test.cpp index 7db2a96268..f7ab3f754f 100644 --- a/src/meta/test/update_configuration_test.cpp +++ b/src/meta/test/update_configuration_test.cpp @@ -56,6 +56,7 @@ #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" #include 
"runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -86,7 +87,7 @@ class fake_sender_meta_service : public dsn::replication::meta_service { destroy_message(response); } - virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override + virtual void send_message(const dsn::host_port &target, dsn::message_ex *request) override { // we expect this is a configuration_update_request proposal dsn::message_ex *recv_request = create_corresponding_receive(request); @@ -105,26 +106,34 @@ class fake_sender_meta_service : public dsn::replication::meta_service case config_type::CT_ASSIGN_PRIMARY: case config_type::CT_UPGRADE_TO_PRIMARY: pc.primary = update_req->node; + pc.__set_hp_primary(update_req->hp_node); replica_helper::remove_node(update_req->node, pc.secondaries); + replica_helper::remove_node(update_req->hp_node, pc.hp_secondaries); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: pc.secondaries.push_back(update_req->node); + pc.hp_secondaries.push_back(update_req->hp_node); update_req->type = config_type::CT_UPGRADE_TO_SECONDARY; break; case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: - if (update_req->node == pc.primary) + if (update_req->hp_node == pc.hp_primary) { pc.primary.set_invalid(); - else + pc.hp_primary.reset(); + } else { replica_helper::remove_node(update_req->node, pc.secondaries); + replica_helper::remove_node(update_req->hp_node, pc.hp_secondaries); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: pc.secondaries.push_back(pc.primary); pc.primary.set_invalid(); + pc.hp_secondaries.push_back(pc.hp_primary); + pc.hp_primary.reset(); break; default: break; @@ -137,7 +146,7 @@ class fake_sender_meta_service : public dsn::replication::meta_service class null_meta_service : public dsn::replication::meta_service { public: - void send_message(const dsn::rpc_address &target, dsn::message_ex *request) + void send_message(const dsn::host_port &target, dsn::message_ex *request) { LOG_INFO("send request to {}", target); request->add_ref(); @@ -154,7 +163,7 @@ class dummy_partition_guardian : public partition_guardian { action.type = config_type::CT_INVALID; const dsn::partition_configuration &pc = *get_config(*view.apps, gpid); - if (!pc.primary.is_invalid() && pc.secondaries.size() == 2) + if (!pc.hp_primary.is_invalid() && pc.hp_secondaries.size() == 2) return pc_status::healthy; return pc_status::ill; } @@ -238,52 +247,54 @@ void meta_service_test_app::update_configuration_test() ss->_all_apps.emplace(1, app); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 4, 4); dsn::partition_configuration &pc0 = app->partitions[0]; - pc0.primary = nodes[0]; - pc0.secondaries.push_back(nodes[1]); - pc0.secondaries.push_back(nodes[2]); + pc0.primary = nodes[0].second; + pc0.__set_hp_primary(nodes[0].first); + pc0.secondaries = {nodes[1].second, nodes[2].second}; + pc0.__set_hp_secondaries({nodes[1].first, nodes[2].first}); pc0.ballot = 3; dsn::partition_configuration &pc1 = app->partitions[1]; - pc1.primary = nodes[1]; - pc1.secondaries.push_back(nodes[0]); - pc1.secondaries.push_back(nodes[2]); + pc1.primary = nodes[1].second; + pc1.__set_hp_primary(nodes[1].first); + pc1.secondaries = {nodes[0].second, nodes[2].second}; + pc1.__set_hp_secondaries({nodes[0].first, nodes[2].first}); pc1.ballot = 3; ss->sync_apps_to_remote_storage(); 
ASSERT_TRUE(ss->spin_wait_staging(30)); ss->initialize_node_state(); - svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true); + svc->set_node_state({nodes[0].first, nodes[1].first, nodes[2].first}, true); svc->_started = true; // test remove primary state_validator validator1 = [pc0](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc0.pid); - return pc->ballot == pc0.ballot + 2 && pc->secondaries.size() == 1 && - std::find(pc0.secondaries.begin(), pc0.secondaries.end(), pc->primary) != - pc0.secondaries.end(); + return pc->ballot == pc0.ballot + 2 && pc->hp_secondaries.size() == 1 && + std::find(pc0.hp_secondaries.begin(), pc0.hp_secondaries.end(), pc->hp_primary) != + pc0.hp_secondaries.end(); }; // test kickoff secondary - dsn::rpc_address addr = nodes[0]; - state_validator validator2 = [pc1, addr](const app_mapper &apps) { + auto hp = nodes[0].first; + state_validator validator2 = [pc1, hp](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc1.pid); - return pc->ballot == pc1.ballot + 1 && pc->secondaries.size() == 1 && - pc->secondaries.front() != addr; + return pc->ballot == pc1.ballot + 1 && pc->hp_secondaries.size() == 1 && + pc->hp_secondaries.front() != hp; }; - svc->set_node_state({nodes[0]}, false); + svc->set_node_state({nodes[0].first}, false); ASSERT_TRUE(wait_state(ss, validator1, 30)); ASSERT_TRUE(wait_state(ss, validator2, 30)); // test add secondary - svc->set_node_state({nodes[3]}, true); + svc->set_node_state({nodes[3].first}, true); state_validator validator3 = [pc0](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc0.pid); - return pc->ballot == pc0.ballot + 1 && pc->secondaries.size() == 2; + return pc->ballot == pc0.ballot + 1 && pc->hp_secondaries.size() == 2; }; // the default delay for add node is 5 miniutes ASSERT_FALSE(wait_state(ss, validator3, 10)); @@ -318,32 +329,41 @@ void meta_service_test_app::adjust_dropped_size() ss->_all_apps.emplace(1, app); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 10, 10); // first, the replica is healthy, and there are 2 dropped dsn::partition_configuration &pc = app->partitions[0]; - pc.primary = nodes[0]; - pc.secondaries = {nodes[1], nodes[2]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second, nodes[2].second}; + pc.__set_hp_secondaries({nodes[1].first, nodes[2].first}); pc.ballot = 10; config_context &cc = *get_config_context(ss->_all_apps, pc.pid); cc.dropped = { - dropped_replica{nodes[3], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, - dropped_replica{nodes[4], 20, invalid_ballot, invalid_decree, invalid_decree}, + dropped_replica{nodes[3].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, + dropped_replica{nodes[4].first, 20, invalid_ballot, invalid_decree, invalid_decree}, }; ss->sync_apps_to_remote_storage(); - generate_node_mapper(ss->_nodes, ss->_all_apps, nodes); + + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } + generate_node_mapper(ss->_nodes, ss->_all_apps, hps); // then we receive a request for upgrade a node to secondary std::shared_ptr req = std::make_shared(); req->config = pc; req->config.ballot++; - req->config.secondaries.push_back(nodes[5]); + req->config.secondaries.push_back(nodes[5].second); + req->config.__set_hp_secondaries({nodes[5].first}); req->info = info; - req->node = nodes[5]; + req->node = nodes[5].second; + req->__set_hp_node(nodes[5].first); req->type = 
config_type::CT_UPGRADE_TO_SECONDARY; call_update_configuration(svc.get(), req); @@ -352,7 +372,8 @@ void meta_service_test_app::adjust_dropped_size() // then receive a config_sync request fro nodes[4], which has less data than node[3] std::shared_ptr req2 = std::make_shared(); - req2->__set_node(nodes[4]); + req2->node = nodes[4].second; + req2->__set_hp_node(nodes[4].first); replica_info rep_info; rep_info.pid = pc.pid; @@ -372,7 +393,7 @@ void meta_service_test_app::adjust_dropped_size() dropped_replica &d = cc.dropped[0]; if (d.time != dropped_replica::INVALID_TIMESTAMP) return false; - if (d.node != nodes[4]) + if (d.node != nodes[4].first) return false; if (d.last_committed_decree != rep_info.last_committed_decree) return false; @@ -408,17 +429,22 @@ void meta_service_test_app::apply_balancer_test() meta_svc->_balancer.reset(new greedy_load_balancer(meta_svc.get())); // initialize data structure - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 5, 10); + std::vector hps; + for (const auto &p : node_list) { + hps.emplace_back(p.first); + } + server_state *ss = meta_svc->_state.get(); - generate_apps(ss->_all_apps, node_list, 5, 5, std::pair(2, 5), false); + generate_apps(ss->_all_apps, hps, 5, 5, std::pair(2, 5), false); app_mapper backed_app; node_mapper backed_nodes; clone_app_mapper(backed_app, ss->_all_apps); - generate_node_mapper(backed_nodes, backed_app, node_list); + generate_node_mapper(backed_nodes, backed_app, hps); // before initialize, we need to mark apps to AS_CREATING: for (auto &kv : ss->_all_apps) { @@ -430,7 +456,7 @@ void meta_service_test_app::apply_balancer_test() ss->initialize_node_state(); meta_svc->_started = true; - meta_svc->set_node_state(node_list, true); + meta_svc->set_node_state(hps, true); app_mapper_compare(backed_app, ss->_all_apps); // run balancer @@ -480,7 +506,7 @@ void meta_service_test_app::cannot_run_balancer_test() svc->_balancer.reset(new dummy_balancer(svc.get())); svc->_partition_guardian.reset(new dummy_partition_guardian(svc.get())); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 10, 10); dsn::app_info info; @@ -499,12 +525,19 @@ void meta_service_test_app::cannot_run_balancer_test() svc->_state->_table_metric_entities.create_entity(info.app_id, info.partition_count); dsn::partition_configuration &pc = the_app->partitions[0]; - pc.primary = nodes[0]; - pc.secondaries = {nodes[1], nodes[2]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second, nodes[2].second}; + pc.__set_hp_secondaries({nodes[1].first, nodes[2].first}); + + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } #define REGENERATE_NODE_MAPPER \ svc->_state->_nodes.clear(); \ - generate_node_mapper(svc->_state->_nodes, svc->_state->_all_apps, nodes) + generate_node_mapper(svc->_state->_nodes, svc->_state->_all_apps, hps) REGENERATE_NODE_MAPPER; // stage are freezed @@ -518,14 +551,16 @@ void meta_service_test_app::cannot_run_balancer_test() // all the partitions are not healthy svc->_function_level.store(meta_function_level::fl_lively); pc.primary.set_invalid(); + pc.hp_primary.reset(); REGENERATE_NODE_MAPPER; ASSERT_FALSE(svc->_state->check_all_partitions()); // some dropped node still exists in nodes - pc.primary = nodes[0]; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); REGENERATE_NODE_MAPPER; - get_node_state(svc->_state->_nodes, pc.primary, true)->set_alive(false); + get_node_state(svc->_state->_nodes, 
pc.hp_primary, true)->set_alive(false); ASSERT_FALSE(svc->_state->check_all_partitions()); // some apps are staging diff --git a/src/nfs/nfs.thrift b/src/nfs/nfs.thrift index 3f0f96bbb9..54716d3c44 100644 --- a/src/nfs/nfs.thrift +++ b/src/nfs/nfs.thrift @@ -30,16 +30,17 @@ namespace cpp dsn.service struct copy_request { - 1: dsn.rpc_address source; - 2: string source_dir; - 3: string dst_dir; - 4: string file_name; - 5: i64 offset; - 6: i32 size; - 7: bool is_last; - 8: bool overwrite; - 9: optional string source_disk_tag; - 10: optional dsn.gpid pid; + 1: dsn.rpc_address source; + 2: string source_dir; + 3: string dst_dir; + 4: string file_name; + 5: i64 offset; + 6: i32 size; + 7: bool is_last; + 8: bool overwrite; + 9: optional string source_disk_tag; + 10: optional dsn.gpid pid; + 11: optional dsn.host_port hp_source; } struct copy_response @@ -52,14 +53,15 @@ struct copy_response struct get_file_size_request { - 1: dsn.rpc_address source; - 2: string dst_dir; - 3: list file_list; - 4: string source_dir; - 5: bool overwrite; - 6: optional string source_disk_tag; - 7: optional string dest_disk_tag; - 8: optional dsn.gpid pid; + 1: dsn.rpc_address source; + 2: string dst_dir; + 3: list file_list; + 4: string source_dir; + 5: bool overwrite; + 6: optional string source_disk_tag; + 7: optional string dest_disk_tag; + 8: optional dsn.gpid pid; + 9: optional dsn.host_port hp_source; } struct get_file_size_response diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp index 1f2656ddd2..0feb717114 100644 --- a/src/nfs/nfs_client_impl.cpp +++ b/src/nfs/nfs_client_impl.cpp @@ -33,6 +33,8 @@ #include "fmt/core.h" #include "nfs/nfs_code_definition.h" #include "nfs/nfs_node.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/command_manager.h" #include "utils/filesystem.h" @@ -136,7 +138,7 @@ void nfs_client_impl::begin_remote_copy(std::shared_ptr &rc { user_request_ptr req(new user_request()); req->high_priority = rci->high_priority; - req->file_size_req.source = rci->source; + req->file_size_req.source = dsn::dns_resolver::instance().resolve_address(rci->source); req->file_size_req.dst_dir = rci->dest_dir; req->file_size_req.file_list = rci->files; req->file_size_req.source_dir = rci->source_dir; @@ -144,15 +146,17 @@ void nfs_client_impl::begin_remote_copy(std::shared_ptr &rc req->file_size_req.__set_source_disk_tag(rci->source_disk_tag); req->file_size_req.__set_dest_disk_tag(rci->dest_disk_tag); req->file_size_req.__set_pid(rci->pid); + req->file_size_req.__set_hp_source(rci->source); req->nfs_task = nfs_task; req->is_finished = false; - async_nfs_get_file_size(req->file_size_req, - [=](error_code err, get_file_size_response &&resp) { - end_get_file_size(err, std::move(resp), req); - }, - std::chrono::milliseconds(FLAGS_rpc_timeout_ms), - req->file_size_req.source); + async_nfs_get_file_size( + req->file_size_req, + [=](error_code err, get_file_size_response &&resp) { + end_get_file_size(err, std::move(resp), req); + }, + std::chrono::milliseconds(FLAGS_rpc_timeout_ms), + dsn::dns_resolver::instance().resolve_address(req->file_size_req.hp_source)); } void nfs_client_impl::end_get_file_size(::dsn::error_code err, @@ -160,7 +164,8 @@ void nfs_client_impl::end_get_file_size(::dsn::error_code err, const user_request_ptr &ureq) { if (err != ::dsn::ERR_OK) { - LOG_ERROR("[nfs_service] remote get file size failed, source = {}, dir = {}, err = {}", + LOG_ERROR("[nfs_service] remote get file size failed, source = 
{}({}), dir = {}, err = {}", + ureq->file_size_req.hp_source, ureq->file_size_req.source, ureq->file_size_req.source_dir, err); @@ -170,7 +175,8 @@ void nfs_client_impl::end_get_file_size(::dsn::error_code err, err = dsn::error_code(resp.error); if (err != ::dsn::ERR_OK) { - LOG_ERROR("[nfs_service] remote get file size failed, source = {}, dir = {}, err = {}", + LOG_ERROR("[nfs_service] remote get file size failed, source = {}({}), dir = {}, err = {}", + ureq->file_size_req.hp_source, ureq->file_size_req.source, ureq->file_size_req.source_dir, err); @@ -301,6 +307,7 @@ void nfs_client_impl::continue_copy() copy_req.is_last = req->is_last; copy_req.__set_source_disk_tag(ureq->file_size_req.source_disk_tag); copy_req.__set_pid(ureq->file_size_req.pid); + copy_req.__set_hp_source(ureq->file_size_req.hp_source); req->remote_copy_task = async_nfs_copy(copy_req, [=](error_code err, copy_response &&resp) { @@ -314,7 +321,8 @@ void nfs_client_impl::continue_copy() } }, std::chrono::milliseconds(FLAGS_rpc_timeout_ms), - req->file_ctx->user_req->file_size_req.source); + dsn::dns_resolver::instance().resolve_address( + req->file_ctx->user_req->file_size_req.hp_source)); } else { --ureq->concurrent_copy_count; --_concurrent_copy_request_count; @@ -347,14 +355,17 @@ void nfs_client_impl::end_copy(::dsn::error_code err, METRIC_VAR_INCREMENT(nfs_client_copy_failed_requests); if (!fc->user_req->is_finished) { + host_port hp = fc->user_req->file_size_req.hp_source; if (reqc->retry_count > 0) { - LOG_WARNING("[nfs_service] remote copy failed, source = {}, dir = {}, file = {}, " - "err = {}, retry_count = {}", - fc->user_req->file_size_req.source, - fc->user_req->file_size_req.source_dir, - fc->file_name, - err, - reqc->retry_count); + LOG_WARNING( + "[nfs_service] remote copy failed, source = {}({}), dir = {}, file = {}, " + "err = {}, retry_count = {}", + hp, + fc->user_req->file_size_req.source, + fc->user_req->file_size_req.source_dir, + fc->file_name, + err, + reqc->retry_count); // retry copy reqc->retry_count--; @@ -366,8 +377,9 @@ void nfs_client_impl::end_copy(::dsn::error_code err, else _copy_requests_low.push_retry(reqc); } else { - LOG_ERROR("[nfs_service] remote copy failed, source = {}, dir = {}, file = {}, " + LOG_ERROR("[nfs_service] remote copy failed, source = {}({}), dir = {}, file = {}, " "err = {}, retry_count = {}", + hp, fc->user_req->file_size_req.source, fc->user_req->file_size_req.source_dir, fc->file_name, diff --git a/src/nfs/nfs_node.cpp b/src/nfs/nfs_node.cpp index 0103b9a35e..e4282dcbe1 100644 --- a/src/nfs/nfs_node.cpp +++ b/src/nfs/nfs_node.cpp @@ -40,7 +40,7 @@ std::unique_ptr nfs_node::create() return std::make_unique(); } -aio_task_ptr nfs_node::copy_remote_directory(const rpc_address &remote, +aio_task_ptr nfs_node::copy_remote_directory(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::string &dest_disk_tag, @@ -68,7 +68,7 @@ aio_task_ptr nfs_node::copy_remote_directory(const rpc_address &remote, hash); } -aio_task_ptr nfs_node::copy_remote_files(const rpc_address &remote, +aio_task_ptr nfs_node::copy_remote_files(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::vector &files, diff --git a/src/nfs/nfs_node.h b/src/nfs/nfs_node.h index 635562c669..f22810cd84 100644 --- a/src/nfs/nfs_node.h +++ b/src/nfs/nfs_node.h @@ -33,7 +33,7 @@ #include "aio/aio_task.h" #include "common/gpid.h" #include "runtime/api_task.h" -#include "runtime/rpc/rpc_address.h" +#include 
"runtime/rpc/rpc_host_port.h" #include "runtime/task/task_code.h" #include "utils/error_code.h" @@ -50,7 +50,7 @@ class rpc_replier; struct remote_copy_request { - dsn::rpc_address source; + dsn::host_port source; std::string source_disk_tag; std::string source_dir; std::vector files; @@ -67,7 +67,7 @@ class nfs_node static std::unique_ptr create(); public: - aio_task_ptr copy_remote_directory(const rpc_address &remote, + aio_task_ptr copy_remote_directory(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::string &dest_disk_tag, @@ -79,7 +79,7 @@ class nfs_node task_tracker *tracker, aio_handler &&callback, int hash = 0); - aio_task_ptr copy_remote_files(const rpc_address &remote, + aio_task_ptr copy_remote_files(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::vector &files, // empty for all diff --git a/src/nfs/test/main.cpp b/src/nfs/test/main.cpp index 2994345e8c..c8ebd6d4f8 100644 --- a/src/nfs/test/main.cpp +++ b/src/nfs/test/main.cpp @@ -38,7 +38,7 @@ #include "gtest/gtest.h" #include "nfs/nfs_node.h" #include "runtime/app_model.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_code.h" #include "runtime/tool_api.h" #include "test_util/test_util.h" @@ -109,23 +109,22 @@ TEST_P(nfs_test, basic) ASSERT_TRUE(dst_filenames.empty()); aio_result r; - dsn::aio_task_ptr t = - nfs->copy_remote_files(dsn::rpc_address::from_host_port("localhost", 20101), - "default", - ".", - kSrcFilenames, - "default", - kDstDir, - fake_pid, - false, - false, - LPC_AIO_TEST_NFS, - nullptr, - [&r](dsn::error_code err, size_t sz) { - r.err = err; - r.sz = sz; - }, - 0); + auto t = nfs->copy_remote_files(dsn::host_port("localhost", 20101), + "default", + ".", + kSrcFilenames, + "default", + kDstDir, + fake_pid, + false, + false, + LPC_AIO_TEST_NFS, + nullptr, + [&r](dsn::error_code err, size_t sz) { + r.err = err; + r.sz = sz; + }, + 0); ASSERT_NE(nullptr, t); ASSERT_TRUE(t->wait(20000)); ASSERT_EQ(r.err, t->error()); @@ -152,23 +151,22 @@ TEST_P(nfs_test, basic) // copy files to the destination directory, files will be overwritten. 
{ aio_result r; - dsn::aio_task_ptr t = - nfs->copy_remote_files(dsn::rpc_address::from_host_port("localhost", 20101), - "default", - ".", - kSrcFilenames, - "default", - kDstDir, - fake_pid, - true, - false, - LPC_AIO_TEST_NFS, - nullptr, - [&r](dsn::error_code err, size_t sz) { - r.err = err; - r.sz = sz; - }, - 0); + auto t = nfs->copy_remote_files(dsn::host_port("localhost", 20101), + "default", + ".", + kSrcFilenames, + "default", + kDstDir, + fake_pid, + true, + false, + LPC_AIO_TEST_NFS, + nullptr, + [&r](dsn::error_code err, size_t sz) { + r.err = err; + r.sz = sz; + }, + 0); ASSERT_NE(nullptr, t); ASSERT_TRUE(t->wait(20000)); ASSERT_EQ(r.err, t->error()); @@ -205,22 +203,21 @@ TEST_P(nfs_test, basic) ASSERT_FALSE(utils::filesystem::directory_exists(kNewDstDir)); aio_result r; - dsn::aio_task_ptr t = - nfs->copy_remote_directory(dsn::rpc_address::from_host_port("localhost", 20101), - "default", - kDstDir, - "default", - kNewDstDir, - fake_pid, - false, - false, - LPC_AIO_TEST_NFS, - nullptr, - [&r](dsn::error_code err, size_t sz) { - r.err = err; - r.sz = sz; - }, - 0); + auto t = nfs->copy_remote_directory(dsn::host_port("localhost", 20101), + "default", + kDstDir, + "default", + kNewDstDir, + fake_pid, + false, + false, + LPC_AIO_TEST_NFS, + nullptr, + [&r](dsn::error_code err, size_t sz) { + r.err = err; + r.sz = sz; + }, + 0); ASSERT_NE(nullptr, t); ASSERT_TRUE(t->wait(20000)); ASSERT_EQ(r.err, t->error()); diff --git a/src/redis_protocol/proxy_lib/proxy_layer.cpp b/src/redis_protocol/proxy_lib/proxy_layer.cpp index fc415f21d1..f3f15e6d5a 100644 --- a/src/redis_protocol/proxy_lib/proxy_layer.cpp +++ b/src/redis_protocol/proxy_lib/proxy_layer.cpp @@ -22,6 +22,7 @@ #include "proxy_layer.h" #include "runtime/rpc/network.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -61,7 +62,7 @@ proxy_stub::proxy_stub(const proxy_session::factory &f, void proxy_stub::on_rpc_request(dsn::message_ex *request) { - ::dsn::rpc_address source = request->header->from_address; + auto source = ::dsn::host_port::from_address(request->header->from_address); std::shared_ptr session; { ::dsn::zauto_read_lock l(_lock); @@ -86,21 +87,21 @@ void proxy_stub::on_rpc_request(dsn::message_ex *request) void proxy_stub::on_recv_remove_session_request(dsn::message_ex *request) { - ::dsn::rpc_address source = request->header->from_address; + auto source = ::dsn::host_port::from_address(request->header->from_address); remove_session(source); } -void proxy_stub::remove_session(dsn::rpc_address remote_address) +void proxy_stub::remove_session(dsn::host_port remote) { std::shared_ptr session; { ::dsn::zauto_write_lock l(_lock); - auto iter = _sessions.find(remote_address); + auto iter = _sessions.find(remote); if (iter == _sessions.end()) { - LOG_WARNING("{} has been removed from proxy stub", remote_address); + LOG_WARNING("{} has been removed from proxy stub", remote); return; } - LOG_INFO("remove {} from proxy stub", remote_address); + LOG_INFO("remove {} from proxy stub", remote); session = std::move(iter->second); _sessions.erase(iter); } @@ -113,8 +114,9 @@ proxy_session::proxy_session(proxy_stub *op, dsn::message_ex *first_msg) CHECK_NOTNULL(first_msg, "null msg when create session"); _backup_one_request->add_ref(); - _remote_address = _backup_one_request->header->from_address; - CHECK_EQ_MSG(_remote_address.type(), HOST_TYPE_IPV4, "invalid rpc_address type"); + _session_remote = 
::dsn::host_port::from_address(_backup_one_request->header->from_address); + _session_remote_str = _session_remote.to_string(); + CHECK_EQ_MSG(_session_remote.type(), HOST_TYPE_IPV4, "invalid host_port type"); } proxy_session::~proxy_session() @@ -135,7 +137,7 @@ void proxy_session::on_recv_request(dsn::message_ex *msg) // "parse" with a lock. a subclass may implement a lock inside parse if necessary if (!parse(msg)) { LOG_ERROR_PREFIX("got invalid message, try to remove the proxy session from proxy stub"); - _stub->remove_session(_remote_address); + _stub->remove_session(_session_remote); LOG_ERROR_PREFIX("close the proxy session"); ((dsn::message_ex *)_backup_one_request)->io_session->close(); diff --git a/src/redis_protocol/proxy_lib/proxy_layer.h b/src/redis_protocol/proxy_lib/proxy_layer.h index 8f8a7022a9..99884074ca 100644 --- a/src/redis_protocol/proxy_lib/proxy_layer.h +++ b/src/redis_protocol/proxy_lib/proxy_layer.h @@ -25,7 +25,7 @@ #include #include -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/serverlet.h" #include "runtime/task/task_code.h" #include "utils/threadpool_code.h" @@ -70,7 +70,7 @@ class proxy_session : public std::enable_shared_from_this virtual bool parse(dsn::message_ex *msg) = 0; dsn::message_ex *create_response(); - const char *log_prefix() const { return _remote_address.to_string(); } + const char *log_prefix() const { return _session_remote_str.c_str(); } protected: proxy_stub *_stub; @@ -79,8 +79,9 @@ class proxy_session : public std::enable_shared_from_this // when get message from raw parser, request & response of "dsn::message_ex*" are not in couple. // we need to backup one request to create a response struct. dsn::message_ex *_backup_one_request; - // the client address for which this session served - dsn::rpc_address _remote_address; + // the client for which this session served + dsn::host_port _session_remote; + std::string _session_remote_str; }; class proxy_stub : public ::dsn::serverlet @@ -106,16 +107,16 @@ class proxy_stub : public ::dsn::serverlet this->unregister_rpc_handler(RPC_CALL_RAW_MESSAGE); this->unregister_rpc_handler(RPC_CALL_RAW_SESSION_DISCONNECT); } - void remove_session(dsn::rpc_address remote_address); + void remove_session(dsn::host_port remote_address); private: void on_rpc_request(dsn::message_ex *request); void on_recv_remove_session_request(dsn::message_ex *); ::dsn::zrwlock_nr _lock; - std::unordered_map<::dsn::rpc_address, std::shared_ptr> _sessions; + std::unordered_map<::dsn::host_port, std::shared_ptr> _sessions; proxy_session::factory _factory; - ::dsn::rpc_address _uri_address; + ::dsn::host_port _uri_address; std::string _cluster; std::string _app; std::string _geo_app; diff --git a/src/redis_protocol/proxy_lib/redis_parser.cpp b/src/redis_protocol/proxy_lib/redis_parser.cpp index c0cf0bff09..143f493947 100644 --- a/src/redis_protocol/proxy_lib/redis_parser.cpp +++ b/src/redis_protocol/proxy_lib/redis_parser.cpp @@ -32,12 +32,13 @@ #include #include +#include "absl/strings/string_view.h" #include "common/common.h" #include "common/replication_other_types.h" #include "pegasus/client.h" #include "rrdb/rrdb_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/serialization.h" #include "utils/api_utilities.h" #include "utils/binary_writer.h" @@ -45,7 +46,6 @@ #include "utils/fmt_logging.h" #include "utils/ports.h" #include "utils/string_conv.h" -#include "absl/strings/string_view.h" 
#include "utils/strings.h" #include "utils/utils.h" @@ -96,7 +96,7 @@ redis_parser::redis_parser(proxy_stub *op, dsn::message_ex *first_msg) { ::dsn::apps::rrdb_client *r; if (op) { - std::vector meta_list; + std::vector meta_list; dsn::replication::replica_helper::load_meta_servers( meta_list, dsn::PEGASUS_CLUSTER_SECTION_NAME.c_str(), op->get_cluster()); r = new ::dsn::apps::rrdb_client(op->get_cluster(), meta_list, op->get_app()); diff --git a/src/replica/bulk_load/replica_bulk_loader.cpp b/src/replica/bulk_load/replica_bulk_loader.cpp index 1306eaee30..9ae6f0598b 100644 --- a/src/replica/bulk_load/replica_bulk_loader.cpp +++ b/src/replica/bulk_load/replica_bulk_loader.cpp @@ -36,8 +36,10 @@ #include "replica/replica_stub.h" #include "replica/replication_app_base.h" #include "replica_bulk_loader.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "utils/autoref_ptr.h" #include "utils/chrono_literals.h" @@ -186,13 +188,15 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met LOG_INFO_PREFIX("start to broadcast group bulk load"); - for (const auto &addr : _replica->_primary_states.membership.secondaries) { - if (addr == _stub->_primary_address) + for (const auto &hp : _replica->_primary_states.membership.hp_secondaries) { + if (hp == _stub->primary_host_port()) continue; auto request = std::make_unique(); request->app_name = _replica->_app_info.app_name; - request->target_address = addr; + const auto &addr = dsn::dns_resolver::instance().resolve_address(hp); + request->target = addr; + request->__set_hp_target(hp); _replica->_primary_states.get_replica_config(partition_status::PS_SECONDARY, request->config); request->cluster_name = meta_req.cluster_name; @@ -200,14 +204,14 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met request->meta_bulk_load_status = meta_req.meta_bulk_load_status; request->remote_root_path = meta_req.remote_root_path; - LOG_INFO_PREFIX("send group_bulk_load_request to {}", addr); + LOG_INFO_PREFIX("send group_bulk_load_request to {}({})", hp, addr); group_bulk_load_rpc rpc( std::move(request), RPC_GROUP_BULK_LOAD, 0_ms, 0, get_gpid().thread_hash()); auto callback_task = rpc.call(addr, tracker(), [this, rpc](error_code err) mutable { on_group_bulk_load_reply(err, rpc.request(), rpc.response()); }); - _replica->_primary_states.group_bulk_load_pending_replies[addr] = callback_task; + _replica->_primary_states.group_bulk_load_pending_replies[hp] = callback_task; } } @@ -243,8 +247,9 @@ void replica_bulk_loader::on_group_bulk_load(const group_bulk_load_request &requ return; } - LOG_INFO_PREFIX("receive group_bulk_load request, primary address = {}, ballot = {}, " + LOG_INFO_PREFIX("receive group_bulk_load request, primary address = {}({}), ballot = {}, " "meta bulk_load_status = {}, local bulk_load_status = {}", + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.meta_bulk_load_status), @@ -278,34 +283,39 @@ void replica_bulk_loader::on_group_bulk_load_reply(error_code err, return; } - _replica->_primary_states.group_bulk_load_pending_replies.erase(req.target_address); + _replica->_primary_states.group_bulk_load_pending_replies.erase(req.hp_target); if (err != ERR_OK) { - LOG_ERROR_PREFIX( - "failed to receive group_bulk_load_reply from {}, error = {}", req.target_address, err); - 
_replica->_primary_states.reset_node_bulk_load_states(req.target_address); + LOG_ERROR_PREFIX("failed to receive group_bulk_load_reply from {}({}), error = {}", + req.hp_target, + req.target, + err); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } if (resp.err != ERR_OK) { - LOG_ERROR_PREFIX("receive group_bulk_load response from {} failed, error = {}", - req.target_address, + LOG_ERROR_PREFIX("receive group_bulk_load response from {}({}) failed, error = {}", + req.hp_target, + req.target, resp.err); - _replica->_primary_states.reset_node_bulk_load_states(req.target_address); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } if (req.config.ballot != get_ballot()) { - LOG_ERROR_PREFIX("recevied wrong group_bulk_load response from {}, request ballot = {}, " - "current ballot = {}", - req.target_address, - req.config.ballot, - get_ballot()); - _replica->_primary_states.reset_node_bulk_load_states(req.target_address); + LOG_ERROR_PREFIX( + "recevied wrong group_bulk_load response from {}({}), request ballot = {}, " + "current ballot = {}", + req.hp_target, + req.target, + req.config.ballot, + get_ballot()); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } - _replica->_primary_states.secondary_bulk_load_states[req.target_address] = resp.bulk_load_state; + _replica->_primary_states.secondary_bulk_load_states[req.hp_target] = resp.bulk_load_state; } // ThreadPool: THREAD_POOL_REPLICATION @@ -430,7 +440,7 @@ error_code replica_bulk_loader::start_download(const std::string &remote_dir, if (_stub->_bulk_load_downloading_count.load() >= FLAGS_max_concurrent_bulk_load_downloading_count) { LOG_WARNING_PREFIX("node[{}] already has {} replica downloading, wait for next round", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); return ERR_BUSY; } @@ -449,7 +459,7 @@ error_code replica_bulk_loader::start_download(const std::string &remote_dir, _status = bulk_load_status::BLS_DOWNLOADING; ++_stub->_bulk_load_downloading_count; LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); _bulk_load_start_time_ms = dsn_now_ms(); METRIC_VAR_INCREMENT(bulk_load_downloading_count); @@ -661,7 +671,7 @@ void replica_bulk_loader::try_decrease_bulk_load_download_count() --_stub->_bulk_load_downloading_count; _is_downloading.store(false); LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); } @@ -736,8 +746,8 @@ void replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_sta } if (status() == partition_status::PS_PRIMARY) { - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { - _replica->_primary_states.reset_node_bulk_load_states(target_address); + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + _replica->_primary_states.reset_node_bulk_load_states(target_hp); } } @@ -924,24 +934,27 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo primary_state.__set_download_status(_download_status.load()); } response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, download progress = {}%, status = {}", + 
response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), download progress = {}%, status = {}", + _replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, primary_state.download_progress, primary_state.download_status); int32_t total_progress = primary_state.download_progress; - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; int32_t s_progress = secondary_state.__isset.download_progress ? secondary_state.download_progress : 0; error_code s_status = secondary_state.__isset.download_status ? secondary_state.download_status : ERR_OK; - LOG_INFO_PREFIX("secondary = {}, download progress = {}%, status={}", - target_address, - s_progress, - s_status); - response.group_bulk_load_state[target_address] = secondary_state; + LOG_INFO_PREFIX( + "secondary = {}, download progress = {}%, status={}", target_hp, s_progress, s_status); + response.group_bulk_load_state[dsn::dns_resolver::instance().resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; total_progress += s_progress; } @@ -964,23 +977,28 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon partition_bulk_load_state primary_state; primary_state.__set_ingest_status(_replica->_app->get_ingestion_status()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, ingestion status = {}", + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), ingestion status = {}", + _replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, enum_to_string(primary_state.ingest_status)); bool is_group_ingestion_finish = (primary_state.ingest_status == ingestion_status::IS_SUCCEED) && - (_replica->_primary_states.membership.secondaries.size() + 1 == + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; ingestion_status::type ingest_status = secondary_state.__isset.ingest_status ? 
secondary_state.ingest_status : ingestion_status::IS_INVALID; LOG_INFO_PREFIX( - "secondary = {}, ingestion status={}", target_address, enum_to_string(ingest_status)); - response.group_bulk_load_state[target_address] = secondary_state; + "secondary = {}, ingestion status={}", target_hp, enum_to_string(ingest_status)); + response.group_bulk_load_state[dsn::dns_resolver::instance().resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; is_group_ingestion_finish &= (ingest_status == ingestion_status::IS_SUCCEED); } response.__set_is_group_ingestion_finished(is_group_ingestion_finish); @@ -1007,21 +1025,25 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_cleaned_up(is_cleaned_up()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; LOG_INFO_PREFIX("primary = {}, bulk load states cleaned_up = {}", _replica->_primary_states.membership.primary, primary_state.is_cleaned_up); bool group_flag = (primary_state.is_cleaned_up) && - (_replica->_primary_states.membership.secondaries.size() + 1 == + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; bool is_cleaned_up = secondary_state.__isset.is_cleaned_up ? 
secondary_state.is_cleaned_up : false; LOG_INFO_PREFIX( - "secondary = {}, bulk load states cleaned_up = {}", target_address, is_cleaned_up); - response.group_bulk_load_state[target_address] = secondary_state; + "secondary = {}, bulk load states cleaned_up = {}", target_hp, is_cleaned_up); + response.group_bulk_load_state[dsn::dns_resolver::instance().resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; group_flag &= is_cleaned_up; } LOG_INFO_PREFIX("group bulk load states cleaned_up = {}", group_flag); @@ -1042,19 +1064,24 @@ void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_paused(_status == bulk_load_status::BLS_PAUSED); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, bulk_load is_paused = {}", + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), bulk_load is_paused = {}", + _replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, primary_state.is_paused); - bool group_is_paused = - primary_state.is_paused && (_replica->_primary_states.membership.secondaries.size() + 1 == - _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + bool group_is_paused = primary_state.is_paused && + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == + _replica->_primary_states.membership.max_replica_count); + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { partition_bulk_load_state secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; bool is_paused = secondary_state.__isset.is_paused ? 
secondary_state.is_paused : false; - LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", target_address, is_paused); - response.group_bulk_load_state[target_address] = secondary_state; + LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", target_hp, is_paused); + response.group_bulk_load_state[dsn::dns_resolver::instance().resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; group_is_paused &= is_paused; } LOG_INFO_PREFIX("group bulk load is_paused = {}", group_is_paused); diff --git a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp index b604b362cb..1375549bf4 100644 --- a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp +++ b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp @@ -28,6 +28,7 @@ #include "replica/test/mock_utils.h" #include "replica/test/replica_test_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "test_util/test_util.h" #include "utils/fail_point.h" @@ -165,8 +166,8 @@ class replica_bulk_loader_test : public replica_test_base mock_group_progress(status, 10, 50, 50); partition_bulk_load_state state; state.__set_is_paused(true); - _replica->set_secondary_bulk_load_state(SECONDARY, state); - _replica->set_secondary_bulk_load_state(SECONDARY2, state); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state); bulk_load_response response; _bulk_loader->report_group_is_paused(response); @@ -219,7 +220,8 @@ class replica_bulk_loader_test : public replica_test_base _group_req.meta_bulk_load_status = status; _group_req.config.status = partition_status::PS_SECONDARY; _group_req.config.ballot = b; - _group_req.target_address = SECONDARY; + _group_req.target = SECONDARY; + _group_req.__set_hp_target(SECONDARY_HP); } void mock_replica_config(partition_status::type status) @@ -228,6 +230,7 @@ class replica_bulk_loader_test : public replica_test_base rconfig.ballot = BALLOT; rconfig.pid = PID; rconfig.primary = PRIMARY; + rconfig.__set_hp_primary(PRIMARY_HP); rconfig.status = status; _replica->set_replica_config(rconfig); } @@ -242,6 +245,10 @@ class replica_bulk_loader_test : public replica_test_base config.primary = PRIMARY; config.secondaries.emplace_back(SECONDARY); config.secondaries.emplace_back(SECONDARY2); + config.__set_hp_primary(PRIMARY_HP); + config.__set_hp_secondaries({}); + config.hp_secondaries.emplace_back(SECONDARY_HP); + config.hp_secondaries.emplace_back(SECONDARY_HP2); _replica->set_primary_partition_configuration(config); } @@ -309,8 +316,8 @@ class replica_bulk_loader_test : public replica_test_base state1.__set_download_progress(secondary_progress1); state2.__set_download_status(ERR_OK); state2.__set_download_progress(secondary_progress2); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } void mock_group_progress(bulk_load_status::type p_status, @@ -346,8 +353,8 @@ class replica_bulk_loader_test : public replica_test_base partition_bulk_load_state state1, state2; state1.__set_ingest_status(status1); state2.__set_ingest_status(status2); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, 
state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } void mock_group_ingestion_states(ingestion_status::type s1_status, @@ -372,8 +379,8 @@ class replica_bulk_loader_test : public replica_test_base partition_bulk_load_state state1, state2; state1.__set_is_cleaned_up(s1_cleaned_up); state2.__set_is_cleaned_up(s2_cleaned_up); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } // helper functions @@ -382,7 +389,8 @@ class replica_bulk_loader_test : public replica_test_base int32_t get_download_progress() { return _bulk_loader->_download_progress.load(); } bool is_secondary_bulk_load_state_reset() { - const partition_bulk_load_state &state = _replica->get_secondary_bulk_load_state(SECONDARY); + const partition_bulk_load_state &state = + _replica->get_secondary_bulk_load_state(SECONDARY_HP); bool is_download_state_reset = (state.__isset.download_progress && state.__isset.download_status && state.download_progress == 0 && state.download_status == ERR_OK); @@ -413,6 +421,9 @@ class replica_bulk_loader_test : public replica_test_base rpc_address PRIMARY = rpc_address::from_ip_port("127.0.0.2", 34801); rpc_address SECONDARY = rpc_address::from_ip_port("127.0.0.3", 34801); rpc_address SECONDARY2 = rpc_address::from_ip_port("127.0.0.4", 34801); + const host_port PRIMARY_HP = host_port("localhost", 34801); + const host_port SECONDARY_HP = host_port("localhost", 34801); + const host_port SECONDARY_HP2 = host_port("localhost", 34801); int32_t MAX_DOWNLOADING_COUNT = 5; std::string LOCAL_DIR = bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR; std::string METADATA = bulk_load_constant::BULK_LOAD_METADATA; diff --git a/src/replica/duplication/duplication_sync_timer.cpp b/src/replica/duplication/duplication_sync_timer.cpp index 0682bd4317..8b9adce15e 100644 --- a/src/replica/duplication/duplication_sync_timer.cpp +++ b/src/replica/duplication/duplication_sync_timer.cpp @@ -71,6 +71,7 @@ void duplication_sync_timer::run() auto req = std::make_unique(); req->node = _stub->primary_address(); + req->__set_hp_node(_stub->primary_host_port()); // collects confirm points from all primaries on this server for (const replica_ptr &r : get_all_primaries()) { diff --git a/src/replica/duplication/replica_follower.cpp b/src/replica/duplication/replica_follower.cpp index 636dd1aec8..c9c5e317da 100644 --- a/src/replica/duplication/replica_follower.cpp +++ b/src/replica/duplication/replica_follower.cpp @@ -32,7 +32,9 @@ #include "nfs/nfs_node.h" #include "replica/replica.h" #include "replica/replica_stub.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -73,8 +75,8 @@ void replica_follower::init_master_info() dsn::utils::split_args(meta_list_str.c_str(), metas, ','); CHECK(!metas.empty(), "master cluster meta list is invalid!"); for (const auto &meta : metas) { - const auto node = rpc_address::from_host_port(meta); - CHECK(node, "{} is invalid meta address", meta); + const auto node = host_port::from_string(meta); + CHECK(!node.is_invalid(), "{} is invalid meta host_port", meta); 
_master_meta_list.emplace_back(std::move(node)); } } @@ -105,9 +107,9 @@ error_code replica_follower::duplicate_checkpoint() // ThreadPool: THREAD_POOL_DEFAULT void replica_follower::async_duplicate_checkpoint_from_master_replica() { - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(_master_cluster_name.c_str()); - meta_servers.group_address()->add_list(_master_meta_list); + meta_servers.group_host_port()->add_list(_master_meta_list); query_cfg_request meta_config_request; meta_config_request.app_name = _master_app_name; @@ -118,18 +120,21 @@ void replica_follower::async_duplicate_checkpoint_from_master_replica() dsn::message_ex *msg = dsn::message_ex::create_request( RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, 0, get_gpid().thread_hash()); dsn::marshall(msg, meta_config_request); - rpc::call(meta_servers, msg, &_tracker, [&](error_code err, query_cfg_response &&resp) mutable { - FAIL_POINT_INJECT_F("duplicate_checkpoint_ok", [&](absl::string_view s) -> void { - _tracker.set_tasks_success(); - return; - }); + rpc::call(dsn::dns_resolver::instance().resolve_address(meta_servers), + msg, + &_tracker, + [&](error_code err, query_cfg_response &&resp) mutable { + FAIL_POINT_INJECT_F("duplicate_checkpoint_ok", [&](absl::string_view s) -> void { + _tracker.set_tasks_success(); + return; + }); - FAIL_POINT_INJECT_F("duplicate_checkpoint_failed", - [&](absl::string_view s) -> void { return; }); - if (update_master_replica_config(err, std::move(resp)) == ERR_OK) { - copy_master_replica_checkpoint(); - } - }); + FAIL_POINT_INJECT_F("duplicate_checkpoint_failed", + [&](absl::string_view s) -> void { return; }); + if (update_master_replica_config(err, std::move(resp)) == ERR_OK) { + copy_master_replica_checkpoint(); + } + }); } // ThreadPool: THREAD_POOL_DEFAULT @@ -165,7 +170,7 @@ error_code replica_follower::update_master_replica_config(error_code err, query_ return ERR_INCONSISTENT_STATE; } - if (dsn_unlikely(resp.partitions[0].primary == rpc_address::s_invalid_address)) { + if (dsn_unlikely(resp.partitions[0].hp_primary == host_port::s_invalid_host_port)) { LOG_ERROR_PREFIX("master[{}] partition address is invalid", master_replica_name()); return ERR_INVALID_STATE; } @@ -173,8 +178,9 @@ error_code replica_follower::update_master_replica_config(error_code err, query_ // since the request just specify one partition, the result size is single _master_replica_config = resp.partitions[0]; LOG_INFO_PREFIX( - "query master[{}] config successfully and update local config: remote={}, gpid={}", + "query master[{}]({}) config successfully and update local config: remote={}, gpid={}", master_replica_name(), + _master_replica_config.hp_primary, _master_replica_config.primary, _master_replica_config.pid); return ERR_OK; @@ -217,13 +223,16 @@ error_code replica_follower::nfs_copy_checkpoint(error_code err, learn_response return ERR_FILE_OPERATION_FAILED; } + host_port hp_learnee; + GET_HOST_PORT(resp, learnee, hp_learnee); + nfs_copy_remote_files( - resp.address, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, dest); + hp_learnee, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, dest); return ERR_OK; } // ThreadPool: THREAD_POOL_DEFAULT -void replica_follower::nfs_copy_remote_files(const rpc_address &remote_node, +void replica_follower::nfs_copy_remote_files(const host_port &remote_node, const std::string &remote_disk, const std::string &remote_dir, std::vector &file_list, diff --git a/src/replica/duplication/replica_follower.h 
b/src/replica/duplication/replica_follower.h index dfc81fc734..0ca3f5c93e 100644 --- a/src/replica/duplication/replica_follower.h +++ b/src/replica/duplication/replica_follower.h @@ -27,6 +27,7 @@ #include "dsn.layer2_types.h" #include "replica/replica_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" #include "utils/zlocks.h" @@ -47,7 +48,7 @@ class replica_follower : replica_base const std::string &get_master_app_name() const { return _master_app_name; }; - const std::vector &get_master_meta_list() const { return _master_meta_list; }; + const std::vector &get_master_meta_list() const { return _master_meta_list; }; const bool is_need_duplicate() const { return need_duplicate; } @@ -59,7 +60,7 @@ class replica_follower : replica_base std::string _master_cluster_name; std::string _master_app_name; - std::vector _master_meta_list; + std::vector _master_meta_list; partition_configuration _master_replica_config; bool need_duplicate{false}; @@ -69,7 +70,7 @@ class replica_follower : replica_base error_code update_master_replica_config(error_code err, query_cfg_response &&resp); void copy_master_replica_checkpoint(); error_code nfs_copy_checkpoint(error_code err, learn_response &&resp); - void nfs_copy_remote_files(const rpc_address &remote_node, + void nfs_copy_remote_files(const host_port &remote_node, const std::string &remote_disk, const std::string &remote_dir, std::vector &file_list, @@ -78,9 +79,12 @@ class replica_follower : replica_base std::string master_replica_name() { std::string app_info = fmt::format("{}.{}", _master_cluster_name, _master_app_name); - if (_master_replica_config.primary != rpc_address::s_invalid_address) { - return fmt::format( - "{}({}|{})", app_info, _master_replica_config.primary, _master_replica_config.pid); + if (_master_replica_config.hp_primary != host_port::s_invalid_host_port) { + return fmt::format("{}({}({})|{})", + app_info, + _master_replica_config.hp_primary, + _master_replica_config.primary, + _master_replica_config.pid); } return app_info; } diff --git a/src/replica/duplication/test/replica_follower_test.cpp b/src/replica/duplication/test/replica_follower_test.cpp index bf5cca0b1d..4ebc8f0264 100644 --- a/src/replica/duplication/test/replica_follower_test.cpp +++ b/src/replica/duplication/test/replica_follower_test.cpp @@ -33,6 +33,7 @@ #include "replica/duplication/replica_follower.h" #include "replica/test/mock_utils.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -120,7 +121,7 @@ TEST_P(replica_follower_test, test_init_master_info) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -128,7 +129,7 @@ TEST_P(replica_follower_test, test_init_master_info) ASSERT_EQ(follower->get_master_cluster_name(), "master"); ASSERT_TRUE(follower->is_need_duplicate()); ASSERT_TRUE(_mock_replica->is_duplication_follower()); - std::vector test_ip{"127.0.0.1:34801", "127.0.0.2:34801", "127.0.0.3:34802"}; + std::vector test_ip{"127.0.0.1:34801", "127.0.0.1:34802", "127.0.0.1:34803"}; for (int i = 0; i < 
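
master_replica_name() above also shows the logging convention this patch uses throughout: host_port first, raw rpc_address in parentheses. A small sketch of the same convention, assuming the fmt formatters for host_port, rpc_address and gpid that the call sites above already rely on (describe_primary is an illustrative name):

#include <string>

#include <fmt/format.h>

#include "dsn.layer2_types.h"
#include "runtime/rpc/rpc_host_port.h"

std::string describe_primary(const dsn::partition_configuration &pc)
{
    if (pc.hp_primary == dsn::host_port::s_invalid_host_port) {
        return "invalid";
    }
    // e.g. "host1:34801(10.0.0.1:34801)|2.1"
    return fmt::format("{}({})|{}", pc.hp_primary, pc.primary, pc.pid);
}
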
follower->get_master_meta_list().size(); i++) { ASSERT_EQ(std::string(follower->get_master_meta_list()[i].to_string()), test_ip[i]); } @@ -144,7 +145,7 @@ TEST_P(replica_follower_test, test_duplicate_checkpoint) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -164,7 +165,7 @@ TEST_P(replica_follower_test, test_async_duplicate_checkpoint_from_master_replic { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -186,17 +187,19 @@ TEST_P(replica_follower_test, test_update_master_replica_config) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); query_cfg_response resp; ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partition_count = 100; ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partition_count = _app_info.partition_count; partition_configuration p; @@ -204,28 +207,37 @@ TEST_P(replica_follower_test, test_update_master_replica_config) resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_DATA); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.pid = gpid(2, 100); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.primary = rpc_address::s_invalid_address; + p.__set_hp_primary(host_port::s_invalid_host_port); p.pid = gpid(2, 1); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.pid = gpid(2, 1); p.primary = rpc_address::from_ip_port("127.0.0.1", 34801); - p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.2", 34801)); - p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.3", 34801)); + 
p.__set_hp_primary(host_port("localhost", 34801)); + p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.1", 34802)); + p.secondaries.emplace_back(rpc_address::from_ip_port("127.0.0.1", 34803)); + p.__set_hp_secondaries({}); + p.hp_secondaries.emplace_back(host_port("localhost", 34802)); + p.hp_secondaries.emplace_back(host_port("localhost", 34803)); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_OK); ASSERT_EQ(master_replica_config(follower).primary, p.primary); + ASSERT_EQ(master_replica_config(follower).hp_primary, p.hp_primary); ASSERT_EQ(master_replica_config(follower).pid, p.pid); } @@ -233,7 +245,7 @@ TEST_P(replica_follower_test, test_nfs_copy_checkpoint) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); init_nfs(); auto follower = _mock_replica->get_replica_follower(); @@ -241,7 +253,8 @@ TEST_P(replica_follower_test, test_nfs_copy_checkpoint) ASSERT_EQ(nfs_copy_checkpoint(follower, ERR_CORRUPTION, learn_response()), ERR_CORRUPTION); auto resp = learn_response(); - resp.address = rpc_address::from_ip_port("127.0.0.1", 34801); + resp.learnee = rpc_address::from_ip_port("127.0.0.1", 34801); + resp.__set_hp_learnee(host_port("localhost", 34801)); std::string dest = utils::filesystem::path_combine( _mock_replica->dir(), duplication_constants::kDuplicationCheckpointRootDir); diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp index 5635db04c2..9c9c410668 100644 --- a/src/replica/replica.cpp +++ b/src/replica/replica.cpp @@ -273,7 +273,7 @@ replica::replica(replica_stub *stub, bool need_restore, bool is_duplication_follower) : serverlet(replication_options::kReplicaAppType.c_str()), - replica_base(gpid, fmt::format("{}@{}", gpid, stub->_primary_address_str), app.app_name), + replica_base(gpid, fmt::format("{}@{}", gpid, stub->_primary_host_port_cache), app.app_name), _app_info(app), _primary_states(gpid, FLAGS_staleness_for_commit, FLAGS_batch_write_disabled), _potential_secondary_states(this), diff --git a/src/replica/replica.h b/src/replica/replica.h index 710fdb4108..d94871ff36 100644 --- a/src/replica/replica.h +++ b/src/replica/replica.h @@ -67,7 +67,7 @@ class rocksdb_wrapper_test; namespace dsn { class gpid; -class rpc_address; +class host_port; namespace dist { namespace block_service { @@ -316,7 +316,7 @@ class replica : public serverlet, public ref_counter, public replica_ba // See more about it in `replica_bulk_loader.cpp` void init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_committed_mutations = false); - void send_prepare_message(::dsn::rpc_address addr, + void send_prepare_message(::dsn::host_port addr, partition_status::type status, const mutation_ptr &mu, int timeout_milliseconds, @@ -342,7 +342,7 @@ class replica : public serverlet, public ref_counter, public replica_ba learn_response &&resp); void on_learn_remote_state_completed(error_code err); void handle_learning_error(error_code err, bool is_local_error); - error_code handle_learning_succeeded_on_primary(::dsn::rpc_address node, + error_code handle_learning_succeeded_on_primary(::dsn::host_port node, uint64_t learn_signature); void notify_learn_completion(); error_code apply_learned_state_from_private_log(learn_state &state); @@ -371,7 +371,7 @@ class replica : public serverlet, 
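
The test hunk above fills both the legacy fields and the new optional hp_* fields of partition_configuration. The same construction in isolation (make_test_config is an illustrative name):

#include "dsn.layer2_types.h"
#include "runtime/rpc/rpc_address.h"
#include "runtime/rpc/rpc_host_port.h"

dsn::partition_configuration make_test_config()
{
    dsn::partition_configuration p;
    p.pid = dsn::gpid(2, 1);

    p.primary = dsn::rpc_address::from_ip_port("127.0.0.1", 34801);
    p.__set_hp_primary(dsn::host_port("localhost", 34801));

    p.secondaries.emplace_back(dsn::rpc_address::from_ip_port("127.0.0.1", 34802));
    p.secondaries.emplace_back(dsn::rpc_address::from_ip_port("127.0.0.1", 34803));

    // hp_secondaries is an optional thrift field: call __set_hp_secondaries()
    // before pushing, otherwise __isset stays false and the list is dropped
    // when the struct is serialized.
    p.__set_hp_secondaries({});
    p.hp_secondaries.emplace_back(dsn::host_port("localhost", 34802));
    p.hp_secondaries.emplace_back(dsn::host_port("localhost", 34803));
    return p;
}
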
public ref_counter, public replica_ba // failure handling void handle_local_failure(error_code error); void handle_remote_failure(partition_status::type status, - ::dsn::rpc_address node, + ::dsn::host_port node, error_code error, const std::string &caused_by); @@ -379,12 +379,12 @@ class replica : public serverlet, public ref_counter, public replica_ba // reconfiguration void assign_primary(configuration_update_request &proposal); void add_potential_secondary(configuration_update_request &proposal); - void upgrade_to_secondary_on_primary(::dsn::rpc_address node); + void upgrade_to_secondary_on_primary(::dsn::host_port node); void downgrade_to_secondary_on_primary(configuration_update_request &proposal); void downgrade_to_inactive_on_primary(configuration_update_request &proposal); void remove(configuration_update_request &proposal); void update_configuration_on_meta_server(config_type::type type, - ::dsn::rpc_address node, + ::dsn::host_port node, partition_configuration &newConfig); void on_update_configuration_on_meta_server_reply(error_code err, diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp index e2d7891c24..5440a048cd 100644 --- a/src/replica/replica_2pc.cpp +++ b/src/replica/replica_2pc.cpp @@ -56,7 +56,8 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/rpc_stream.h" #include "runtime/rpc/serialization.h" @@ -199,7 +200,7 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) LOG_INFO_PREFIX("receive bulk load ingestion request"); // bulk load ingestion request requires that all secondaries should be alive - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _primary_states.membership.max_replica_count) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; @@ -208,7 +209,7 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) _bulk_load_ingestion_start_time_ms = dsn_now_ms(); } - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; @@ -268,7 +269,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c break; } LOG_INFO_PREFIX("try to prepare bulk load mutation({})", mu->name()); - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _primary_states.membership.max_replica_count) { err = ERR_NOT_ENOUGH_MEMBER; break; @@ -281,7 +282,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c // stop prepare if there are too few replicas unless it's a reconciliation // for reconciliation, we should ensure every prepared mutation to be committed // please refer to PacificA paper - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count) && !reconciliation) { err = ERR_NOT_ENOUGH_MEMBER; @@ -298,11 +299,10 @@ void replica::init_prepare(mutation_ptr &mu, 
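
The replica_2pc.cpp hunks above switch every membership size check from secondaries to hp_secondaries; the arithmetic itself is unchanged. As a sketch (the helper name and free-function form are illustrative):

#include "dsn.layer2_types.h"

// "Alive members" = the primary (1) + the secondaries currently in membership.
bool has_enough_members(const dsn::partition_configuration &membership,
                        int min_replica_count)
{
    return static_cast<int>(membership.hp_secondaries.size()) + 1 >= min_replica_count;
}
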
bool reconciliation, bool pop_all_c // remote prepare mu->set_prepare_ts(); - mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size()); - for (auto it = _primary_states.membership.secondaries.begin(); - it != _primary_states.membership.secondaries.end(); - ++it) { - send_prepare_message(*it, + mu->set_left_secondary_ack_count( + (unsigned int)_primary_states.membership.hp_secondaries.size()); + for (const auto &secondary : _primary_states.membership.hp_secondaries) { + send_prepare_message(secondary, partition_status::PS_SECONDARY, mu, FLAGS_prepare_timeout_ms_for_secondaries, @@ -357,15 +357,15 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c return; } -void replica::send_prepare_message(::dsn::rpc_address addr, +void replica::send_prepare_message(::dsn::host_port hp, partition_status::type status, const mutation_ptr &mu, int timeout_milliseconds, bool pop_all_committed_mutations, int64_t learn_signature) { - mu->_tracer->add_sub_tracer(addr.to_string()); - ADD_POINT(mu->_tracer->sub_tracer(addr.to_string())); + mu->_tracer->add_sub_tracer(hp.to_string()); + ADD_POINT(mu->_tracer->sub_tracer(hp.to_string())); dsn::message_ex *msg = dsn::message_ex::create_request( RPC_PREPARE, timeout_milliseconds, get_gpid().thread_hash()); @@ -383,8 +383,8 @@ void replica::send_prepare_message(::dsn::rpc_address addr, mu->write_to(writer, msg); } - mu->remote_tasks()[addr] = - rpc::call(addr, + mu->remote_tasks()[hp] = + rpc::call(dsn::dns_resolver::instance().resolve_address(hp), msg, &_tracker, [=](error_code err, dsn::message_ex *request, dsn::message_ex *reply) { @@ -394,7 +394,7 @@ void replica::send_prepare_message(::dsn::rpc_address addr, LOG_DEBUG_PREFIX("mutation {} send_prepare_message to {} as {}", mu->name(), - addr, + hp, enum_to_string(rconfig.status)); } @@ -624,7 +624,7 @@ void replica::on_prepare_reply(std::pair p CHECK_EQ_MSG(mu->data.header.ballot, get_ballot(), "{}: invalid mutation ballot", mu->name()); - ::dsn::rpc_address node = request->to_address; + const auto &node = request->to_host_port; partition_status::type st = _primary_states.get_node_status(node); // handle reply @@ -637,7 +637,7 @@ void replica::on_prepare_reply(std::pair p ::dsn::unmarshall(reply, resp); } - auto send_prepare_tracer = mu->_tracer->sub_tracer(request->to_address.to_string()); + auto send_prepare_tracer = mu->_tracer->sub_tracer(request->to_host_port.to_string()); APPEND_EXTERN_POINT(send_prepare_tracer, resp.receive_timestamp, "remote_receive"); APPEND_EXTERN_POINT(send_prepare_tracer, resp.response_timestamp, "remote_reply"); ADD_CUSTOM_POINT(send_prepare_tracer, resp.err.to_string()); diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp index d5d625b4b5..cff270913b 100644 --- a/src/replica/replica_check.cpp +++ b/src/replica/replica_check.cpp @@ -47,7 +47,9 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" #include "split/replica_split_manager.h" @@ -120,14 +122,16 @@ void replica::broadcast_group_check() } for (auto it = _primary_states.statuses.begin(); it != _primary_states.statuses.end(); ++it) { - if (it->first == _stub->_primary_address) + if (it->first == _stub->primary_host_port()) continue; - ::dsn::rpc_address addr = it->first; + auto hp = it->first; + auto addr = 
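
send_prepare_message() above shows the general shape of every RPC touched by this patch: bookkeeping (task maps, tracers, logs) is keyed by host_port, and the rpc_address is produced from it only at the call boundary. A trimmed sketch with the same structure (names simplified; the callback body is a placeholder):

#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_host_port.h"
#include "runtime/rpc/rpc_message.h"
#include "runtime/task/async_calls.h"
#include "runtime/task/task_tracker.h"
#include "utils/error_code.h"

void send_prepare_like(const dsn::host_port &hp,
                       dsn::message_ex *msg,
                       dsn::task_tracker *tracker)
{
    // Resolution happens here, per call; callers keep the returned task keyed
    // by `hp` (as mu->remote_tasks()[hp] does above).
    auto task = dsn::rpc::call(
        dsn::dns_resolver::instance().resolve_address(hp),
        msg,
        tracker,
        [hp](dsn::error_code err, dsn::message_ex *request, dsn::message_ex *reply) {
            // handle the ack; any per-node lookups here use `hp`, not an address
        });
    (void)task;
}
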
dsn::dns_resolver::instance().resolve_address(hp); std::shared_ptr request(new group_check_request); request->app = _app_info; request->node = addr; + request->__set_hp_node(hp); _primary_states.get_replica_config(it->second, request->config); request->last_committed_decree = last_committed_decree(); request->__set_confirmed_decree(_duplication_mgr->min_confirmed_decree()); @@ -141,12 +145,12 @@ void replica::broadcast_group_check() } if (request->config.status == partition_status::PS_POTENTIAL_SECONDARY) { - auto it = _primary_states.learners.find(addr); - CHECK(it != _primary_states.learners.end(), "learner {} is missing", addr); + auto it = _primary_states.learners.find(hp); + CHECK(it != _primary_states.learners.end(), "learner {} is missing", hp); request->config.learner_signature = it->second.signature; } - LOG_INFO_PREFIX("send group check to {} with state {}", addr, enum_to_string(it->second)); + LOG_INFO_PREFIX("send group check to {} with state {}", hp, enum_to_string(it->second)); dsn::task_ptr callback_task = rpc::call(addr, @@ -160,7 +164,7 @@ void replica::broadcast_group_check() std::chrono::milliseconds(0), get_gpid().thread_hash()); - _primary_states.group_check_pending_replies[addr] = callback_task; + _primary_states.group_check_pending_replies[hp] = callback_task; } // send empty prepare when necessary @@ -177,8 +181,9 @@ void replica::on_group_check(const group_check_request &request, { _checker.only_one_thread_access(); - LOG_INFO_PREFIX("process group check, primary = {}, ballot = {}, status = {}, " + LOG_INFO_PREFIX("process group check, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}, confirmed_decree = {}", + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -222,7 +227,8 @@ void replica::on_group_check(const group_check_request &request, } response.pid = get_gpid(); - response.node = _stub->_primary_address; + response.node = _stub->primary_address(); + response.__set_hp_node(_stub->primary_host_port()); response.err = ERR_OK; if (status() == partition_status::PS_ERROR) { response.err = ERR_INVALID_STATE; @@ -241,27 +247,29 @@ void replica::on_group_check_reply(error_code err, { _checker.only_one_thread_access(); + host_port hp_node; + GET_HOST_PORT(*req, node, hp_node); if (partition_status::PS_PRIMARY != status() || req->config.ballot < get_ballot()) { return; } - auto r = _primary_states.group_check_pending_replies.erase(req->node); - CHECK_EQ_MSG(r, 1, "invalid node address, address = {}", req->node); + auto r = _primary_states.group_check_pending_replies.erase(hp_node); + CHECK_EQ_MSG(r, 1, "invalid node address, address = {}({})", hp_node, req->node); if (err != ERR_OK || resp->err != ERR_OK) { if (ERR_OK == err) { err = resp->err; } - handle_remote_failure(req->config.status, req->node, err, "group check"); + handle_remote_failure(req->config.status, hp_node, err, "group check"); METRIC_VAR_INCREMENT(group_check_failed_requests); } else { if (resp->learner_status_ == learner_status::LearningSucceeded && req->config.status == partition_status::PS_POTENTIAL_SECONDARY) { - handle_learning_succeeded_on_primary(req->node, resp->learner_signature); + handle_learning_succeeded_on_primary(hp_node, resp->learner_signature); } _split_mgr->primary_parent_handle_stop_split(req, resp); if (req->config.status == partition_status::PS_SECONDARY) { - _primary_states.secondary_disk_status[req->node] = resp->disk_status; + _primary_states.secondary_disk_status[hp_node] = 
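
on_group_check_reply() above reads the node via GET_HOST_PORT instead of using req->node directly. As the surrounding hunks suggest, the macro prefers the optional hp_* field when the sender filled it and otherwise derives a host_port from the legacy rpc_address, which keeps mixed-version clusters working. A hand-rolled equivalent for a single field (example_request and its members are hypothetical, not a real generated type):

#include "runtime/rpc/rpc_address.h"
#include "runtime/rpc/rpc_host_port.h"

struct example_request
{
    dsn::rpc_address node;   // required legacy field
    dsn::host_port hp_node;  // new optional field
    struct { bool hp_node = false; } __isset;
};

dsn::host_port node_of(const example_request &req)
{
    // Prefer the explicit host_port; otherwise build one from the address.
    return req.__isset.hp_node ? req.hp_node
                               : dsn::host_port::from_address(req.node);
}
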
resp->disk_status; } } } diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp index 8028bb5fab..4bcdf95e24 100644 --- a/src/replica/replica_chkpt.cpp +++ b/src/replica/replica_chkpt.cpp @@ -255,7 +255,8 @@ void replica::on_query_last_checkpoint(/*out*/ learn_response &response) // for example: base_local_dir = "./data" + "checkpoint.1024" = "./data/checkpoint.1024" response.base_local_dir = utils::filesystem::path_combine( _app->data_dir(), checkpoint_folder(response.state.to_decree_included)); - response.address = _stub->_primary_address; + response.learnee = _stub->primary_address(); + response.__set_hp_learnee(_stub->primary_host_port()); for (auto &file : response.state.files) { // response.state.files contain file absolute path, for example: // "./data/checkpoint.1024/1.sst" use `substr` to get the file name: 1.sst diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp index 0c5536ca88..580d82bbcb 100644 --- a/src/replica/replica_config.cpp +++ b/src/replica/replica_config.cpp @@ -58,7 +58,9 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "security/access_controller.h" @@ -143,7 +145,7 @@ void replica::on_config_proposal(configuration_update_request &proposal) void replica::assign_primary(configuration_update_request &proposal) { - CHECK_EQ(proposal.node, _stub->_primary_address); + CHECK_EQ(proposal.hp_node, _stub->primary_host_port()); if (status() == partition_status::PS_PRIMARY) { LOG_WARNING_PREFIX("invalid assgin primary proposal as the node is in {}", @@ -162,10 +164,12 @@ void replica::assign_primary(configuration_update_request &proposal) return; } - proposal.config.primary = _stub->_primary_address; - replica_helper::remove_node(_stub->_primary_address, proposal.config.secondaries); + proposal.config.primary = _stub->primary_address(); + proposal.config.__set_hp_primary(_stub->primary_host_port()); + replica_helper::remove_node(_stub->primary_address(), proposal.config.secondaries); + replica_helper::remove_node(_stub->primary_host_port(), proposal.config.hp_secondaries); - update_configuration_on_meta_server(proposal.type, proposal.node, proposal.config); + update_configuration_on_meta_server(proposal.type, proposal.hp_node, proposal.config); } // run on primary to send ADD_LEARNER request to candidate replica server @@ -179,20 +183,20 @@ void replica::add_potential_secondary(configuration_update_request &proposal) CHECK_EQ(proposal.config.ballot, get_ballot()); CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); - CHECK(!_primary_states.check_exist(proposal.node, partition_status::PS_PRIMARY), + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK(!_primary_states.check_exist(proposal.hp_node, partition_status::PS_PRIMARY), "node = {}", - proposal.node); - CHECK(!_primary_states.check_exist(proposal.node, partition_status::PS_SECONDARY), + proposal.hp_node); + CHECK(!_primary_states.check_exist(proposal.hp_node, partition_status::PS_SECONDARY), "node = {}", - proposal.node); + 
proposal.hp_node); int potential_secondaries_count = - _primary_states.membership.secondaries.size() + _primary_states.learners.size(); + _primary_states.membership.hp_secondaries.size() + _primary_states.learners.size(); if (potential_secondaries_count >= _primary_states.membership.max_replica_count - 1) { if (proposal.type == config_type::CT_ADD_SECONDARY) { - if (_primary_states.learners.find(proposal.node) == _primary_states.learners.end()) { + if (_primary_states.learners.find(proposal.hp_node) == _primary_states.learners.end()) { LOG_INFO_PREFIX( "already have enough secondaries or potential secondaries, ignore new " "potential secondary proposal"); @@ -204,7 +208,8 @@ void replica::add_potential_secondary(configuration_update_request &proposal) "secondary proposal"); return; } else { - LOG_INFO_PREFIX("add a new secondary({}) for future load balancer", proposal.node); + LOG_INFO_PREFIX("add a new secondary({}) for future load balancer", + proposal.hp_node); } } else { CHECK(false, "invalid config_type, type = {}", enum_to_string(proposal.type)); @@ -215,38 +220,43 @@ void replica::add_potential_secondary(configuration_update_request &proposal) state.prepare_start_decree = invalid_decree; state.timeout_task = nullptr; // TODO: add timer for learner task - auto it = _primary_states.learners.find(proposal.node); + auto it = _primary_states.learners.find(proposal.hp_node); if (it != _primary_states.learners.end()) { state.signature = it->second.signature; } else { state.signature = ++_primary_states.next_learning_version; - _primary_states.learners[proposal.node] = state; - _primary_states.statuses[proposal.node] = partition_status::PS_POTENTIAL_SECONDARY; + _primary_states.learners[proposal.hp_node] = state; + _primary_states.statuses[proposal.hp_node] = partition_status::PS_POTENTIAL_SECONDARY; } group_check_request request; request.app = _app_info; request.node = proposal.node; + request.__set_hp_node(proposal.hp_node); _primary_states.get_replica_config( partition_status::PS_POTENTIAL_SECONDARY, request.config, state.signature); request.last_committed_decree = last_committed_decree(); LOG_INFO_PREFIX("call one way {} to start learning with signature [{:#018x}]", - proposal.node, + proposal.hp_node, state.signature); rpc::call_one_way_typed( proposal.node, RPC_LEARN_ADD_LEARNER, request, get_gpid().thread_hash()); } -void replica::upgrade_to_secondary_on_primary(::dsn::rpc_address node) +void replica::upgrade_to_secondary_on_primary(::dsn::host_port node) { LOG_INFO_PREFIX("upgrade potential secondary {} to secondary", node); partition_configuration newConfig = _primary_states.membership; // add secondary - newConfig.secondaries.push_back(node); + if (!newConfig.__isset.hp_secondaries) { + newConfig.__set_hp_secondaries({}); + } + newConfig.hp_secondaries.push_back(node); + newConfig.secondaries.push_back(dsn::dns_resolver::instance().resolve_address(node)); update_configuration_on_meta_server(config_type::CT_UPGRADE_TO_SECONDARY, node, newConfig); } @@ -257,15 +267,20 @@ void replica::downgrade_to_secondary_on_primary(configuration_update_request &pr return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); - CHECK_EQ(proposal.node, proposal.config.primary); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == 
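
upgrade_to_secondary_on_primary() above has to grow both views of membership at once. The same update in isolation (add_secondary is an illustrative name):

#include "dsn.layer2_types.h"
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_host_port.h"

void add_secondary(dsn::partition_configuration &pc, const dsn::host_port &node)
{
    // Initialize the optional list first so __isset.hp_secondaries becomes true.
    if (!pc.__isset.hp_secondaries) {
        pc.__set_hp_secondaries({});
    }
    pc.hp_secondaries.push_back(node);
    // The legacy address list still has to be maintained for old readers.
    pc.secondaries.push_back(dsn::dns_resolver::instance().resolve_address(node));
}
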
_primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.hp_node, proposal.config.hp_primary); proposal.config.primary.set_invalid(); + proposal.config.__set_hp_primary(host_port()); proposal.config.secondaries.push_back(proposal.node); + if (!proposal.config.__isset.hp_secondaries) { + proposal.config.__set_hp_secondaries({}); + } + proposal.config.hp_secondaries.push_back(proposal.hp_node); update_configuration_on_meta_server( - config_type::CT_DOWNGRADE_TO_SECONDARY, proposal.node, proposal.config); + config_type::CT_DOWNGRADE_TO_SECONDARY, proposal.hp_node, proposal.config); } void replica::downgrade_to_inactive_on_primary(configuration_update_request &proposal) @@ -274,19 +289,21 @@ void replica::downgrade_to_inactive_on_primary(configuration_update_request &pro return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); - if (proposal.node == proposal.config.primary) { + if (proposal.hp_node == proposal.config.hp_primary) { proposal.config.primary.set_invalid(); + proposal.config.hp_primary.reset(); } else { - CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries), + CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries) && + replica_helper::remove_node(proposal.hp_node, proposal.config.hp_secondaries), "remove node failed, node = {}", proposal.node); } update_configuration_on_meta_server( - config_type::CT_DOWNGRADE_TO_INACTIVE, proposal.node, proposal.config); + config_type::CT_DOWNGRADE_TO_INACTIVE, proposal.hp_node, proposal.config); } void replica::remove(configuration_update_request &proposal) @@ -295,18 +312,20 @@ void replica::remove(configuration_update_request &proposal) return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); - auto st = _primary_states.get_node_status(proposal.node); + auto st = _primary_states.get_node_status(host_port::from_address(proposal.node)); switch (st) { case partition_status::PS_PRIMARY: - CHECK_EQ(proposal.config.primary, proposal.node); + CHECK_EQ(proposal.config.hp_primary, proposal.hp_node); proposal.config.primary.set_invalid(); + proposal.config.hp_primary.reset(); break; case partition_status::PS_SECONDARY: { - CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries), + CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries) && + replica_helper::remove_node(proposal.hp_node, proposal.config.hp_secondaries), "remove_node failed, node = {}", proposal.node); } break; @@ -316,7 +335,7 @@ void replica::remove(configuration_update_request &proposal) break; } - update_configuration_on_meta_server(config_type::CT_REMOVE, proposal.node, proposal.config); + update_configuration_on_meta_server(config_type::CT_REMOVE, proposal.hp_node, proposal.config); } // from primary @@ -348,7 +367,7 @@ void replica::on_remove(const replica_configuration &request) } void 
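
The downgrade/remove hunks above blank or shrink both representations together; note that the two APIs differ (rpc_address::set_invalid() vs host_port::reset()). A sketch of both operations (helper names are illustrative; the replica_helper include path is an assumption):

#include "common/replication_common.h"  // assumed location of replica_helper
#include "dsn.layer2_types.h"
#include "runtime/rpc/rpc_address.h"
#include "runtime/rpc/rpc_host_port.h"

void clear_primary(dsn::partition_configuration &pc)
{
    pc.primary.set_invalid();
    pc.hp_primary.reset();
}

bool remove_secondary(dsn::partition_configuration &pc,
                      const dsn::rpc_address &addr,
                      const dsn::host_port &hp)
{
    // Both removals must succeed for the two views to stay consistent; the
    // hunks above CHECK() exactly this conjunction.
    return dsn::replication::replica_helper::remove_node(addr, pc.secondaries) &&
           dsn::replication::replica_helper::remove_node(hp, pc.hp_secondaries);
}
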
replica::update_configuration_on_meta_server(config_type::type type, - ::dsn::rpc_address node, + ::dsn::host_port node, partition_configuration &newConfig) { // type should never be `CT_REGISTER_CHILD` @@ -362,7 +381,7 @@ void replica::update_configuration_on_meta_server(config_type::type type, CHECK(status() == partition_status::PS_INACTIVE && _inactive_is_transient && _is_initializing, ""); - CHECK_EQ(newConfig.primary, node); + CHECK_EQ(newConfig.hp_primary, node); } else if (type != config_type::CT_ASSIGN_PRIMARY && type != config_type::CT_UPGRADE_TO_PRIMARY) { CHECK_EQ(status(), partition_status::PS_PRIMARY); @@ -383,7 +402,8 @@ void replica::update_configuration_on_meta_server(config_type::type type, request->config = newConfig; request->config.ballot++; request->type = type; - request->node = node; + request->node = dsn::dns_resolver::instance().resolve_address(node); + request->__set_hp_node(node); ::dsn::marshall(msg, *request); @@ -397,7 +417,8 @@ void replica::update_configuration_on_meta_server(config_type::type type, enum_to_string(request->type), request->node); - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target( + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers())); _primary_states.reconfiguration_task = rpc::call(target, msg, @@ -438,7 +459,8 @@ void replica::on_update_configuration_on_meta_server_reply( LPC_DELAY_UPDATE_CONFIG, &_tracker, [ this, request, req2 = std::move(req) ]() { - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target(dsn::dns_resolver::instance().resolve_address( + _stub->_failure_detector->get_servers())); rpc_response_task_ptr t = rpc::create_rpc_response_task( request, &_tracker, @@ -474,8 +496,8 @@ void replica::on_update_configuration_on_meta_server_reply( // post-update work items? 
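
update_configuration_on_meta_server() now takes a host_port and fills the outgoing request in both forms before marshalling. The fill step on its own (the generated header name for meta_admin.thrift is an assumption; fill_node is an illustrative name):

#include "meta_admin_types.h"  // assumed name of the header generated from meta_admin.thrift
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_host_port.h"

void fill_node(dsn::replication::configuration_update_request &req,
               const dsn::host_port &node)
{
    // Old receivers read `node`; new receivers prefer the optional `hp_node`
    // added to the IDL earlier in this patch.
    req.node = dsn::dns_resolver::instance().resolve_address(node);
    req.__set_hp_node(node);
}
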
if (resp.err == ERR_OK) { CHECK_EQ(req->config.pid, resp.config.pid); - CHECK_EQ(req->config.primary, resp.config.primary); - CHECK(req->config.secondaries == resp.config.secondaries, ""); + CHECK_EQ(req->config.hp_primary, resp.config.hp_primary); + CHECK(req->config.hp_secondaries == resp.config.hp_secondaries, ""); switch (req->type) { case config_type::CT_UPGRADE_TO_PRIMARY: @@ -489,9 +511,9 @@ void replica::on_update_configuration_on_meta_server_reply( case config_type::CT_UPGRADE_TO_SECONDARY: break; case config_type::CT_REMOVE: - if (req->node != _stub->_primary_address) { + if (req->hp_node != _stub->primary_host_port()) { replica_configuration rconfig; - replica_helper::get_replica_config(resp.config, req->node, rconfig); + replica_helper::get_replica_config(resp.config, req->hp_node, rconfig); rpc::call_one_way_typed( req->node, RPC_REMOVE_REPLICA, rconfig, get_gpid().thread_hash()); } @@ -616,11 +638,11 @@ bool replica::update_configuration(const partition_configuration &config) CHECK_GE(config.ballot, get_ballot()); replica_configuration rconfig; - replica_helper::get_replica_config(config, _stub->_primary_address, rconfig); + replica_helper::get_replica_config(config, _stub->primary_host_port(), rconfig); if (rconfig.status == partition_status::PS_PRIMARY && (rconfig.ballot > get_ballot() || status() != partition_status::PS_PRIMARY)) { - _primary_states.reset_membership(config, config.primary != _stub->_primary_address); + _primary_states.reset_membership(config, config.hp_primary != _stub->primary_host_port()); } if (config.ballot > get_ballot() || @@ -1007,7 +1029,7 @@ bool replica::update_local_configuration(const replica_configuration &config, init_prepare(next, false); } - if (_primary_states.membership.secondaries.size() + 1 < + if (_primary_states.membership.hp_secondaries.size() + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { std::vector queued; _primary_states.write_queue.clear(queued); @@ -1057,10 +1079,10 @@ void replica::on_config_sync(const app_info &info, } else { if (_is_initializing) { // in initializing, when replica still primary, need to inc ballot - if (config.primary == _stub->_primary_address && + if (config.hp_primary == _stub->primary_host_port() && status() == partition_status::PS_INACTIVE && _inactive_is_transient) { update_configuration_on_meta_server(config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT, - config.primary, + config.hp_primary, const_cast(config)); return; } @@ -1070,9 +1092,10 @@ void replica::on_config_sync(const app_info &info, update_configuration(config); if (status() == partition_status::PS_INACTIVE && !_inactive_is_transient) { - if (config.primary == _stub->_primary_address // dead primary + if (config.hp_primary == _stub->primary_host_port() // dead primary || - config.primary.is_invalid() // primary is dead (otherwise let primary remove this) + config.hp_primary + .is_invalid() // primary is dead (otherwise let primary remove this) ) { LOG_INFO_PREFIX("downgrade myself as inactive is not transient, remote_config({})", boost::lexical_cast(config)); diff --git a/src/replica/replica_context.cpp b/src/replica/replica_context.cpp index f557463168..07490bc50d 100644 --- a/src/replica/replica_context.cpp +++ b/src/replica/replica_context.cpp @@ -34,6 +34,7 @@ #include "replica.h" #include "replica_context.h" #include "replica_stub.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" namespace dsn { @@ -104,11 +105,11 @@ void primary_context::reset_membership(const partition_configuration 
&config, bo membership = config; - if (membership.primary.is_invalid() == false) { - statuses[membership.primary] = partition_status::PS_PRIMARY; + if (membership.hp_primary.is_invalid() == false) { + statuses[membership.hp_primary] = partition_status::PS_PRIMARY; } - for (auto it = config.secondaries.begin(); it != config.secondaries.end(); ++it) { + for (auto it = config.hp_secondaries.begin(); it != config.hp_secondaries.end(); ++it) { statuses[*it] = partition_status::PS_SECONDARY; learners.erase(*it); } @@ -124,19 +125,21 @@ void primary_context::get_replica_config(partition_status::type st, { config.pid = membership.pid; config.primary = membership.primary; + config.__set_hp_primary(membership.hp_primary); config.ballot = membership.ballot; config.status = st; config.learner_signature = learner_signature; } -bool primary_context::check_exist(::dsn::rpc_address node, partition_status::type st) +bool primary_context::check_exist(::dsn::host_port node, partition_status::type st) { switch (st) { case partition_status::PS_PRIMARY: - return membership.primary == node; + return membership.hp_primary == node; case partition_status::PS_SECONDARY: - return std::find(membership.secondaries.begin(), membership.secondaries.end(), node) != - membership.secondaries.end(); + return std::find(membership.hp_secondaries.begin(), + membership.hp_secondaries.end(), + node) != membership.hp_secondaries.end(); case partition_status::PS_POTENTIAL_SECONDARY: return learners.find(node) != learners.end(); default: @@ -145,7 +148,7 @@ bool primary_context::check_exist(::dsn::rpc_address node, partition_status::typ } } -void primary_context::reset_node_bulk_load_states(const rpc_address &node) +void primary_context::reset_node_bulk_load_states(const host_port &node) { secondary_bulk_load_states[node].__set_download_progress(0); secondary_bulk_load_states[node].__set_download_status(ERR_OK); diff --git a/src/replica/replica_context.h b/src/replica/replica_context.h index 3b6220fa2a..5099260c17 100644 --- a/src/replica/replica_context.h +++ b/src/replica/replica_context.h @@ -41,7 +41,7 @@ #include "metadata_types.h" #include "mutation.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" @@ -59,7 +59,7 @@ struct remote_learner_state std::string last_learn_log_file; }; -typedef std::unordered_map<::dsn::rpc_address, remote_learner_state> learner_map; +typedef std::unordered_map<::dsn::host_port, remote_learner_state> learner_map; #define CLEANUP_TASK(task_, force) \ { \ @@ -104,13 +104,13 @@ class primary_context void get_replica_config(partition_status::type status, /*out*/ replica_configuration &config, uint64_t learner_signature = invalid_signature); - bool check_exist(::dsn::rpc_address node, partition_status::type status); - partition_status::type get_node_status(::dsn::rpc_address addr) const; + bool check_exist(::dsn::host_port node, partition_status::type status); + partition_status::type get_node_status(::dsn::host_port hp) const; void do_cleanup_pending_mutations(bool clean_pending_mutations = true); // reset bulk load states in secondary_bulk_load_states by node address - void reset_node_bulk_load_states(const rpc_address &node); + void reset_node_bulk_load_states(const host_port &node); void cleanup_bulk_load_states(); @@ -150,7 +150,7 @@ class primary_context // Used for partition split // child addresses who has been caught up with its parent - 
std::unordered_set caught_up_children; + std::unordered_set caught_up_children; // Used for partition split // whether parent's write request should be sent to child synchronously @@ -170,7 +170,7 @@ class primary_context // Used partition split // secondary replica address who has paused or canceled split - std::unordered_set split_stopped_secondary; + std::unordered_set split_stopped_secondary; // Used for partition split // primary parent query child on meta_server task @@ -181,13 +181,13 @@ class primary_context // group bulk_load response tasks of RPC_GROUP_BULK_LOAD for each secondary replica node_tasks group_bulk_load_pending_replies; // bulk_load_state of secondary replicas - std::unordered_map secondary_bulk_load_states; + std::unordered_map secondary_bulk_load_states; // if primary send an empty prepare after ingestion succeed to gurantee secondary commit its // ingestion request bool ingestion_is_empty_prepare_sent{false}; - // secondary rpc_address -> secondary disk_status - std::unordered_map secondary_disk_status; + // secondary host_port -> secondary disk_status + std::unordered_map secondary_disk_status; }; // Context of the secondary replica. @@ -295,9 +295,9 @@ class partition_split_context //---------------inline impl---------------------------------------------------------------- -inline partition_status::type primary_context::get_node_status(::dsn::rpc_address addr) const +inline partition_status::type primary_context::get_node_status(::dsn::host_port hp) const { - auto it = statuses.find(addr); + auto it = statuses.find(hp); return it != statuses.end() ? it->second : partition_status::PS_INACTIVE; } } // namespace replication diff --git a/src/replica/replica_failover.cpp b/src/replica/replica_failover.cpp index 90f88c0985..b689dbdda3 100644 --- a/src/replica/replica_failover.cpp +++ b/src/replica/replica_failover.cpp @@ -36,7 +36,9 @@ #include "replica.h" #include "replica/replica_context.h" #include "replica_stub.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -63,7 +65,7 @@ void replica::handle_local_failure(error_code error) } void replica::handle_remote_failure(partition_status::type st, - ::dsn::rpc_address node, + ::dsn::host_port node, error_code error, const std::string &caused_by) { @@ -74,7 +76,7 @@ void replica::handle_remote_failure(partition_status::type st, node); CHECK_EQ(status(), partition_status::PS_PRIMARY); - CHECK_NE(node, _stub->_primary_address); + CHECK_NE(node, _stub->primary_host_port()); switch (st) { case partition_status::PS_SECONDARY: @@ -84,7 +86,8 @@ void replica::handle_remote_failure(partition_status::type st, enum_to_string(st)); { configuration_update_request request; - request.node = node; + request.node = dsn::dns_resolver::instance().resolve_address(node); + request.__set_hp_node(node); request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; request.config = _primary_states.membership; downgrade_to_inactive_on_primary(request); diff --git a/src/replica/replica_learn.cpp b/src/replica/replica_learn.cpp index b2116422f0..92a1c49c3b 100644 --- a/src/replica/replica_learn.cpp +++ b/src/replica/replica_learn.cpp @@ -58,6 +58,7 @@ #include "replica_stub.h" #include "runtime/api_layer1.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -213,10 +214,11 @@ void 
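
replica_context.h above moves all per-node bookkeeping (learners, caught-up children, split-stopped secondaries, bulk-load and disk-status maps) to host_port keys. That relies on hashing support for dsn::host_port, which the unordered containers in the header already imply; a minimal sketch of the same shape (names are illustrative):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>

#include "runtime/rpc/rpc_host_port.h"

struct remote_learner_entry
{
    uint64_t signature = 0;
    std::string last_learn_log_file;
};

// Keyed by host_port: a node that changes IP but keeps its hostname keeps its
// bookkeeping entries across the change.
using learner_map_t = std::unordered_map<dsn::host_port, remote_learner_entry>;
using node_set_t = std::unordered_set<dsn::host_port>;
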
replica::init_learn(uint64_t signature) if (_app->last_committed_decree() == 0 && _stub->_learn_app_concurrent_count.load() >= FLAGS_learn_app_max_concurrent_count) { LOG_WARNING_PREFIX( - "init_learn[{:#018x}]: learnee = {}, learn_duration = {} ms, need to learn app " + "init_learn[{:#018x}]: learnee = {}({}), learn_duration = {} ms, need to learn app " "because app_committed_decree = 0, but learn_app_concurrent_count({}) >= " "FLAGS_learn_app_max_concurrent_count({}), skip", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), _stub->_learn_app_concurrent_count, @@ -232,25 +234,28 @@ void replica::init_learn(uint64_t signature) request.__set_max_gced_decree(get_max_gced_decree_for_learn()); request.last_committed_decree_in_app = _app->last_committed_decree(); request.last_committed_decree_in_prepare_list = _prepare_list->last_committed_decree(); - request.learner = _stub->_primary_address; + request.learner = _stub->primary_address(); + request.__set_hp_learner(_stub->primary_host_port()); request.signature = _potential_secondary_states.learning_version; _app->prepare_get_checkpoint(request.app_specific_learn_request); - LOG_INFO_PREFIX("init_learn[{:#018x}]: learnee = {}, learn_duration = {} ms, max_gced_decree = " - "{}, local_committed_decree = {}, app_committed_decree = {}, " - "app_durable_decree = {}, current_learning_status = {}, total_copy_file_count " - "= {}, total_copy_file_size = {}, total_copy_buffer_size = {}", - request.signature, - _config.primary, - _potential_secondary_states.duration_ms(), - request.max_gced_decree, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - enum_to_string(_potential_secondary_states.learning_status), - _potential_secondary_states.learning_copy_file_count, - _potential_secondary_states.learning_copy_file_size, - _potential_secondary_states.learning_copy_buffer_size); + LOG_INFO_PREFIX( + "init_learn[{:#018x}]: learnee = {}({}), learn_duration = {} ms, max_gced_decree = " + "{}, local_committed_decree = {}, app_committed_decree = {}, " + "app_durable_decree = {}, current_learning_status = {}, total_copy_file_count " + "= {}, total_copy_file_size = {}, total_copy_buffer_size = {}", + request.signature, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms(), + request.max_gced_decree, + last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + enum_to_string(_potential_secondary_states.learning_status), + _potential_secondary_states.learning_copy_file_count, + _potential_secondary_states.learning_copy_file_size, + _potential_secondary_states.learning_copy_buffer_size); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_LEARN, 0, get_gpid().thread_hash()); dsn::marshall(msg, request); @@ -370,7 +375,10 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // but just set state to partition_status::PS_POTENTIAL_SECONDARY _primary_states.get_replica_config(partition_status::PS_POTENTIAL_SECONDARY, response.config); - auto it = _primary_states.learners.find(request.learner); + host_port hp_learner; + GET_HOST_PORT(request, learner, hp_learner); + + auto it = _primary_states.learners.find(hp_learner); if (it == _primary_states.learners.end()) { response.config.status = partition_status::PS_INACTIVE; response.err = ERR_OBJECT_NOT_FOUND; @@ -392,13 +400,15 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // TODO: 
learner machine has been down for a long time, and DDD MUST happened before // which leads to state lost. Now the lost state is back, what shall we do? if (request.last_committed_decree_in_app > last_prepared_decree()) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: learner = {}, learner state is newer than learnee, " - "learner_app_committed_decree = {}, local_committed_decree = {}, learn " - "from scratch", - request.signature, - request.learner, - request.last_committed_decree_in_app, - local_committed_decree); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: learner = {}({}), learner state is newer than learnee, " + "learner_app_committed_decree = {}, local_committed_decree = {}, learn " + "from scratch", + request.signature, + hp_learner, + request.learner, + request.last_committed_decree_in_app, + local_committed_decree); *(decree *)&request.last_committed_decree_in_app = 0; } @@ -407,25 +417,29 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // this happens when the new primary does not commit the previously prepared mutations // yet, which it should do, so let's help it now. else if (request.last_committed_decree_in_app > local_committed_decree) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: learner = {}, learner's last_committed_decree_in_app " - "is newer than learnee, learner_app_committed_decree = {}, " - "local_committed_decree = {}, commit local soft", - request.signature, - request.learner, - request.last_committed_decree_in_app, - local_committed_decree); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: learner = {}({}), learner's last_committed_decree_in_app " + "is newer than learnee, learner_app_committed_decree = {}, " + "local_committed_decree = {}, commit local soft", + request.signature, + hp_learner, + request.learner, + request.last_committed_decree_in_app, + local_committed_decree); // we shouldn't commit mutations hard coz these mutations may preparing on another learner _prepare_list->commit(request.last_committed_decree_in_app, COMMIT_TO_DECREE_SOFT); local_committed_decree = last_committed_decree(); if (request.last_committed_decree_in_app > local_committed_decree) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: try to commit primary to {}, still less than " - "learner({})'s committed decree({}), wait mutations to be commitable", - request.signature, - local_committed_decree, - request.learner, - request.last_committed_decree_in_app); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: try to commit primary to {}, still less than " + "learner({}({}))'s committed decree({}), wait mutations to be commitable", + request.signature, + local_committed_decree, + hp_learner, + request.learner, + request.last_committed_decree_in_app); response.err = ERR_INCONSISTENT_STATE; reply(msg, response); return; @@ -438,11 +452,12 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) response.state.__set_learn_start_decree(learn_start_decree); bool delayed_replay_prepare_list = false; - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, remote_committed_decree = {}, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), remote_committed_decree = {}, " "remote_app_committed_decree = {}, local_committed_decree = {}, " "app_committed_decree = {}, app_durable_decree = {}, " "prepare_min_decree = {}, prepare_list_count = {}, learn_start_decree = {}", request.signature, + hp_learner, request.learner, request.last_committed_decree_in_prepare_list, request.last_committed_decree_in_app, @@ -453,7 +468,8 @@ void replica::on_learn(dsn::message_ex *msg, const 
learn_request &request) _prepare_list->count(), learn_start_decree); - response.address = _stub->_primary_address; + response.learnee = _stub->primary_address(); + response.__set_hp_learnee(_stub->primary_host_port()); response.prepare_start_decree = invalid_decree; response.last_committed_decree = local_committed_decree; response.err = ERR_OK; @@ -467,31 +483,35 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) delayed_replay_prepare_list); if (!should_learn_cache) { if (learn_start_decree > _app->last_durable_decree()) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because learn_start_decree({}) > _app->last_durable_decree({})", request.signature, + hp_learner, request.learner, learn_start_decree, _app->last_durable_decree()); _private_log->get_learn_state(get_gpid(), learn_start_decree, response.state); response.type = learn_type::LT_LOG; } else if (_private_log->get_learn_state(get_gpid(), learn_start_decree, response.state)) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because mutation_log::get_learn_state() returns true", request.signature, + hp_learner, request.learner); response.type = learn_type::LT_LOG; } else if (learn_start_decree < request.last_committed_decree_in_app + 1) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because learn_start_decree steps back for duplication", request.signature, + hp_learner, request.learner); response.type = learn_type::LT_LOG; } else { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn app, beacuse " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn app, beacuse " "learn_start_decree({}) <= _app->last_durable_decree({}), and " "mutation_log::get_learn_state() returns false", request.signature, + hp_learner, request.learner, learn_start_decree, _app->last_durable_decree()); @@ -504,9 +524,10 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) if (response.state.files.size() > 0) { auto &last_file = response.state.files.back(); if (last_file == learner_state.last_learn_log_file) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, learn the same file {} " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), learn the same file {} " "repeatedly, hint to switch file", request.signature, + hp_learner, request.learner, last_file); _private_log->hint_switch_file(); @@ -516,10 +537,11 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) } // it is safe to commit to last_committed_decree() now response.state.to_decree_included = last_committed_decree(); - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, learn private logs succeed, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), learn private logs succeed, " "learned_meta_size = {}, learned_file_count = {}, to_decree_included = " "{}", request.signature, + hp_learner, request.learner, response.state.meta.length(), response.state.files.size(), @@ -531,17 +553,19 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) if (err != ERR_OK) { response.err = ERR_GET_LEARN_STATE_FAILED; LOG_ERROR_PREFIX( - "on_learn[{:#018x}]: learner = {}, get app checkpoint failed, 
error = {}", + "on_learn[{:#018x}]: learner = {}({}), get app checkpoint failed, error = {}", request.signature, + hp_learner, request.learner, err); } else { response.base_local_dir = _app->data_dir(); response.__set_replica_disk_tag(_dir_node->tag); LOG_INFO_PREFIX( - "on_learn[{:#018x}]: learner = {}, get app learn state succeed, " + "on_learn[{:#018x}]: learner = {}({}), get app learn state succeed, " "learned_meta_size = {}, learned_file_count = {}, learned_to_decree = {}", request.signature, + hp_learner, request.learner, response.state.meta.length(), response.state.files.size(), @@ -575,12 +599,13 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } LOG_INFO_PREFIX( - "on_learn_reply_start[{}]: learnee = {}, learn_duration ={} ms, response_err = " + "on_learn_reply_start[{}]: learnee = {}({}), learn_duration ={} ms, response_err = " "{}, remote_committed_decree = {}, prepare_start_decree = {}, learn_type = {} " "learned_buffer_size = {}, learned_file_count = {},to_decree_included = " "{}, learn_start_decree = {}, last_commit_decree = {}, current_learning_status = " "{} ", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), resp.err, @@ -599,10 +624,11 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (resp.err != ERR_OK) { if (resp.err == ERR_INACTIVE_STATE || resp.err == ERR_INCONSISTENT_STATE) { - LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learnee is updating " + LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), learnee is updating " "ballot(inactive state) or reconciliation(inconsistent state), " "delay to start another round of learning", req.signature, + resp.config.hp_primary, resp.config.primary); _potential_secondary_states.learning_round_is_running = false; _potential_secondary_states.delay_learning_task = @@ -618,17 +644,19 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } if (resp.config.ballot > get_ballot()) { - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, update configuration because " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), update configuration because " "ballot have changed", req.signature, + resp.config.hp_primary, resp.config.primary); CHECK(update_local_configuration(resp.config), ""); } if (status() != partition_status::PS_POTENTIAL_SECONDARY) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, current_status = {}, stop learning", + "on_learn_reply[{:#018x}]: learnee = {}({}), current_status = {}, stop learning", req.signature, + resp.config.hp_primary, resp.config.primary, enum_to_string(status())); return; @@ -636,12 +664,14 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response // local state is newer than learnee if (resp.last_committed_decree < _app->last_committed_decree()) { - LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learner state is newer than " - "learnee (primary): {} vs {}, create new app", - req.signature, - resp.config.primary, - _app->last_committed_decree(), - resp.last_committed_decree); + LOG_WARNING_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), learner state is newer than " + "learnee (primary): {} vs {}, create new app", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _app->last_committed_decree(), + resp.last_committed_decree); METRIC_VAR_INCREMENT(learn_resets); @@ -649,9 +679,10 @@ void replica::on_learn_reply(error_code err, 
learn_request &&req, learn_response auto err = _app->close(true); if (err != ERR_OK) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, close app (with clear_state=true) " + "on_learn_reply[{:#018x}]: learnee = {}({}), close app (with clear_state=true) " "failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, err); } @@ -677,9 +708,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response err = _app->open_new_internal(this, _private_log->on_partition_reset(get_gpid(), 0)); if (err != ERR_OK) { - LOG_ERROR_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, open app (with " + LOG_ERROR_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), open app (with " "create_new=true) failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, err); } @@ -714,9 +746,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (++_stub->_learn_app_concurrent_count > FLAGS_learn_app_max_concurrent_count) { --_stub->_learn_app_concurrent_count; LOG_WARNING_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, learn_app_concurrent_count({}) >= " + "on_learn_reply[{:#018x}]: learnee = {}({}), learn_app_concurrent_count({}) >= " "FLAGS_learn_app_max_concurrent_count({}), skip this round", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count, FLAGS_learn_app_max_concurrent_count); @@ -725,8 +758,9 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } else { _potential_secondary_states.learn_app_concurrent_count_increased = true; LOG_INFO_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, ++learn_app_concurrent_count = {}", + "on_learn_reply[{:#018x}]: learnee = {}({}), ++learn_app_concurrent_count = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count.load()); } @@ -771,9 +805,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response // reset preparelist _potential_secondary_states.learning_start_prepare_decree = resp.prepare_start_decree; _prepare_list->truncate(_app->last_committed_decree()); - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, truncate prepare list, " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), truncate prepare list, " "local_committed_decree = {}, current_learning_status = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _app->last_committed_decree(), enum_to_string(_potential_secondary_states.learning_status)); @@ -802,12 +837,14 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response _prepare_list->get_mutation_by_decree(mu->data.header.decree); if (existing_mutation != nullptr && existing_mutation->data.header.ballot > mu->data.header.ballot) { - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, mutation({}) exist on " - "the learner with larger ballot {}", - req.signature, - resp.config.primary, - mu->name(), - existing_mutation->data.header.ballot); + LOG_INFO_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), mutation({}) exist on " + "the learner with larger ballot {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + mu->name(), + existing_mutation->data.header.ballot); } else { _prepare_list->prepare(mu, partition_status::PS_POTENTIAL_SECONDARY); } @@ -819,10 +856,11 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } } - 
LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learn_duration = {} ms, apply " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), learn_duration = {} ms, apply " "cache done, prepare_cache_range = <{}, {}>, local_committed_decree = {}, " "app_committed_decree = {}, current_learning_status = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), cache_range.first, @@ -865,8 +903,9 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (!dsn::utils::filesystem::directory_exists(learn_dir)) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, create replica learn dir {} failed", + "on_learn_reply[{:#018x}]: learnee = {}({}), create replica learn dir {} failed", req.signature, + resp.config.hp_primary, resp.config.primary, learn_dir); @@ -888,16 +927,18 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } bool high_priority = (resp.type == learn_type::LT_APP ? false : true); - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learn_duration = {} ms, start to " - "copy remote files, copy_file_count = {}, priority = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - resp.state.files.size(), - high_priority ? "high" : "low"); + LOG_INFO_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), learn_duration = {} ms, start to " + "copy remote files, copy_file_count = {}, priority = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + resp.state.files.size(), + high_priority ? "high" : "low"); _potential_secondary_states.learn_remote_files_task = _stub->_nfs->copy_remote_files( - resp.config.primary, + resp.config.hp_primary, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, @@ -1003,30 +1044,33 @@ void replica::on_copy_remote_state_completed(error_code err, decree old_app_committed = _app->last_committed_decree(); decree old_app_durable = _app->last_durable_decree(); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = {} " - "ms, copy remote state done, err = {}, copy_file_count = {}, copy_file_size = " - "{}, copy_time_used = {} ms, local_committed_decree = {}, app_committed_decree " - "= {}, app_durable_decree = {}, prepare_start_decree = {}, " - "current_learning_status = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - resp.state.files.size(), - size, - _potential_secondary_states.duration_ms() - copy_start_time, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - resp.prepare_start_decree, - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = {} " + "ms, copy remote state done, err = {}, copy_file_count = {}, copy_file_size = " + "{}, copy_time_used = {} ms, local_committed_decree = {}, app_committed_decree " + "= {}, app_durable_decree = {}, prepare_start_decree = {}, " + "current_learning_status = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + resp.state.files.size(), + size, + _potential_secondary_states.duration_ms() - copy_start_time, + last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + resp.prepare_start_decree, + 
enum_to_string(_potential_secondary_states.learning_status)); if (resp.type == learn_type::LT_APP) { --_stub->_learn_app_concurrent_count; _potential_secondary_states.learn_app_concurrent_count_increased = false; - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "--learn_app_concurrent_count = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count.load()); } @@ -1071,19 +1115,21 @@ void replica::on_copy_remote_state_completed(error_code err, // the learn_start_decree will be set to 0, which makes learner to learn from // scratch CHECK_LE(_app->last_committed_decree(), resp.last_committed_decree); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, checkpoint duration = {} ns, apply " "checkpoint succeed, app_committed_decree = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, _app->last_committed_decree()); } else { - LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, checkpoint duration = {} ns, apply " "checkpoint failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, @@ -1096,19 +1142,21 @@ void replica::on_copy_remote_state_completed(error_code err, auto start_ts = dsn_now_ns(); err = apply_learned_state_from_private_log(lstate); if (err == ERR_OK) { - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, apply_log_duration = {} ns, apply learned " "state from private log succeed, app_committed_decree = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, _app->last_committed_decree()); } else { - LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, apply_log_duration = {} ns, apply " "learned state from private log failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, @@ -1119,26 +1167,28 @@ void replica::on_copy_remote_state_completed(error_code err, // reset prepare list to make it catch with app _prepare_list->reset(_app->last_committed_decree()); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = " - "{} ms, apply checkpoint/log done, err = {}, last_prepared_decree = ({} => " - "{}), last_committed_decree = ({} => {}), app_committed_decree = ({} => " - "{}), app_durable_decree = ({} => {}), remote_committed_decree = {}, " - "prepare_start_decree = {}, current_learning_status = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - old_prepared, - last_prepared_decree(), - old_committed, - last_committed_decree(), - old_app_committed, - _app->last_committed_decree(), - old_app_durable, - _app->last_durable_decree(), - resp.last_committed_decree, - 
resp.prepare_start_decree, - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = " + "{} ms, apply checkpoint/log done, err = {}, last_prepared_decree = ({} => " + "{}), last_committed_decree = ({} => {}), app_committed_decree = ({} => " + "{}), app_durable_decree = ({} => {}), remote_committed_decree = {}, " + "prepare_start_decree = {}, current_learning_status = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + old_prepared, + last_prepared_decree(), + old_committed, + last_committed_decree(), + old_app_committed, + _app->last_committed_decree(), + old_app_durable, + _app->last_durable_decree(), + resp.last_committed_decree, + resp.prepare_start_decree, + enum_to_string(_potential_secondary_states.learning_status)); } // if catch-up done, do flush to enable all learned state is durable @@ -1148,15 +1198,17 @@ void replica::on_copy_remote_state_completed(error_code err, _app->last_committed_decree() > _app->last_durable_decree()) { err = background_sync_checkpoint(); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = " - "{} ms, flush done, err = {}, app_committed_decree = {}, " - "app_durable_decree = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - _app->last_committed_decree(), - _app->last_durable_decree()); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = " + "{} ms, flush done, err = {}, app_committed_decree = {}, " + "app_durable_decree = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + _app->last_committed_decree(), + _app->last_durable_decree()); if (err == ERR_OK) { CHECK_EQ(_app->last_committed_decree(), _app->last_durable_decree()); @@ -1183,10 +1235,11 @@ void replica::on_learn_remote_state_completed(error_code err) _checker.only_one_thread_access(); if (partition_status::PS_POTENTIAL_SECONDARY != status()) { - LOG_WARNING_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_WARNING_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, err = {}, the learner status is not " "PS_POTENTIAL_SECONDARY, but {}, ignore", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), err, @@ -1194,17 +1247,19 @@ void replica::on_learn_remote_state_completed(error_code err) return; } - LOG_INFO_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = {} " - "ms, err = {}, local_committed_decree = {}, app_committed_decree = {}, " - "app_durable_decree = {}, current_learning_status = {}", - _potential_secondary_states.learning_version, - _config.primary, - _potential_secondary_states.duration_ms(), - err, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_learn_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = {} " + "ms, err = {}, local_committed_decree = {}, app_committed_decree = {}, " + "app_durable_decree = {}, current_learning_status = {}", + _potential_secondary_states.learning_version, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms(), + err, + 
last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + enum_to_string(_potential_secondary_states.learning_status)); _potential_secondary_states.learning_round_is_running = false; @@ -1221,8 +1276,9 @@ void replica::handle_learning_error(error_code err, bool is_local_error) _checker.only_one_thread_access(); LOG_ERROR_PREFIX( - "handle_learning_error[{:#018x}]: learnee = {}, learn_duration = {} ms, err = {}, {}", + "handle_learning_error[{:#018x}]: learnee = {}({}), learn_duration = {} ms, err = {}, {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), err, @@ -1242,7 +1298,7 @@ void replica::handle_learning_error(error_code err, bool is_local_error) is_local_error ? partition_status::PS_ERROR : partition_status::PS_INACTIVE); } -error_code replica::handle_learning_succeeded_on_primary(::dsn::rpc_address node, +error_code replica::handle_learning_succeeded_on_primary(::dsn::host_port node, uint64_t learn_signature) { auto it = _primary_states.learners.find(node); @@ -1277,12 +1333,14 @@ void replica::notify_learn_completion() report.last_committed_decree_in_prepare_list = last_committed_decree(); report.learner_signature = _potential_secondary_states.learning_version; report.learner_status_ = _potential_secondary_states.learning_status; - report.node = _stub->_primary_address; + report.node = _stub->primary_address(); + report.__set_hp_node(_stub->primary_host_port()); - LOG_INFO_PREFIX("notify_learn_completion[{:#018x}]: learnee = {}, learn_duration = {} ms, " + LOG_INFO_PREFIX("notify_learn_completion[{:#018x}]: learnee = {}({}), learn_duration = {} ms, " "local_committed_decree = {}, app_committed_decree = {}, app_durable_decree = " "{}, current_learning_status = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), last_committed_decree(), @@ -1312,9 +1370,13 @@ void replica::on_learn_completion_notification(const group_check_response &repor { _checker.only_one_thread_access(); + host_port hp_node; + GET_HOST_PORT(report, node, hp_node); + LOG_INFO_PREFIX( - "on_learn_completion_notification[{:#018x}]: learner = {}, learning_status = {}", + "on_learn_completion_notification[{:#018x}]: learner = {}({}), learning_status = {}", report.learner_signature, + hp_node, report.node, enum_to_string(report.learner_status_)); @@ -1322,25 +1384,30 @@ void replica::on_learn_completion_notification(const group_check_response &repor response.err = (partition_status::PS_INACTIVE == status() && _inactive_is_transient) ? 
ERR_INACTIVE_STATE : ERR_INVALID_STATE; - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, this replica " - "is not primary, but {}, reply {}", - report.learner_signature, - report.node, - enum_to_string(status()), - response.err); + LOG_ERROR_PREFIX( + "on_learn_completion_notification[{:#018x}]: learner = {}({}), this replica " + "is not primary, but {}, reply {}", + report.learner_signature, + hp_node, + report.node, + enum_to_string(status()), + response.err); } else if (report.learner_status_ != learner_status::LearningSucceeded) { response.err = ERR_INVALID_STATE; - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, learner_status " - "is not LearningSucceeded, but {}, reply ERR_INVALID_STATE", - report.learner_signature, - report.node, - enum_to_string(report.learner_status_)); + LOG_ERROR_PREFIX( + "on_learn_completion_notification[{:#018x}]: learner = {}({}), learner_status " + "is not LearningSucceeded, but {}, reply ERR_INVALID_STATE", + report.learner_signature, + hp_node, + report.node, + enum_to_string(report.learner_status_)); } else { - response.err = handle_learning_succeeded_on_primary(report.node, report.learner_signature); + response.err = handle_learning_succeeded_on_primary(hp_node, report.learner_signature); if (response.err != ERR_OK) { - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, handle " + LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}({}), handle " "learning succeeded on primary failed, reply {}", report.learner_signature, + hp_node, report.node, response.err); } @@ -1363,10 +1430,11 @@ void replica::on_learn_completion_notification_reply(error_code err, } if (resp.signature != (int64_t)_potential_secondary_states.learning_version) { - LOG_ERROR_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, signature not matched, current signature on " "primary is [{:#018x}]", report.learner_signature, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), resp.signature); @@ -1374,21 +1442,24 @@ void replica::on_learn_completion_notification_reply(error_code err, return; } - LOG_INFO_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, response_err = {}", report.learner_signature, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), resp.err); if (resp.err != ERR_OK) { if (resp.err == ERR_INACTIVE_STATE) { - LOG_WARNING_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " - "learn_duration = {} ms, learnee is updating ballot, delay to start " - "another round of learning", - report.learner_signature, - _config.primary, - _potential_secondary_states.duration_ms()); + LOG_WARNING_PREFIX( + "on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " + "learn_duration = {} ms, learnee is updating ballot, delay to start " + "another round of learning", + report.learner_signature, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms()); _potential_secondary_states.learning_round_is_running = false; _potential_secondary_states.delay_learning_task = tasking::create_task( LPC_DELAY_LEARN, @@ -1406,8 +1477,9 @@ void replica::on_learn_completion_notification_reply(error_code err, void 
replica::on_add_learner(const group_check_request &request) { - LOG_INFO_PREFIX("process add learner, primary = {}, ballot ={}, status ={}, " + LOG_INFO_PREFIX("process add learner, primary = {}({}), ballot ={}, status ={}, " "last_committed_decree = {}, duplicating = {}", + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1548,12 +1620,13 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) LOG_INFO_PREFIX( "apply_learned_state_from_private_log[{}]: duplicating={}, step_back={}, " - "learnee = {}, learn_duration = {} ms, apply private log files done, file_count " + "learnee = {}({}), learn_duration = {} ms, apply private log files done, file_count " "={}, first_learn_start_decree ={}, learn_start_decree = {}, " "app_committed_decree = {}", _potential_secondary_states.learning_version, duplicating, step_back, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), state.files.size(), @@ -1581,20 +1654,22 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) } if (state.to_decree_included > last_committed_decree()) { - LOG_INFO_PREFIX("apply_learned_state_from_private_log[{}]: learnee ={}, " + LOG_INFO_PREFIX("apply_learned_state_from_private_log[{}]: learnee ={}({}), " "learned_to_decree_included({}) > last_committed_decree({}), commit to " "to_decree_included", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, state.to_decree_included, last_committed_decree()); plist.commit(state.to_decree_included, COMMIT_TO_DECREE_SOFT); } - LOG_INFO_PREFIX(" apply_learned_state_from_private_log[{}]: learnee ={}, " + LOG_INFO_PREFIX(" apply_learned_state_from_private_log[{}]: learnee ={}({}), " "learn_duration ={} ms, apply in-buffer private logs done, " "replay_count ={}, app_committed_decree = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), replay_count, diff --git a/src/replica/replica_restore.cpp b/src/replica/replica_restore.cpp index d4e268cc7b..063a294c46 100644 --- a/src/replica/replica_restore.cpp +++ b/src/replica/replica_restore.cpp @@ -39,7 +39,7 @@ #include "metadata_types.h" #include "replica.h" #include "replica_stub.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -403,7 +403,8 @@ void replica::tell_meta_to_restore_rollback() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_DROP_APP); ::dsn::marshall(msg, request); - rpc_address target(_stub->_failure_detector->get_servers()); + const auto &target = + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers()); rpc::call(target, msg, &_tracker, @@ -432,7 +433,8 @@ void replica::report_restore_status_to_meta() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_REPORT_RESTORE_STATUS); ::dsn::marshall(msg, request); - rpc_address target(_stub->_failure_detector->get_servers()); + const auto &target = + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers()); rpc::call(target, msg, &_tracker, diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp index f146fc44be..4f6f73c994 100644 --- a/src/replica/replica_stub.cpp +++ b/src/replica/replica_stub.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ 
-391,7 +390,6 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, _is_long_subscriber = is_long_subscriber; _failure_detector = nullptr; _state = NS_Disconnected; - _primary_address_str[0] = '\0'; } replica_stub::~replica_stub(void) { close(); } @@ -406,9 +404,9 @@ void replica_stub::initialize(bool clear /* = false*/) void replica_stub::initialize(const replication_options &opts, bool clear /* = false*/) { - _primary_address = dsn_primary_address(); - strcpy(_primary_address_str, _primary_address.to_string()); - LOG_INFO("primary_address = {}", _primary_address_str); + _primary_host_port = dsn_primary_host_port(); + _primary_host_port_cache = _primary_host_port.to_string(); + LOG_INFO("primary_host_port = {}", _primary_host_port_cache); set_options(opts); LOG_INFO("meta_servers = {}", fmt::join(_options.meta_servers, ", ")); @@ -766,7 +764,7 @@ void replica_stub::on_client_write(gpid id, dsn::message_ex *request) if (_verbose_client_log && request) { LOG_INFO("{}@{}: client = {}, code = {}, timeout = {}", id, - _primary_address_str, + _primary_host_port_cache, request->header->from_address, request->header->rpc_name, request->header->client.timeout_ms); @@ -788,7 +786,7 @@ void replica_stub::on_client_read(gpid id, dsn::message_ex *request) if (_verbose_client_log && request) { LOG_INFO("{}@{}: client = {}, code = {}, timeout = {}", id, - _primary_address_str, + _primary_host_port_cache, request->header->from_address, request->header->rpc_name, request->header->client.timeout_ms); @@ -806,16 +804,17 @@ void replica_stub::on_config_proposal(const configuration_update_request &propos if (!is_connected()) { LOG_WARNING("{}@{}: received config proposal {} for {}: not connected, ignore", proposal.config.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(proposal.type), proposal.node); return; } - LOG_INFO("{}@{}: received config proposal {} for {}", + LOG_INFO("{}@{}: received config proposal {} for {}({})", proposal.config.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(proposal.type), + proposal.hp_node, proposal.node); replica_ptr rep = get_replica(proposal.config.pid); @@ -1064,14 +1063,15 @@ void replica_stub::on_group_check(group_check_rpc rpc) if (!is_connected()) { LOG_WARNING("{}@{}: received group check: not connected, ignore", request.config.pid, - _primary_address_str); + _primary_host_port_cache); return; } - LOG_INFO("{}@{}: received group check, primary = {}, ballot = {}, status = {}, " + LOG_INFO("{}@{}: received group check, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1131,17 +1131,19 @@ void replica_stub::on_learn_completion_notification(learn_completion_notificatio void replica_stub::on_add_learner(const group_check_request &request) { if (!is_connected()) { - LOG_WARNING("{}@{}: received add learner, primary = {}, not connected, ignore", + LOG_WARNING("{}@{}: received add learner, primary = {}({}), not connected, ignore", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary); return; } - LOG_INFO("{}@{}: received add learner, primary = {}, ballot = {}, status = {}, " + LOG_INFO("{}@{}: received add learner, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}", request.config.pid, - 
_primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1221,7 +1223,8 @@ void replica_stub::query_configuration_by_node() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CONFIG_SYNC); configuration_query_by_node_request req; - req.node = _primary_address; + req.node = primary_address(); + req.__set_hp_node(_primary_host_port); // TODO: send stored replicas may cost network, we shouldn't config the frequency get_local_replicas(req.stored_replicas); @@ -1232,7 +1235,8 @@ void replica_stub::query_configuration_by_node() LOG_INFO("send query node partitions request to meta server, stored_replicas_count = {}", req.stored_replicas.size()); - rpc_address target(_failure_detector->get_servers()); + const auto &target = + dsn::dns_resolver::instance().resolve_address(_failure_detector->get_servers()); _config_query_task = rpc::call(target, msg, @@ -1379,17 +1383,17 @@ void replica_stub::on_node_query_reply_scatter(replica_stub_ptr this_, req.__isset.meta_split_status ? req.meta_split_status : split_status::NOT_SPLIT); } else { - if (req.config.primary == _primary_address) { + if (req.config.hp_primary == _primary_host_port) { LOG_INFO("{}@{}: replica not exists on replica server, which is primary, remove it " "from meta server", req.config.pid, - _primary_address_str); + _primary_host_port_cache); remove_replica_on_meta_server(req.info, req.config); } else { LOG_INFO( "{}@{}: replica not exists on replica server, which is not primary, just ignore", req.config.pid, - _primary_address_str); + _primary_host_port_cache); } } } @@ -1427,23 +1431,24 @@ void replica_stub::remove_replica_on_meta_server(const app_info &info, request->info = info; request->config = config; request->config.ballot++; - request->node = _primary_address; + request->node = primary_address(); + request->__set_hp_node(_primary_host_port); request->type = config_type::CT_DOWNGRADE_TO_INACTIVE; - if (_primary_address == config.primary) { + if (_primary_host_port == config.hp_primary) { request->config.primary.set_invalid(); - } else if (replica_helper::remove_node(_primary_address, request->config.secondaries)) { + request->config.hp_primary.reset(); + } else if (replica_helper::remove_node(primary_address(), request->config.secondaries) && + replica_helper::remove_node(_primary_host_port, request->config.hp_secondaries)) { } else { return; } ::dsn::marshall(msg, *request); - rpc_address target(_failure_detector->get_servers()); - rpc::call(_failure_detector->get_servers(), - msg, - nullptr, - [](error_code err, dsn::message_ex *, dsn::message_ex *) {}); + const auto &target = + dsn::dns_resolver::instance().resolve_address(_failure_detector->get_servers()); + rpc::call(target, msg, nullptr, [](error_code err, dsn::message_ex *, dsn::message_ex *) {}); } void replica_stub::on_meta_server_disconnected() @@ -1507,7 +1512,7 @@ void replica_stub::response_client(gpid id, } LOG_ERROR("{}@{}: {} fail: client = {}, code = {}, timeout = {}, status = {}, error = {}", id, - _primary_address_str, + _primary_host_port_cache, is_read ? "read" : "write", request == nullptr ? "null" : request->header->from_address.to_string(), request == nullptr ? "null" : request->header->rpc_name, @@ -1742,7 +1747,7 @@ void replica_stub::open_replica( // process below LOG_INFO("{}@{}: start to load replica {} group check, dir = {}", id, - _primary_address_str, + _primary_host_port_cache, group_check ? 
"with" : "without", dir); rep = load_replica(dn, dir.c_str()); @@ -1786,7 +1791,7 @@ void replica_stub::open_replica( "{}@{}: cannot load replica({}.{}), ballot = {}, " "last_committed_decree = {}, but it does not existed!", id, - _primary_address_str, + _primary_host_port_cache, id, app.app_type.c_str(), configuration_update->config.ballot, @@ -1824,8 +1829,9 @@ void replica_stub::open_replica( } if (rep == nullptr) { - LOG_WARNING( - "{}@{}: open replica failed, erase from opening replicas", id, _primary_address_str); + LOG_WARNING("{}@{}: open replica failed, erase from opening replicas", + id, + _primary_host_port_cache); zauto_write_lock l(_replicas_lock); CHECK_GT_MSG(_opening_replicas.erase(id), 0, "replica {} is not in _opening_replicas", id); METRIC_VAR_DECREMENT(opening_replicas); @@ -1845,12 +1851,12 @@ void replica_stub::open_replica( } if (nullptr != group_check) { - rpc::call_one_way_typed(_primary_address, + rpc::call_one_way_typed(primary_address(), RPC_LEARN_ADD_LEARNER, *group_check, group_check->config.pid.thread_hash()); } else if (nullptr != configuration_update) { - rpc::call_one_way_typed(_primary_address, + rpc::call_one_way_typed(primary_address(), RPC_CONFIG_PROPOSAL, *configuration_update, configuration_update->config.pid.thread_hash()); @@ -2084,9 +2090,9 @@ void replica_stub::notify_replica_state_update(const replica_configuration &conf tasking::enqueue( LPC_REPLICA_STATE_CHANGE_NOTIFICATION, &_tracker, - std::bind(_replica_state_subscriber, _primary_address, config, is_closing)); + std::bind(_replica_state_subscriber, _primary_host_port, config, is_closing)); } else { - _replica_state_subscriber(_primary_address, config, is_closing); + _replica_state_subscriber(_primary_host_port, config, is_closing); } } } @@ -2402,7 +2408,7 @@ replica_stub::exec_command_on_replica(const std::vector &args, std::stringstream query_state; query_state << processed << " processed, " << not_found << " not found"; for (auto &kv : results) { - query_state << "\n " << kv.first << "@" << _primary_address_str; + query_state << "\n " << kv.first << "@" << _primary_host_port_cache; if (kv.second.first != partition_status::PS_INVALID) query_state << "@" << (kv.second.first == partition_status::PS_PRIMARY ? 
"P" : "S"); query_state << " : " << kv.second.second; @@ -2548,7 +2554,7 @@ uint64_t replica_stub::gc_tcmalloc_memory(bool release_all) // // partition split // -void replica_stub::create_child_replica(rpc_address primary_address, +void replica_stub::create_child_replica(host_port primary_address, app_info app, ballot init_ballot, gpid child_gpid, @@ -2691,7 +2697,7 @@ void replica_stub::on_bulk_load(bulk_load_rpc rpc) const bulk_load_request &request = rpc.request(); bulk_load_response &response = rpc.response(); - LOG_INFO("[{}@{}]: receive bulk load request", request.pid, _primary_address_str); + LOG_INFO("[{}@{}]: receive bulk load request", request.pid, _primary_host_port_cache); replica_ptr rep = get_replica(request.pid); if (rep != nullptr) { rep->get_bulk_loader()->on_bulk_load(request, response); @@ -2706,10 +2712,11 @@ void replica_stub::on_group_bulk_load(group_bulk_load_rpc rpc) const group_bulk_load_request &request = rpc.request(); group_bulk_load_response &response = rpc.response(); - LOG_INFO("[{}@{}]: received group bulk load request, primary = {}, ballot = {}, " + LOG_INFO("[{}@{}]: received group bulk load request, primary = {}({}), ballot = {}, " "meta_bulk_load_status = {}", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.meta_bulk_load_status)); @@ -2730,7 +2737,7 @@ void replica_stub::on_detect_hotkey(detect_hotkey_rpc rpc) LOG_INFO("[{}@{}]: received detect hotkey request, hotkey_type = {}, detect_action = {}", request.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(request.type), enum_to_string(request.action)); diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h index 62fb36dbf8..0370c7d0c3 100644 --- a/src/replica/replica_stub.h +++ b/src/replica/replica_stub.h @@ -50,17 +50,19 @@ #include "failure_detector/failure_detector_multimaster.h" #include "metadata_types.h" #include "partition_split_types.h" +#include "ranger/access_type.h" #include "replica.h" #include "replica/mutation_log.h" #include "replica_admin_types.h" -#include "ranger/access_type.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" -#include "security/access_controller.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/serverlet.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" #include "runtime/task/task_tracker.h" +#include "security/access_controller.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" #include "utils/flags.h" @@ -114,7 +116,7 @@ class replica_split_manager; typedef std::unordered_map replicas; typedef std::function + ::dsn::host_port /*from*/, const replica_configuration & /*new_config*/, bool /*is_closing*/)> replica_state_subscriber; class replica_stub; @@ -197,8 +199,15 @@ class replica_stub : public serverlet, public ref_counter replication_options &options() { return _options; } const replication_options &options() const { return _options; } bool is_connected() const { return NS_Connected == _state; } - virtual rpc_address get_meta_server_address() const { return _failure_detector->get_servers(); } - rpc_address primary_address() const { return _primary_address; } + virtual rpc_address get_meta_server_address() const + { + return dsn::dns_resolver::instance().resolve_address(_failure_detector->get_servers()); + } + rpc_address primary_address() const + { + return 
dsn::dns_resolver::instance().resolve_address(_primary_host_port);
+    }
+    const host_port &primary_host_port() const { return _primary_host_port; }
 
     //
     // helper methods
     //
@@ -218,7 +227,7 @@ class replica_stub : public serverlet, public ref_counter
     //
     // called by parent partition, executed by child partition
-    void create_child_replica(dsn::rpc_address primary_address,
+    void create_child_replica(dsn::host_port primary_address,
                               app_info app,
                               ballot init_ballot,
                               gpid child_gpid,
@@ -446,8 +455,9 @@ class replica_stub : public serverlet, public ref_counter
     closing_replicas _closing_replicas;
     closed_replicas _closed_replicas;
 
-    ::dsn::rpc_address _primary_address;
-    char _primary_address_str[64];
+    ::dsn::host_port _primary_host_port;
+    // The string form of '_primary_host_port', typically used for logging.
+    std::string _primary_host_port_cache;
 
     std::shared_ptr _failure_detector;
     mutable zlock _state_lock;
diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp
index 9ea5d41806..26d2aa01b1 100644
--- a/src/replica/split/replica_split_manager.cpp
+++ b/src/replica/split/replica_split_manager.cpp
@@ -35,8 +35,10 @@
 #include "replica/replica_stub.h"
 #include "replica/replication_app_base.h"
 #include "runtime/api_layer1.h"
+#include "runtime/rpc/dns_resolver.h"
 #include "runtime/rpc/rpc_address.h"
 #include "runtime/rpc/rpc_holder.h"
+#include "runtime/rpc/rpc_host_port.h"
 #include "runtime/task/async_calls.h"
 #include "runtime/task/task.h"
 #include "utils/autoref_ptr.h"
@@ -144,17 +146,18 @@ void replica_split_manager::parent_start_split(
     _child_gpid = child_gpid;
     _child_init_ballot = get_ballot();
 
-    LOG_INFO_PREFIX("start to add child({}), init_ballot={}, status={}, primary_address={}",
+    LOG_INFO_PREFIX("start to add child({}), init_ballot={}, status={}, primary={}({})",
                     _child_gpid,
                     _child_init_ballot,
                     enum_to_string(status()),
+                    request.config.hp_primary,
                     request.config.primary);
 
     tasking::enqueue(LPC_CREATE_CHILD,
                      tracker(),
                      std::bind(&replica_stub::create_child_replica,
                                _stub,
-                               _replica->_config.primary,
+                               _replica->_config.hp_primary,
                                _replica->_app_info,
                                _child_init_ballot,
                                _child_gpid,
@@ -165,7 +168,7 @@ void replica_split_manager::parent_start_split(
 
 // ThreadPool: THREAD_POOL_REPLICATION
 void replica_split_manager::child_init_replica(gpid parent_gpid,
-                                               rpc_address primary_address,
+                                               const host_port &primary_host_port,
                                                ballot init_ballot) // on child partition
 {
     FAIL_POINT_INJECT_F("replica_child_init_replica", [](absl::string_view) {});
@@ -181,7 +184,8 @@ void replica_split_manager::child_init_replica(gpid parent_gpid,
 
     // update replica config
     _replica->_config.ballot = init_ballot;
-    _replica->_config.primary = primary_address;
+    _replica->_config.primary = dsn::dns_resolver::instance().resolve_address(primary_host_port);
+    _replica->_config.__set_hp_primary(primary_host_port);
     _replica->_config.status = partition_status::PS_PARTITION_SPLIT;
 
     // initialize split context
@@ -614,10 +618,12 @@ void replica_split_manager::child_notify_catch_up() // on child partition
     request->parent_gpid = _replica->_split_states.parent_gpid;
     request->child_gpid = get_gpid();
     request->child_ballot = get_ballot();
-    request->child_address = _stub->_primary_address;
+    request->child = _stub->primary_address();
+    request->__set_hp_child(_stub->primary_host_port());
 
-    LOG_INFO_PREFIX("send notification to primary parent[{}@{}], ballot={}",
+    LOG_INFO_PREFIX("send notification to primary parent[{}@{}({})], ballot={}",
                     _replica->_split_states.parent_gpid,
+
_replica->_config.hp_primary, _replica->_config.primary, get_ballot()); @@ -647,8 +653,9 @@ void replica_split_manager::child_notify_catch_up() // on child partition child_handle_split_error("notify_primary_split_catch_up failed"); return; } - LOG_INFO_PREFIX("notify primary parent[{}@{}] catch up succeed", + LOG_INFO_PREFIX("notify primary parent[{}@{}({})] catch up succeed", _replica->_split_states.parent_gpid, + _replica->_config.hp_primary, _replica->_config.primary); }); } @@ -680,13 +687,17 @@ void replica_split_manager::parent_handle_child_catch_up( return; } + host_port hp_child; + GET_HOST_PORT(request, child, hp_child); + response.err = ERR_OK; - LOG_INFO_PREFIX("receive catch_up request from {}@{}, current ballot={}", + LOG_INFO_PREFIX("receive catch_up request from {}@{}({}), current ballot={}", request.child_gpid, - request.child_address, + hp_child, + request.child, request.child_ballot); - _replica->_primary_states.caught_up_children.insert(request.child_address); + _replica->_primary_states.caught_up_children.insert(hp_child); // _primary_states.statuses is a map structure: rpc address -> partition_status // it stores replica's rpc address and partition_status of this replica group for (auto &iter : _replica->_primary_states.statuses) { @@ -764,17 +775,17 @@ void replica_split_manager::update_child_group_partition_count( } if (!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.secondaries.size() + 1 < + _replica->_primary_states.membership.hp_secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { LOG_ERROR_PREFIX("there are {} learners or not have enough secondaries(count is {})", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.secondaries.size()); + _replica->_primary_states.membership.hp_secondaries.size()); parent_handle_split_error( "update_child_group_partition_count failed, have learner or lack of secondary", true); return; } - auto not_replied_addresses = std::make_shared>(); + auto not_replied_addresses = std::make_shared>(); // _primary_states.statuses is a map structure: rpc address -> partition_status for (const auto &kv : _replica->_primary_states.statuses) { not_replied_addresses->insert(kv.first); @@ -787,22 +798,25 @@ void replica_split_manager::update_child_group_partition_count( // ThreadPool: THREAD_POOL_REPLICATION void replica_split_manager::parent_send_update_partition_count_request( - const rpc_address &address, + const host_port &hp, int32_t new_partition_count, - std::shared_ptr> ¬_replied_addresses) // on primary parent + std::shared_ptr> ¬_replied_addresses) // on primary parent { FAIL_POINT_INJECT_F("replica_parent_update_partition_count_request", [](absl::string_view) {}); CHECK_EQ_PREFIX(status(), partition_status::PS_PRIMARY); + const auto &address = dsn::dns_resolver::instance().resolve_address(hp); auto request = std::make_unique(); request->new_partition_count = new_partition_count; - request->target_address = address; + request->target = address; + request->__set_hp_target(hp); request->child_pid = _child_gpid; request->ballot = get_ballot(); LOG_INFO_PREFIX( - "send update child group partition count request to node({}), new partition_count = {}", + "send update child group partition count request to node({}({})), new partition_count = {}", + hp, address, new_partition_count); update_child_group_partition_count_rpc rpc(std::move(request), @@ -876,7 +890,7 @@ void replica_split_manager::on_update_child_group_partition_count_reply( error_code ec, 
const update_child_group_partition_count_request &request, const update_child_group_partition_count_response &response, - std::shared_ptr> ¬_replied_addresses) // on primary parent + std::shared_ptr> ¬_replied_addresses) // on primary parent { _replica->_checker.only_one_thread_access(); @@ -902,15 +916,16 @@ void replica_split_manager::on_update_child_group_partition_count_reply( error_code error = (ec == ERR_OK) ? response.err : ec; if (error == ERR_TIMEOUT) { LOG_WARNING_PREFIX( - "failed to update child node({}) partition_count, error = {}, wait and retry", - request.target_address, + "failed to update child node({}({})) partition_count, error = {}, wait and retry", + request.hp_target, + request.target, error); tasking::enqueue( LPC_PARTITION_SPLIT, tracker(), std::bind(&replica_split_manager::parent_send_update_partition_count_request, this, - request.target_address, + request.hp_target, request.new_partition_count, not_replied_addresses), get_gpid().thread_hash(), @@ -919,21 +934,23 @@ void replica_split_manager::on_update_child_group_partition_count_reply( } if (error != ERR_OK) { - LOG_ERROR_PREFIX("failed to update child node({}) partition_count({}), error = {}", - request.target_address, + LOG_ERROR_PREFIX("failed to update child node({}({})) partition_count({}), error = {}", + request.hp_target, + request.target, request.new_partition_count, error); parent_handle_split_error("on_update_child_group_partition_count_reply error", true); return; } - LOG_INFO_PREFIX("update node({}) child({}) partition_count({}) succeed", - request.target_address, + LOG_INFO_PREFIX("update node({}({})) child({}) partition_count({}) succeed", + request.hp_target, + request.target, request.child_pid, request.new_partition_count); // update group partition_count succeed - not_replied_addresses->erase(request.target_address); + not_replied_addresses->erase(request.hp_target); if (not_replied_addresses->empty()) { LOG_INFO_PREFIX("update child({}) group partition_count, new_partition_count = {}", request.child_pid, @@ -975,6 +992,7 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren child_config.ballot++; child_config.last_committed_decree = 0; child_config.last_drops.clear(); + child_config.hp_last_drops.clear(); child_config.pid.set_partition_index(_replica->_app_info.partition_count + get_gpid().get_partition_index()); @@ -982,7 +1000,8 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren request.app = _replica->_app_info; request.child_config = child_config; request.parent_config = _replica->_primary_states.membership; - request.primary_address = _stub->_primary_address; + request.primary = _stub->primary_address(); + request.__set_hp_primary(_stub->primary_host_port()); // reject client request _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE); @@ -1006,7 +1025,8 @@ void replica_split_manager::parent_send_register_request( request.parent_config.ballot, request.child_config.ballot); - rpc_address meta_address(_stub->_failure_detector->get_servers()); + rpc_address meta_address( + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers())); std::unique_ptr req = std::make_unique(request); register_child_rpc rpc(std::move(req), RPC_CM_REGISTER_CHILD_REPLICA, @@ -1206,13 +1226,13 @@ void replica_split_manager::trigger_primary_parent_split( _meta_split_status = meta_split_status; if (meta_split_status == split_status::SPLITTING) { if 
(!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.secondaries.size() + 1 < + _replica->_primary_states.membership.hp_secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { LOG_WARNING_PREFIX( "there are {} learners or not have enough secondaries(count is {}), wait for " "next round", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.secondaries.size()); + _replica->_primary_states.membership.hp_secondaries.size()); return; } @@ -1479,7 +1499,8 @@ void replica_split_manager::primary_parent_handle_stop_split( return; } - _replica->_primary_states.split_stopped_secondary.insert(req->node); + _replica->_primary_states.split_stopped_secondary.insert( + req->__isset.hp_node ? req->hp_node : host_port::from_address(req->node)); auto count = 0; for (auto &iter : _replica->_primary_states.statuses) { if (iter.second == partition_status::PS_SECONDARY && @@ -1500,7 +1521,8 @@ void replica_split_manager::parent_send_notify_stop_request( split_status::type meta_split_status) // on primary parent { FAIL_POINT_INJECT_F("replica_parent_send_notify_stop_request", [](absl::string_view) {}); - rpc_address meta_address(_stub->_failure_detector->get_servers()); + auto meta_address = + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers()); std::unique_ptr req = std::make_unique(); req->app_name = _replica->_app_info.app_name; req->parent_gpid = get_gpid(); @@ -1531,7 +1553,8 @@ void replica_split_manager::query_child_state() // on primary parent request->pid = get_gpid(); request->partition_count = _replica->_app_info.partition_count; - rpc_address meta_address(_stub->_failure_detector->get_servers()); + rpc_address meta_address( + dsn::dns_resolver::instance().resolve_address(_stub->_failure_detector->get_servers())); LOG_INFO_PREFIX("send query child partition state request to meta server({})", meta_address); query_child_state_rpc rpc( std::move(request), RPC_CM_QUERY_CHILD_STATE, 0_ms, 0, get_gpid().thread_hash()); diff --git a/src/replica/split/replica_split_manager.h b/src/replica/split/replica_split_manager.h index 00c6f81ef9..982e1c2cb3 100644 --- a/src/replica/split/replica_split_manager.h +++ b/src/replica/split/replica_split_manager.h @@ -37,7 +37,7 @@ namespace dsn { class partition_configuration; -class rpc_address; +class host_port; class task_tracker; namespace replication { @@ -76,7 +76,7 @@ class replica_split_manager : replica_base void parent_start_split(const group_check_request &request); // child replica initialize config and state info - void child_init_replica(gpid parent_gpid, rpc_address primary_address, ballot init_ballot); + void child_init_replica(gpid parent_gpid, const host_port &primary_address, ballot init_ballot); void parent_prepare_states(const std::string &dir); @@ -123,9 +123,9 @@ class replica_split_manager : replica_base void update_child_group_partition_count(int32_t new_partition_count); void parent_send_update_partition_count_request( - const rpc_address &address, + const host_port &hp, int32_t new_partition_count, - std::shared_ptr> ¬_replied_addresses); + std::shared_ptr> ¬_replied_addresses); // child update its partition_count void @@ -136,7 +136,7 @@ class replica_split_manager : replica_base error_code ec, const update_child_group_partition_count_request &request, const update_child_group_partition_count_response &response, - std::shared_ptr> ¬_replied_addresses); + std::shared_ptr> ¬_replied_addresses); // all replicas update 
partition_count in memory and disk void update_local_partition_count(int32_t new_partition_count); diff --git a/src/replica/split/test/replica_split_test.cpp b/src/replica/split/test/replica_split_test.cpp index 646aea1e01..2821d902e1 100644 --- a/src/replica/split/test/replica_split_test.cpp +++ b/src/replica/split/test/replica_split_test.cpp @@ -41,6 +41,7 @@ #include "replica/test/mock_utils.h" #include "replica/test/replica_test_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" @@ -189,10 +190,13 @@ class replica_split_test : public replica_test_base config.max_replica_count = 3; config.pid = PARENT_GPID; config.ballot = INIT_BALLOT; - config.primary = PRIMARY; - config.secondaries.emplace_back(SECONDARY); + config.hp_primary = PRIMARY; + config.primary = PRIMARY_ADDR; + config.__set_hp_secondaries({SECONDARY}); + config.secondaries.emplace_back(SECONDARY_ADDR); if (!lack_of_secondary) { - config.secondaries.emplace_back(SECONDARY2); + config.secondaries.emplace_back(SECONDARY_ADDR2); + config.hp_secondaries.emplace_back(SECONDARY2); } _parent_replica->set_primary_partition_configuration(config); } @@ -202,7 +206,8 @@ class replica_split_test : public replica_test_base { req.child_pid = CHILD_GPID; req.ballot = b; - req.target_address = PRIMARY; + req.target = PRIMARY_ADDR; + req.__set_hp_target(PRIMARY); req.new_partition_count = NEW_PARTITION_COUNT; } @@ -293,7 +298,8 @@ class replica_split_test : public replica_test_base req.child_gpid = CHILD_GPID; req.parent_gpid = PARENT_GPID; req.child_ballot = child_ballot; - req.child_address = PRIMARY; + req.child = PRIMARY_ADDR; + req.__set_hp_child(PRIMARY); notify_cacth_up_response resp; _parent_split_mgr->parent_handle_child_catch_up(req, resp); @@ -325,11 +331,11 @@ class replica_split_test : public replica_test_base mock_update_child_partition_count_request(req, INIT_BALLOT); update_child_group_partition_count_response resp; resp.err = resp_err; - auto not_replied_addresses = std::make_shared>(); - not_replied_addresses->insert(PRIMARY); + auto not_replied_host_ports = std::make_shared>(); + not_replied_host_ports->insert(PRIMARY); _parent_split_mgr->on_update_child_group_partition_count_reply( - ERR_OK, req, resp, not_replied_addresses); + ERR_OK, req, resp, not_replied_host_ports); _parent_replica->tracker()->wait_outstanding_tasks(); _child_replica->tracker()->wait_outstanding_tasks(); return resp.err; @@ -345,7 +351,7 @@ class replica_split_test : public replica_test_base void test_on_register_child_reply(partition_status::type status, dsn::error_code resp_err) { stub->set_state_connected(); - stub->set_rpc_address(PRIMARY); + stub->set_host_port(PRIMARY); mock_parent_split_context(status); _parent_replica->_primary_states.sync_send_write_request = true; _parent_split_mgr->_partition_version = -1; @@ -356,11 +362,13 @@ class replica_split_test : public replica_test_base req.parent_config.pid = PARENT_GPID; req.parent_config.ballot = INIT_BALLOT; req.parent_config.last_committed_decree = DECREE; - req.parent_config.primary = PRIMARY; + req.parent_config.primary = PRIMARY_ADDR; + req.parent_config.__set_hp_primary(PRIMARY); req.child_config.pid = CHILD_GPID; req.child_config.ballot = INIT_BALLOT + 1; req.child_config.last_committed_decree = 0; - req.primary_address = PRIMARY; + req.primary = PRIMARY_ADDR; + req.__set_hp_primary(PRIMARY); register_child_response resp; resp.err = resp_err; @@ -394,7 
+402,8 @@ class replica_split_test : public replica_test_base req.app = _parent_replica->_app_info; req.config.ballot = INIT_BALLOT; req.config.status = partition_status::PS_SECONDARY; - req.node = SECONDARY; + req.node = SECONDARY_ADDR; + req.__set_hp_node(SECONDARY); if (meta_split_status == split_status::PAUSING || meta_split_status == split_status::CANCELING) { req.__set_meta_split_status(meta_split_status); @@ -426,7 +435,8 @@ class replica_split_test : public replica_test_base std::shared_ptr req = std::make_shared(); std::shared_ptr resp = std::make_shared(); - req->node = SECONDARY; + req->node = SECONDARY_ADDR; + req->__set_hp_node(SECONDARY); if (meta_split_status != split_status::NOT_SPLIT) { req->__set_meta_split_status(meta_split_status); } @@ -525,9 +535,12 @@ class replica_split_test : public replica_test_base const int32_t APP_ID = 2; const int32_t OLD_PARTITION_COUNT = 8; const int32_t NEW_PARTITION_COUNT = 16; - const rpc_address PRIMARY = rpc_address::from_ip_port("127.0.0.1", 18230); - const rpc_address SECONDARY = rpc_address::from_ip_port("127.0.0.2", 10058); - const rpc_address SECONDARY2 = rpc_address::from_ip_port("127.0.0.3", 10805); + const host_port PRIMARY = host_port("localhost", 18230); + const rpc_address PRIMARY_ADDR = rpc_address::from_ip_port("127.0.0.1", 18230); + const host_port SECONDARY = host_port("localhost", 10058); + const rpc_address SECONDARY_ADDR = rpc_address::from_ip_port("127.0.0.1", 10058); + const host_port SECONDARY2 = host_port("localhost", 10805); + const rpc_address SECONDARY_ADDR2 = rpc_address::from_ip_port("127.0.0.1", 10805); const gpid PARENT_GPID = gpid(APP_ID, 1); const gpid CHILD_GPID = gpid(APP_ID, 9); const ballot INIT_BALLOT = 3; diff --git a/src/replica/storage/simple_kv/simple_kv.app.example.h b/src/replica/storage/simple_kv/simple_kv.app.example.h index 5c6c1f0e59..2d1fd1279e 100644 --- a/src/replica/storage/simple_kv/simple_kv.app.example.h +++ b/src/replica/storage/simple_kv/simple_kv.app.example.h @@ -45,8 +45,8 @@ class simple_kv_client_app : public ::dsn::service_app return ::dsn::ERR_INVALID_PARAMETERS; printf("%s %s %s\n", args[1].c_str(), args[2].c_str(), args[3].c_str()); - const auto meta = rpc_address::from_host_port(args[2]); - _simple_kv_client.reset(new simple_kv_client(args[1].c_str(), {meta}, args[3].c_str())); + const auto hp = host_port::from_string(args[2].c_str()); + _simple_kv_client.reset(new simple_kv_client(args[1].c_str(), {hp}, args[3].c_str())); _timer = ::dsn::tasking::enqueue_timer(LPC_SIMPLE_KV_TEST_TIMER, &_tracker, diff --git a/src/replica/storage/simple_kv/simple_kv.client.h b/src/replica/storage/simple_kv/simple_kv.client.h index 0825f712f5..f6bc2c2f8b 100644 --- a/src/replica/storage/simple_kv/simple_kv.client.h +++ b/src/replica/storage/simple_kv/simple_kv.client.h @@ -27,6 +27,7 @@ #pragma once #include #include "utils/optional.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/task/async_calls.h" #include "client/partition_resolver.h" #include "simple_kv.code.definition.h" @@ -39,7 +40,7 @@ class simple_kv_client { public: simple_kv_client(const char *cluster_name, - const std::vector &meta_list, + const std::vector &meta_list, const char *app_name) { _resolver = partition_resolver::get_resolver(cluster_name, meta_list, app_name); diff --git a/src/replica/storage/simple_kv/test/case.cpp b/src/replica/storage/simple_kv/test/case.cpp index dbefe7ad78..eca2bb7e1b 100644 --- a/src/replica/storage/simple_kv/test/case.cpp +++ b/src/replica/storage/simple_kv/test/case.cpp @@ -534,8 
+534,8 @@ void event_on_rpc::init(message_ex *msg, task *tsk) if (msg != nullptr) { _trace_id = fmt::sprintf("%016llx", msg->header->trace_id); _rpc_name = msg->header->rpc_name; - _from = address_to_node(msg->header->from_address); - _to = address_to_node(msg->to_address); + _from = address_to_node(host_port::from_address(msg->header->from_address)); + _to = address_to_node(msg->to_host_port); } } @@ -914,9 +914,9 @@ void client_case_line::get_read_params(int &id, std::string &key, int &timeout_m timeout_ms = _timeout; } -void client_case_line::get_replica_config_params(rpc_address &receiver, +void client_case_line::get_replica_config_params(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) const + host_port &node) const { CHECK_EQ(_type, replica_config); receiver = _config_receiver; @@ -1166,9 +1166,9 @@ bool test_case::check_client_write(int &id, std::string &key, std::string &value return true; } -bool test_case::check_replica_config(rpc_address &receiver, +bool test_case::check_replica_config(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) + host_port &node) { if (!check_client_instruction(client_case_line::replica_config)) return false; diff --git a/src/replica/storage/simple_kv/test/case.h b/src/replica/storage/simple_kv/test/case.h index 6164b2be6e..4e22a3a9b5 100644 --- a/src/replica/storage/simple_kv/test/case.h +++ b/src/replica/storage/simple_kv/test/case.h @@ -35,7 +35,7 @@ #include "common.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/fmt_utils.h" #include "utils/singleton.h" @@ -424,9 +424,9 @@ class client_case_line : public case_line bool parse_type_name(const std::string &name); void get_write_params(int &id, std::string &key, std::string &value, int &timeout_ms) const; void get_read_params(int &id, std::string &key, int &timeout_ms) const; - void get_replica_config_params(rpc_address &receiver, + void get_replica_config_params(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) const; + host_port &node) const; bool check_write_result(int id, ::dsn::error_code err, int32_t resp); bool check_read_result(int id, ::dsn::error_code err, const std::string &resp); @@ -445,9 +445,9 @@ class client_case_line : public case_line int _write_resp; std::string _read_resp; - rpc_address _config_receiver; + host_port _config_receiver; dsn::replication::config_type::type _config_type; - rpc_address _config_node; + host_port _config_node; }; USER_DEFINED_ENUM_FORMATTER(client_case_line::client_type) @@ -476,9 +476,9 @@ class test_case : public dsn::utils::singleton void wait_check_client(); void notify_check_client(); bool check_client_write(int &id, std::string &key, std::string &value, int &timeout_ms); - bool check_replica_config(rpc_address &receiver, + bool check_replica_config(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node); + host_port &node); bool check_client_read(int &id, std::string &key, int &timeout_ms); void on_end_write(int id, ::dsn::error_code err, int32_t resp); void on_end_read(int id, ::dsn::error_code err, const std::string &resp); diff --git a/src/replica/storage/simple_kv/test/checker.cpp b/src/replica/storage/simple_kv/test/checker.cpp index d87007fcd1..405181897c 100644 --- a/src/replica/storage/simple_kv/test/checker.cpp +++ b/src/replica/storage/simple_kv/test/checker.cpp @@ -50,6 +50,8 @@ #include 
"replica/replica_stub.h" #include "replica/replication_service_app.h" #include "replica/storage/simple_kv/test/common.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" #include "runtime/service_app.h" #include "runtime/service_engine.h" @@ -73,7 +75,7 @@ class checker_partition_guardian : public partition_guardian static bool s_disable_balancer; public: - checker_partition_guardian(meta_service *svc) : partition_guardian(svc) {} + checker_partition_guardian(meta_service *svc) : partition_guardian(svc), _svc(svc) {} pc_status cure(meta_view view, const dsn::gpid &gpid, configuration_proposal_action &action) override { @@ -83,22 +85,26 @@ class checker_partition_guardian : public partition_guardian return pc_status::healthy; pc_status result; - if (pc.primary.is_invalid()) { - if (pc.secondaries.size() > 0) { + if (pc.hp_primary.is_invalid()) { + if (pc.hp_secondaries.size() > 0) { action.node = pc.secondaries[0]; - for (unsigned int i = 1; i < pc.secondaries.size(); ++i) - if (pc.secondaries[i] < action.node) + action.__set_hp_node(pc.hp_secondaries[0]); + for (unsigned int i = 1; i < pc.hp_secondaries.size(); ++i) + if (pc.hp_secondaries[i] < action.hp_node) { action.node = pc.secondaries[i]; + action.hp_node = pc.hp_secondaries[i]; + } action.type = config_type::CT_UPGRADE_TO_PRIMARY; result = pc_status::ill; } - else if (pc.last_drops.size() == 0) { - std::vector sort_result; + else if (pc.hp_last_drops.size() == 0) { + std::vector sort_result; sort_alive_nodes(*view.nodes, server_load_balancer::primary_comparator(*view.nodes), sort_result); - action.node = sort_result[0]; + action.node = dsn::dns_resolver::instance().resolve_address(sort_result[0]); + action.__set_hp_node(sort_result[0]); action.type = config_type::CT_ASSIGN_PRIMARY; result = pc_status::ill; } @@ -106,28 +112,33 @@ class checker_partition_guardian : public partition_guardian // DDD else { action.node = *pc.last_drops.rbegin(); + action.__set_hp_node(*pc.hp_last_drops.rbegin()); action.type = config_type::CT_ASSIGN_PRIMARY; - LOG_ERROR("{} enters DDD state, we are waiting for its last primary node {} to " + LOG_ERROR("{} enters DDD state, we are waiting for its last primary node {}({}) to " "come back ...", pc.pid, + action.hp_node, action.node); result = pc_status::dead; } action.target = action.node; + action.__set_hp_target(action.hp_node); } - else if (static_cast(pc.secondaries.size()) + 1 < pc.max_replica_count) { - std::vector sort_result; + else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) { + std::vector sort_result; sort_alive_nodes( *view.nodes, server_load_balancer::partition_comparator(*view.nodes), sort_result); for (auto &node : sort_result) { if (!is_member(pc, node)) { - action.node = node; + action.node = dsn::dns_resolver::instance().resolve_address(node); + action.__set_hp_node(node); break; } } action.target = pc.primary; + action.__set_hp_target(pc.hp_primary); action.type = config_type::CT_ADD_SECONDARY; result = pc_status::ill; } else { @@ -136,10 +147,10 @@ class checker_partition_guardian : public partition_guardian return result; } - typedef std::function node_comparator; + typedef std::function node_comparator; static void sort_alive_nodes(const node_mapper &nodes, const node_comparator &cmp, - std::vector &sorted_node) + std::vector &sorted_node) { sorted_node.clear(); sorted_node.reserve(nodes.size()); @@ -150,6 +161,8 @@ class checker_partition_guardian : public partition_guardian } 
std::sort(sorted_node.begin(), sorted_node.end(), cmp); } + + meta_service *_svc; }; bool test_checker::s_inited = false; @@ -205,10 +218,10 @@ bool test_checker::init(const std::string &name, const std::vectorid(); std::string name = node.second->full_name(); - rpc_address paddr = node.second->rpc()->primary_address(); - int port = paddr.port(); - _node_to_address[name] = paddr; - LOG_INFO("=== node_to_address[{}]={}", name, paddr); + const auto &hp = node.second->rpc()->primary_host_port(); + int port = hp.port(); + _node_to_host_port[name] = hp; + LOG_INFO("=== node_to_address[{}]={}", name, hp); _address_to_node[port] = name; LOG_INFO("=== address_to_node[{}]={}", port, name); if (id != port) { @@ -267,7 +280,7 @@ void test_checker::check() } } -void test_checker::on_replica_state_change(::dsn::rpc_address from, +void test_checker::on_replica_state_change(::dsn::host_port from, const replica_configuration &new_config, bool is_closing) { @@ -376,7 +389,7 @@ bool test_checker::check_replica_state(int primary_count, int secondary_count, i return p == primary_count && s == secondary_count && i == inactive_count; } -std::string test_checker::address_to_node_name(rpc_address addr) +std::string test_checker::address_to_node_name(host_port addr) { auto find = _address_to_node.find(addr.port()); if (find != _address_to_node.end()) @@ -384,12 +397,12 @@ std::string test_checker::address_to_node_name(rpc_address addr) return "node@" + boost::lexical_cast(addr.port()); } -rpc_address test_checker::node_name_to_address(const std::string &name) +host_port test_checker::node_name_to_address(const std::string &name) { - auto find = _node_to_address.find(name); - if (find != _node_to_address.end()) + auto find = _node_to_host_port.find(name); + if (find != _node_to_host_port.end()) return find->second; - return rpc_address(); + return host_port(); } void install_checkers() diff --git a/src/replica/storage/simple_kv/test/checker.h b/src/replica/storage/simple_kv/test/checker.h index 0578ac1330..b5d3e2bf7b 100644 --- a/src/replica/storage/simple_kv/test/checker.h +++ b/src/replica/storage/simple_kv/test/checker.h @@ -33,7 +33,7 @@ #include "common.h" #include "meta/meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/simulator.h" #include "utils/singleton.h" @@ -73,10 +73,10 @@ class test_checker : public dsn::utils::singleton bool check_replica_state(int primary_count, int secondary_count, int inactive_count); - std::string address_to_node_name(rpc_address addr); - rpc_address node_name_to_address(const std::string &name); + std::string address_to_node_name(host_port addr); + host_port node_name_to_address(const std::string &name); - void on_replica_state_change(::dsn::rpc_address from, + void on_replica_state_change(::dsn::host_port from, const replica_configuration &new_config, bool is_closing); void on_config_change(const app_mapper &new_config); @@ -92,7 +92,7 @@ class test_checker : public dsn::utils::singleton parti_config _last_config; state_snapshot _last_states; - std::map _node_to_address; // address is primary_address() + std::map _node_to_host_port; // host_port is primary_host_port() std::map _address_to_node; // port is enough for key }; diff --git a/src/replica/storage/simple_kv/test/client.cpp b/src/replica/storage/simple_kv/test/client.cpp index f93dffe1b0..b6117394d0 100644 --- a/src/replica/storage/simple_kv/test/client.cpp +++ b/src/replica/storage/simple_kv/test/client.cpp @@ -38,7 +38,9 @@ #include 
"replica/storage/simple_kv/simple_kv.client.h" #include "replica/storage/simple_kv/test/common.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -66,10 +68,12 @@ ::dsn::error_code simple_kv_client_app::start(const std::vector &ar if (args.size() < 2) return ::dsn::ERR_INVALID_PARAMETERS; - std::vector meta_servers; + std::vector meta_servers; replica_helper::load_meta_servers(meta_servers); _meta_server_group.assign_group("meta_servers"); - _meta_server_group.group_address()->add_list(meta_servers); + for (const auto &hp : meta_servers) { + LOG_WARNING_IF(!_meta_server_group.group_host_port()->add(hp), "duplicate adress {}", hp); + } _simple_kv_client.reset( new application::simple_kv_client("mycluster", meta_servers, "simple_kv.instance0")); @@ -94,9 +98,9 @@ void simple_kv_client_app::run() std::string value; int timeout_ms; - rpc_address receiver; + host_port receiver; dsn::replication::config_type::type type; - rpc_address node; + host_port node; while (!g_done) { if (test_case::instance().check_client_write(id, key, value, timeout_ms)) { @@ -141,9 +145,9 @@ void simple_kv_client_app::begin_write(int id, std::chrono::milliseconds(timeout_ms)); } -void simple_kv_client_app::send_config_to_meta(const rpc_address &receiver, +void simple_kv_client_app::send_config_to_meta(const host_port &receiver, dsn::replication::config_type::type type, - const rpc_address &node) + const host_port &node) { dsn::message_ex *req = dsn::message_ex::create_request(RPC_CM_PROPOSE_BALANCER, 30000); @@ -151,15 +155,17 @@ void simple_kv_client_app::send_config_to_meta(const rpc_address &receiver, request.gpid = g_default_gpid; configuration_proposal_action act; - act.__set_target(receiver); - act.__set_node(node); + act.target = dsn::dns_resolver::instance().resolve_address(receiver); + act.node = dsn::dns_resolver::instance().resolve_address(node); + act.__set_hp_target(receiver); + act.__set_hp_node(node); act.__set_type(type); request.action_list.emplace_back(std::move(act)); request.__set_force(true); dsn::marshall(req, request); - dsn_rpc_call_one_way(_meta_server_group, req); + dsn_rpc_call_one_way(dsn::dns_resolver::instance().resolve_address(_meta_server_group), req); } struct read_context diff --git a/src/replica/storage/simple_kv/test/client.h b/src/replica/storage/simple_kv/test/client.h index 66f71d5aae..623863d4ca 100644 --- a/src/replica/storage/simple_kv/test/client.h +++ b/src/replica/storage/simple_kv/test/client.h @@ -31,12 +31,13 @@ #include #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/service_app.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" namespace dsn { + namespace replication { namespace application { class simple_kv_client; @@ -57,14 +58,14 @@ class simple_kv_client_app : public ::dsn::service_app void begin_read(int id, const std::string &key, int timeout_ms); void begin_write(int id, const std::string &key, const std::string &value, int timeout_ms); - void send_config_to_meta(const rpc_address &receiver, + void send_config_to_meta(const host_port &receiver, dsn::replication::config_type::type type, - const rpc_address &node); + const host_port &node); private: std::unique_ptr _simple_kv_client; - rpc_address 
_meta_server_group; - rpc_address _service_addr; + host_port _meta_server_group; + host_port _service_addr; dsn::task_tracker _tracker; }; } diff --git a/src/replica/storage/simple_kv/test/common.cpp b/src/replica/storage/simple_kv/test/common.cpp index 2163136411..5ffb84bfd5 100644 --- a/src/replica/storage/simple_kv/test/common.cpp +++ b/src/replica/storage/simple_kv/test/common.cpp @@ -86,7 +86,7 @@ partition_status::type partition_status_from_short_string(const std::string &str return partition_status::PS_INVALID; } -std::string address_to_node(rpc_address addr) +std::string address_to_node(host_port addr) { if (addr.is_invalid()) return "-"; @@ -94,10 +94,10 @@ std::string address_to_node(rpc_address addr) return test_checker::instance().address_to_node_name(addr); } -rpc_address node_to_address(const std::string &name) +host_port node_to_address(const std::string &name) { if (name == "-") - return rpc_address(); + return host_port(); CHECK(test_checker::s_inited, ""); return test_checker::instance().node_name_to_address(name); } @@ -318,8 +318,8 @@ void parti_config::convert_from(const partition_configuration &c) { pid = c.pid; ballot = c.ballot; - primary = address_to_node(c.primary); - for (auto &s : c.secondaries) + primary = address_to_node(c.hp_primary); + for (auto &s : c.hp_secondaries) secondaries.push_back(address_to_node(s)); std::sort(secondaries.begin(), secondaries.end()); } diff --git a/src/replica/storage/simple_kv/test/common.h b/src/replica/storage/simple_kv/test/common.h index 8124fee348..b7d5874436 100644 --- a/src/replica/storage/simple_kv/test/common.h +++ b/src/replica/storage/simple_kv/test/common.h @@ -37,8 +37,8 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" #include "utils/fmt_utils.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { class partition_configuration; @@ -57,10 +57,10 @@ partition_status::type partition_status_from_short_string(const std::string &str // transfer primary_address to node_name // return "-" if addr.is_invalid() // return "node@port" if not found -std::string address_to_node(rpc_address addr); +std::string address_to_node(host_port addr); // transfer node_name to primary_address // return invalid addr if not found -rpc_address node_to_address(const std::string &name); +host_port node_to_address(const std::string &name); bool gpid_from_string(const std::string &str, gpid &gpid); diff --git a/src/replica/test/mock_utils.h b/src/replica/test/mock_utils.h index 0fb2e0806b..6d7725b787 100644 --- a/src/replica/test/mock_utils.h +++ b/src/replica/test/mock_utils.h @@ -34,6 +34,7 @@ #include "replica/replica.h" #include "replica/replica_stub.h" #include "replica/backup/cold_backup_context.h" +#include "runtime/rpc/rpc_host_port.h" DSN_DECLARE_int32(log_private_file_size_mb); @@ -179,11 +180,11 @@ class mock_replica : public replica { _primary_states.membership = pconfig; } - partition_bulk_load_state get_secondary_bulk_load_state(const rpc_address &node) + partition_bulk_load_state get_secondary_bulk_load_state(const host_port &node) { return _primary_states.secondary_bulk_load_states[node]; } - void set_secondary_bulk_load_state(const rpc_address &node, + void set_secondary_bulk_load_state(const host_port &node, const partition_bulk_load_state &state) { _primary_states.secondary_bulk_load_states[node] = state; @@ -277,7 +278,7 @@ class mock_replica_stub : public replica_stub rpc_address get_meta_server_address() const override { - return 
rpc_address::from_ip_port("127.0.0.2", 12321); + return rpc_address::from_ip_port("127.0.0.1", 12321); } std::map mock_replicas; @@ -374,7 +375,7 @@ class mock_replica_stub : public replica_stub _bulk_load_downloading_count.store(count); } - void set_rpc_address(const rpc_address &address) { _primary_address = address; } + void set_host_port(const host_port &address) { _primary_host_port = address; } }; class mock_log_file : public log_file diff --git a/src/replica/test/replica_test.cpp b/src/replica/test/replica_test.cpp index 718d81021e..7123dd85b7 100644 --- a/src/replica/test/replica_test.cpp +++ b/src/replica/test/replica_test.cpp @@ -271,6 +271,7 @@ TEST_P(replica_test, write_size_limited) auto write_request = dsn::message_ex::create_request(RPC_TEST); auto cleanup = dsn::defer([=]() { delete write_request; }); + header.context.u.is_forwarded = false; write_request->header = &header; std::unique_ptr sim_net( new tools::sim_network_provider(nullptr, nullptr)); diff --git a/src/runtime/api_layer1.h b/src/runtime/api_layer1.h index dc5e317032..6c2d675a13 100644 --- a/src/runtime/api_layer1.h +++ b/src/runtime/api_layer1.h @@ -33,6 +33,7 @@ #include "runtime/api_task.h" #include "common/gpid.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" /*! @@ -215,6 +216,8 @@ replace the underneath implementation of the network (e.g., RDMA, simulated netw extern dsn::rpc_address dsn_primary_address(); +extern dsn::host_port dsn_primary_host_port(); + /*! @defgroup rpc-server Server-Side RPC Primitives diff --git a/src/runtime/rpc/asio_net_provider.cpp b/src/runtime/rpc/asio_net_provider.cpp index 74d3fcb314..1bc85f2d5b 100644 --- a/src/runtime/rpc/asio_net_provider.cpp +++ b/src/runtime/rpc/asio_net_provider.cpp @@ -146,6 +146,7 @@ error_code asio_network_provider::start(rpc_channel channel, int port, bool clie channel); _address = rpc_address(get_local_ipv4(), port); + _hp = ::dsn::host_port::from_address(_address); if (!client_only) { auto v4_addr = boost::asio::ip::address_v4::any(); //(ntohl(_address.ip)); @@ -333,6 +334,19 @@ void asio_udp_provider::do_receive() return; } + // Get the remote endpoint of the socket. 
+ boost::system::error_code ec; + auto remote = _socket->remote_endpoint(ec); + if (ec) { + LOG_ERROR("failed to get the remote endpoint: {}", ec.message()); + do_receive(); + return; + } + + auto ip = remote.address().to_v4().to_ulong(); + auto port = remote.port(); + const auto &remote_addr = ::dsn::rpc_address(ip, port); + auto hdr_format = message_parser::get_header_type(_recv_reader._buffer.data()); if (NET_HDR_INVALID == hdr_format) { LOG_ERROR("{}: asio udp read failed: invalid header type '{}'", @@ -356,7 +370,25 @@ void asio_udp_provider::do_receive() return; } + if (msg->header->from_address != remote_addr) { + if (!msg->header->context.u.is_forwarded) { + msg->header->from_address = remote_addr; + LOG_DEBUG("{}: message's from_address {} is not equal to socket's remote_addr " + "{}, assign it to remote_addr.", + _address, + msg->header->from_address, + remote_addr); + } else { + LOG_DEBUG("{}: message's from_address {} is not equal to socket's remote_addr " + "{}, but it's forwarded message, ignore it!.", + _address, + msg->header->from_address, + remote_addr); + } + } + msg->to_address = _address; + msg->to_host_port = _hp; if (msg->header->context.u.is_request) { on_recv_request(msg, 0); } else { @@ -423,6 +455,8 @@ error_code asio_udp_provider::start(rpc_channel channel, int port, bool client_o } } + _hp = ::dsn::host_port::from_address(_address); + for (int i = 0; i < FLAGS_io_service_worker_count; i++) { _workers.push_back(std::make_shared([this, i]() { task::set_tls_dsn_context(node(), nullptr); diff --git a/src/runtime/rpc/asio_net_provider.h b/src/runtime/rpc/asio_net_provider.h index bbd200c1c5..774a6b5759 100644 --- a/src/runtime/rpc/asio_net_provider.h +++ b/src/runtime/rpc/asio_net_provider.h @@ -37,6 +37,7 @@ #include "runtime/rpc/message_parser.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/error_code.h" @@ -79,6 +80,7 @@ class asio_network_provider : public connection_oriented_network virtual error_code start(rpc_channel channel, int port, bool client_only) override; virtual ::dsn::rpc_address address() override { return _address; } + virtual ::dsn::host_port host_port() override { return _hp; } virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) override; private: @@ -93,6 +95,7 @@ class asio_network_provider : public connection_oriented_network std::vector> _io_services; std::vector> _workers; ::dsn::rpc_address _address; + ::dsn::host_port _hp; }; // TODO(Tangyanzhao): change the network model like asio_network_provider @@ -109,6 +112,8 @@ class asio_udp_provider : public network virtual ::dsn::rpc_address address() override { return _address; } + virtual ::dsn::host_port host_port() override { return _hp; } + virtual void inject_drop_message(message_ex *msg, bool is_send) override { // nothing to do for UDP @@ -125,6 +130,7 @@ class asio_udp_provider : public network std::shared_ptr _socket; std::vector> _workers; ::dsn::rpc_address _address; + ::dsn::host_port _hp; message_reader _recv_reader; ::dsn::utils::ex_lock_nr _lock; // [ diff --git a/src/runtime/rpc/dsn_message_parser.cpp b/src/runtime/rpc/dsn_message_parser.cpp index 6c72a06519..b9dc05524d 100644 --- a/src/runtime/rpc/dsn_message_parser.cpp +++ b/src/runtime/rpc/dsn_message_parser.cpp @@ -71,7 +71,7 @@ message_ex *dsn_message_parser::get_message_on_receive(message_reader *reader, if (!is_right_body(msg)) { message_header 
*header = (message_header *)buf_ptr; LOG_ERROR("dsn message body check failed, id = {}, trace_id = {:#018x}, rpc_name " - "= {}, from_addr = {}", + "= {}, from = {}", header->id, header->trace_id, header->rpc_name, diff --git a/src/runtime/rpc/group_address.h b/src/runtime/rpc/group_address.h index fdd97a798a..6e3783606b 100644 --- a/src/runtime/rpc/group_address.h +++ b/src/runtime/rpc/group_address.h @@ -32,6 +32,7 @@ #include "runtime/rpc/rpc_address.h" #include "utils/api_utilities.h" #include "utils/autoref_ptr.h" +#include "utils/fmt_logging.h" #include "utils/rand.h" #include "utils/synchronize.h" @@ -46,10 +47,7 @@ class rpc_group_address : public ref_counter void add_list(const std::vector &addrs) { for (const auto &addr : addrs) { - // TODO(yingchun): add LOG_WARNING_IF/LOG_ERROR_IF - if (!add(addr)) { - LOG_WARNING("duplicate adress {}", addr); - } + LOG_WARNING_IF(!add(addr), "duplicate adress {}", addr); } } void set_leader(rpc_address addr); diff --git a/src/runtime/rpc/group_host_port.h b/src/runtime/rpc/group_host_port.h index a92937c754..d306fb6321 100644 --- a/src/runtime/rpc/group_host_port.h +++ b/src/runtime/rpc/group_host_port.h @@ -23,7 +23,6 @@ #include #include "runtime/rpc/group_address.h" -#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/rpc_host_port.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" @@ -55,7 +54,7 @@ class rpc_group_host_port void add_list(const std::vector &hps) { for (const auto &hp : hps) { - LOG_WARNING_IF(!add(hp), "duplicate adress {}", hp); + LOG_WARNING_IF(!add(hp), "duplicate host_port {}", hp); } } void set_leader(const host_port &hp); diff --git a/src/runtime/rpc/network.cpp b/src/runtime/rpc/network.cpp index f169409717..d28363fae6 100644 --- a/src/runtime/rpc/network.cpp +++ b/src/runtime/rpc/network.cpp @@ -388,10 +388,10 @@ rpc_session::rpc_session(connection_oriented_network &net, _message_sent(0), _net(net), _remote_addr(remote_addr), + _remote_host_port(host_port::from_address(remote_addr)), _max_buffer_block_count_per_send(net.max_buffer_block_count_per_send()), _reader(net.message_buffer_block_size()), _parser(parser), - _is_client(is_client), _matcher(_net.engine()->matcher()), _delay_server_receive_ms(0) @@ -433,9 +433,12 @@ void rpc_session::on_failure(bool is_write) bool rpc_session::on_recv_message(message_ex *msg, int delay_ms) { - if (msg->header->from_address.is_invalid()) + if (msg->header->from_address.is_invalid()) { msg->header->from_address = _remote_addr; + } + msg->to_address = _net.address(); + msg->to_host_port = _net.host_port(); msg->io_session = this; // ignore msg if join point return false diff --git a/src/runtime/rpc/network.h b/src/runtime/rpc/network.h index 05bfcda453..3ec3ef42fd 100644 --- a/src/runtime/rpc/network.h +++ b/src/runtime/rpc/network.h @@ -34,6 +34,7 @@ #include "rpc_address.h" #include "runtime/rpc/message_parser.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -91,6 +92,7 @@ class network // the named address // virtual ::dsn::rpc_address address() = 0; + virtual ::dsn::host_port host_port() = 0; // // this is where the upper rpc engine calls down for a RPC call @@ -227,6 +229,7 @@ class rpc_session : public ref_counter bool is_client() const { return _is_client; } dsn::rpc_address remote_address() const { return _remote_addr; } + dsn::host_port remote_host_port() const { return _remote_host_port; } connection_oriented_network &net() const { return _net; } 
message_parser_ptr parser() const { return _parser; } @@ -328,6 +331,7 @@ class rpc_session : public ref_counter // constant info connection_oriented_network &_net; dsn::rpc_address _remote_addr; + dsn::host_port _remote_host_port; int _max_buffer_block_count_per_send; message_reader _reader; message_parser_ptr _parser; diff --git a/src/runtime/rpc/network.sim.cpp b/src/runtime/rpc/network.sim.cpp index d97d857eec..4aad6b933a 100644 --- a/src/runtime/rpc/network.sim.cpp +++ b/src/runtime/rpc/network.sim.cpp @@ -86,6 +86,7 @@ static message_ex *virtual_send_message(message_ex *msg) blob bb(buffer, 0, msg->header->body_length + sizeof(message_header)); message_ex *recv_msg = message_ex::create_receive_message(bb); recv_msg->to_address = msg->to_address; + recv_msg->to_host_port = msg->to_host_port; msg->copy_to(*recv_msg); // extensible object state move @@ -160,6 +161,7 @@ sim_network_provider::sim_network_provider(rpc_engine *rpc, network *inner_provi : connection_oriented_network(rpc, inner_provider) { _address = rpc_address::from_host_port("localhost", 1); + _hp = ::dsn::host_port::from_address(_address); } error_code sim_network_provider::start(rpc_channel channel, int port, bool client_only) @@ -169,6 +171,7 @@ error_code sim_network_provider::start(rpc_channel channel, int port, bool clien channel); _address = dsn::rpc_address::from_host_port("localhost", port); + _hp = ::dsn::host_port::from_address(_address); auto hostname = boost::asio::ip::host_name(); if (!client_only) { for (int i = NET_HDR_INVALID + 1; i <= network_header_format::max_value(); i++) { diff --git a/src/runtime/rpc/network.sim.h b/src/runtime/rpc/network.sim.h index 279eafe742..ed91f1f09f 100644 --- a/src/runtime/rpc/network.sim.h +++ b/src/runtime/rpc/network.sim.h @@ -31,6 +31,7 @@ #include "runtime/rpc/message_parser.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/error_code.h" @@ -91,6 +92,7 @@ class sim_network_provider : public connection_oriented_network virtual error_code start(rpc_channel channel, int port, bool client_only); virtual ::dsn::rpc_address address() { return _address; } + virtual ::dsn::host_port host_port() { return _hp; } virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) { @@ -109,6 +111,7 @@ class sim_network_provider : public connection_oriented_network private: ::dsn::rpc_address _address; + ::dsn::host_port _hp; }; //------------- inline implementations ------------- diff --git a/src/runtime/rpc/rpc_address.h b/src/runtime/rpc/rpc_address.h index ef70726159..aaba4011da 100644 --- a/src/runtime/rpc/rpc_address.h +++ b/src/runtime/rpc/rpc_address.h @@ -141,7 +141,7 @@ class rpc_address switch (type()) { case HOST_TYPE_IPV4: - return ip() == r.ip() && _addr.v4.port == r.port(); + return ip() == r.ip() && port() == r.port(); case HOST_TYPE_GROUP: return _addr.group.group == r._addr.group.group; default: diff --git a/src/runtime/rpc/rpc_engine.cpp b/src/runtime/rpc/rpc_engine.cpp index 48774a0a12..bb690c19f0 100644 --- a/src/runtime/rpc/rpc_engine.cpp +++ b/src/runtime/rpc/rpc_engine.cpp @@ -36,6 +36,7 @@ #include "runtime/api_layer1.h" #include "runtime/global_config.h" #include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/network.h" #include "runtime/rpc/serialization.h" #include "runtime/service_engine.h" @@ -149,6 +150,8 @@ bool 
rpc_client_matcher::on_recv_reply(network *net, uint64_t key, message_ex *r case GRPC_TO_LEADER: if (req->server_address.group_address()->is_update_leader_automatically()) { req->server_address.group_address()->set_leader(addr); + req->server_host_port.group_host_port()->set_leader( + host_port::from_address(addr)); } break; default: @@ -177,6 +180,8 @@ bool rpc_client_matcher::on_recv_reply(network *net, uint64_t key, message_ex *r req->server_address.group_address()->is_update_leader_automatically()) { req->server_address.group_address()->set_leader( reply->header->from_address); + req->server_host_port.group_host_port()->set_leader( + host_port::from_address(reply->header->from_address)); } break; default: @@ -517,9 +522,11 @@ error_code rpc_engine::start(const service_app_spec &aspec) _local_primary_address = _client_nets[NET_HDR_DSN][0]->address(); _local_primary_address.set_port(aspec.ports.size() > 0 ? *aspec.ports.begin() : aspec.id); + _local_primary_host_port = host_port::from_address(_local_primary_address); - LOG_INFO("=== service_node=[{}], primary_address=[{}] ===", + LOG_INFO("=== service_node=[{}], primary_address=[{}({})] ===", _node->full_name(), + _local_primary_host_port, _local_primary_address); _is_running = true; @@ -616,7 +623,7 @@ void rpc_engine::on_recv_request(network *net, message_ex *msg, int delay_ms) void rpc_engine::call(message_ex *request, const rpc_response_task_ptr &call) { auto &hdr = *request->header; - hdr.from_address = primary_address(); + hdr.from_address = _local_primary_address; hdr.trace_id = rand::next_u64(std::numeric_limits::min(), std::numeric_limits::max()); @@ -668,6 +675,7 @@ void rpc_engine::call_ip(rpc_address addr, } request->to_address = addr; + request->to_host_port = host_port::from_address(addr); auto sp = task_spec::get(request->local_rpc_code); auto &hdr = *request->header; @@ -833,7 +841,7 @@ void rpc_engine::forward(message_ex *request, rpc_address address) task_spec::get(request->local_rpc_code)->name, request->header->trace_id); CHECK_NE_MSG(address, - primary_address(), + _local_primary_address, "cannot forward msg {} (trace_id = {:#018x}) to the local node", task_spec::get(request->local_rpc_code)->name, request->header->trace_id); diff --git a/src/runtime/rpc/rpc_engine.h b/src/runtime/rpc/rpc_engine.h index 0613b721aa..47980afbde 100644 --- a/src/runtime/rpc/rpc_engine.h +++ b/src/runtime/rpc/rpc_engine.h @@ -36,6 +36,7 @@ #include "network.h" #include "runtime/api_task.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" @@ -63,8 +64,7 @@ struct service_app_spec; // the RPC request message is sent to. In this case, a shared rpc_engine level matcher is used. 
// // WE NOW USE option (3) so as to enable more features and the performance should not be degraded -// (due to -// less std::shared_ptr operations in rpc_timeout_task +// (due to less std::shared_ptr operations in rpc_timeout_task) // #define MATCHER_BUCKET_NR 13 class rpc_client_matcher : public ref_counter @@ -175,6 +175,7 @@ class rpc_engine // service_node *node() const { return _node; } ::dsn::rpc_address primary_address() const { return _local_primary_address; } + host_port primary_host_port() const { return _local_primary_host_port; } rpc_client_matcher *matcher() { return &_rpc_matcher; } // call with group address only @@ -202,6 +203,7 @@ class rpc_engine std::unordered_map>> _server_nets; // > ::dsn::rpc_address _local_primary_address; + host_port _local_primary_host_port; rpc_client_matcher _rpc_matcher; rpc_server_dispatcher _rpc_dispatcher; diff --git a/src/runtime/rpc/rpc_host_port.cpp b/src/runtime/rpc/rpc_host_port.cpp index 91ed9f01bd..c2cb5b2778 100644 --- a/src/runtime/rpc/rpc_host_port.cpp +++ b/src/runtime/rpc/rpc_host_port.cpp @@ -17,6 +17,7 @@ * under the License. */ +#include #include #include #include @@ -42,8 +43,11 @@ const host_port host_port::s_invalid_host_port; host_port::host_port(std::string host, uint16_t port) : _host(std::move(host)), _port(port), _type(HOST_TYPE_IPV4) { - // ipv4_from_host may be slow, just call it in DEBUG version. - DCHECK_OK(rpc_address::ipv4_from_host(_host, nullptr), "invalid hostname: {}", _host); + // Solve the problem of not translating "0.0.0.0" + if (_host != "0.0.0.0") { + // ipv4_from_host may be slow, just call it in DEBUG version. + DCHECK_OK(rpc_address::ipv4_from_host(_host, nullptr), "invalid hostname: {}", _host); + } } host_port host_port::from_address(rpc_address addr) @@ -169,11 +173,14 @@ error_s host_port::resolve_addresses(std::vector &addresses) const __builtin_unreachable(); } - // 1. Try to resolve hostname in the form of "localhost:80" or "192.168.0.1:8080". - const auto rpc_addr = rpc_address::from_ip_port(this->to_string()); - if (rpc_addr) { - addresses.emplace_back(rpc_addr); - return error_s::ok(); + // 1. Try to resolve hostname in the form of "192.168.0.1:8080". + uint32_t ip_addr; + if (inet_pton(AF_INET, this->_host.c_str(), &ip_addr)) { + const auto rpc_addr = rpc_address::from_ip_port(this->to_string()); + if (rpc_addr) { + addresses.emplace_back(rpc_addr); + return error_s::ok(); + } } // 2. Try to resolve hostname in the form of "host1:80". 
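Aside (illustration only, not part of the patch): the hunk above makes host_port::resolve_addresses() take the direct numeric shortcut only when the stored host is a literal IPv4 string, as checked by inet_pton(); any other host name falls through to the name-resolution path ("2. Try to resolve hostname ..."). A minimal C++ sketch of the intended call pattern follows, assuming only the interfaces visible in this diff (the host_port(host, port) constructor, resolve_addresses() filling a std::vector of rpc_address, and the dns_resolver singleton); resolve_sketch() itself is a hypothetical helper.

#include <vector>
#include "runtime/rpc/dns_resolver.h"
#include "runtime/rpc/rpc_address.h"
#include "runtime/rpc/rpc_host_port.h"

// Hypothetical helper, only for illustrating the two resolution paths above.
void resolve_sketch()
{
    // Literal IPv4 host: inet_pton() succeeds, so resolve_addresses() converts it
    // directly into a single rpc_address without any name lookup.
    dsn::host_port ip_hp("192.168.0.1", 8080);
    std::vector<dsn::rpc_address> ip_addrs;
    const auto ip_res = ip_hp.resolve_addresses(ip_addrs);

    // Host name: inet_pton() fails, so resolution falls through to the name-lookup
    // branch and may return more than one address.
    dsn::host_port name_hp("localhost", 8080);
    std::vector<dsn::rpc_address> name_addrs;
    const auto name_res = name_hp.resolve_addresses(name_addrs);

    // Call sites elsewhere in this patch that need exactly one address go through
    // the shared resolver instead:
    const auto addr = dsn::dns_resolver::instance().resolve_address(name_hp);
}
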
@@ -209,4 +216,13 @@ error_s host_port::resolve_addresses(std::vector &addresses) const return error_s::ok(); } +void host_port::fill_host_ports_from_addresses(const std::vector &addr_v, + std::vector &hp_v) +{ + CHECK(hp_v.empty(), "optional host_port should be empty!"); + for (const auto &addr : addr_v) { + hp_v.emplace_back(host_port::from_address(addr)); + } +} + } // namespace dsn diff --git a/src/runtime/rpc/rpc_host_port.h b/src/runtime/rpc/rpc_host_port.h index 4bec096c14..cc1cd70e0b 100644 --- a/src/runtime/rpc/rpc_host_port.h +++ b/src/runtime/rpc/rpc_host_port.h @@ -44,6 +44,16 @@ class TProtocol; } // namespace thrift } // namespace apache +#define GET_HOST_PORT(obj, field, target) \ + do { \ + const auto &_obj = (obj); \ + if (_obj.__isset.hp_##field) { \ + target = _obj.hp_##field; \ + } else { \ + target = std::move(dsn::host_port::from_address(_obj.field)); \ + } \ + } while (0) + namespace dsn { class rpc_group_host_port; @@ -67,6 +77,8 @@ class host_port [[nodiscard]] bool is_invalid() const { return _type == HOST_TYPE_INVALID; } + operator bool() const { return !is_invalid(); } + std::string to_string() const; friend std::ostream &operator<<(std::ostream &os, const host_port &hp) @@ -94,6 +106,9 @@ class host_port uint32_t read(::apache::thrift::protocol::TProtocol *iprot); uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const; + static void fill_host_ports_from_addresses(const std::vector &addr_v, + /*output*/ std::vector &hp_v); + private: friend class dns_resolver; FRIEND_TEST(host_port_test, transfer_rpc_address); diff --git a/src/runtime/rpc/rpc_message.cpp b/src/runtime/rpc/rpc_message.cpp index e691ac3cd4..18caf362bb 100644 --- a/src/runtime/rpc/rpc_message.cpp +++ b/src/runtime/rpc/rpc_message.cpp @@ -35,6 +35,7 @@ #include "network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "utils/crc.h" #include "utils/flags.h" @@ -221,6 +222,7 @@ message_ex *message_ex::copy(bool clone_content, bool copy_for_receive) message_ex *msg = new message_ex(); msg->to_address = to_address; + msg->to_host_port = to_host_port; msg->local_rpc_code = local_rpc_code; msg->hdr_format = hdr_format; @@ -355,6 +357,7 @@ message_ex *message_ex::create_response() // the primary address. msg->header->from_address = to_address; msg->to_address = header->from_address; + msg->to_host_port = host_port::from_address(header->from_address); msg->io_session = io_session; msg->hdr_format = hdr_format; diff --git a/src/runtime/rpc/rpc_message.h b/src/runtime/rpc/rpc_message.h index d07cb0c5b6..63e4ecb5be 100644 --- a/src/runtime/rpc/rpc_message.h +++ b/src/runtime/rpc/rpc_message.h @@ -33,6 +33,7 @@ #include "common/gpid.h" #include "rpc_address.h" +#include "rpc_host_port.h" #include "runtime/task/task_code.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -95,13 +96,13 @@ typedef struct message_header // Attention: // here, from_address must be IPv4 address, namely we can regard from_address as a - // POD-type structure, so no memory-leak will occur even if we don't call it's + // POD-type structure, so no memory-leak will occur even if we don't call its // destructor. // // generally, it is the from_node's primary address, except the // case described in message_ex::create_response()'s ATTENTION comment. 
// - // in the forwarding case, the from_address is always the orignal client's address + // in the forwarding case, the from_address is always the original client's address rpc_address from_address; struct @@ -137,6 +138,8 @@ class message_ex : public ref_counter, public extensible_object rpc_session_ptr io_session; // send/recv session rpc_address to_address; // always ipv4/v6 address, it is the to_node's net address rpc_address server_address; // used by requests, and may be of uri/group address + host_port to_host_port; // fqdn from 'to_address' + host_port server_host_port; // fqdn from 'server_address' dsn::task_code local_rpc_code; network_header_format hdr_format; int send_retry_count; diff --git a/src/runtime/rpc/serialization.h b/src/runtime/rpc/serialization.h index 6d9eec3d5a..598dd0163a 100644 --- a/src/runtime/rpc/serialization.h +++ b/src/runtime/rpc/serialization.h @@ -28,10 +28,13 @@ #include "utils/utils.h" #include "rpc_address.h" +#include "rpc_host_port.h" #include "runtime/rpc/rpc_stream.h" #include "common/serialization_helper/thrift_helper.h" namespace dsn { +class partition_configuration; + namespace serialization { template @@ -101,4 +104,7 @@ inline void unmarshall(dsn::message_ex *msg, /*out*/ T &val) unmarshall(reader, val, (dsn_msg_serialize_format)msg->header->context.u.serialize_format); } +template <> +inline void unmarshall(dsn::message_ex *msg, /*out*/ partition_configuration &val); + } // namespace dsn diff --git a/src/runtime/service_api_c.cpp b/src/runtime/service_api_c.cpp index 028555a509..a26f417de3 100644 --- a/src/runtime/service_api_c.cpp +++ b/src/runtime/service_api_c.cpp @@ -51,6 +51,7 @@ #include "runtime/global_config.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "security/init.h" #include "security/negotiation_manager.h" @@ -138,6 +139,11 @@ void dsn_coredump() // rpc calls dsn::rpc_address dsn_primary_address() { return ::dsn::task::get_current_rpc()->primary_address(); } +dsn::host_port dsn_primary_host_port() +{ + return ::dsn::task::get_current_rpc()->primary_host_port(); +} + bool dsn_rpc_register_handler(dsn::task_code code, const char *extra_name, const dsn::rpc_request_handler &cb) @@ -156,6 +162,7 @@ void dsn_rpc_call(dsn::rpc_address server, dsn::rpc_response_task *rpc_call) auto msg = rpc_call->get_request(); msg->server_address = server; + msg->server_host_port = dsn::host_port::from_address(server); ::dsn::task::get_current_rpc()->call(msg, dsn::rpc_response_task_ptr(rpc_call)); } @@ -163,6 +170,7 @@ dsn::message_ex *dsn_rpc_call_wait(dsn::rpc_address server, dsn::message_ex *req { auto msg = ((::dsn::message_ex *)request); msg->server_address = server; + msg->server_host_port = dsn::host_port::from_address(server); ::dsn::rpc_response_task *rtask = new ::dsn::rpc_response_task(msg, nullptr, 0); rtask->add_ref(); @@ -183,6 +191,7 @@ void dsn_rpc_call_one_way(dsn::rpc_address server, dsn::message_ex *request) { auto msg = ((::dsn::message_ex *)request); msg->server_address = server; + msg->server_host_port = dsn::host_port::from_address(server); ::dsn::task::get_current_rpc()->call(msg, nullptr); } diff --git a/src/runtime/service_app.h b/src/runtime/service_app.h index 0e6e2cfc86..71e23fc6d7 100644 --- a/src/runtime/service_app.h +++ b/src/runtime/service_app.h @@ -34,7 +34,7 @@ #include "utils/threadpool_code.h" #include "runtime/task/task_code.h" #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" 
+#include "runtime/rpc/rpc_host_port.h" #include "common/gpid.h" #include "utils/factory_store.h" #include @@ -88,14 +88,14 @@ class service_app } bool is_started() const { return _started; } - rpc_address primary_address() const { return _address; } - void set_address(const rpc_address &addr) { _address = addr; } + host_port primary_host_port() const { return _host_port; } + void set_host_port(const host_port &hp) { _host_port = hp; } void set_started(bool start_flag) { _started = start_flag; } const service_app_info &info() const; protected: const service_app_info *const _info; - rpc_address _address; + host_port _host_port; bool _started; }; diff --git a/src/runtime/service_engine.cpp b/src/runtime/service_engine.cpp index ee0c742d81..acfca18ab3 100644 --- a/src/runtime/service_engine.cpp +++ b/src/runtime/service_engine.cpp @@ -76,7 +76,7 @@ error_code service_node::init_rpc_engine() dsn::error_code service_node::start_app() { CHECK(_entity, "entity hasn't initialized"); - _entity->set_address(rpc()->primary_address()); + _entity->set_host_port(rpc()->primary_host_port()); std::vector args; utils::split_args(spec().arguments.c_str(), args); diff --git a/src/runtime/test/rpc.cpp b/src/runtime/test/rpc.cpp index b9e2804d84..f3aa9a99e2 100644 --- a/src/runtime/test/rpc.cpp +++ b/src/runtime/test/rpc.cpp @@ -34,6 +34,7 @@ #include "gtest/gtest.h" #include "runtime/rpc/group_address.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -80,9 +81,9 @@ TEST(core, group_address_talk_to_others) auto typed_callback = [addr](error_code err_code, const std::string &result) { EXPECT_EQ(ERR_OK, err_code); LOG_INFO("talk to others callback, result: {}", result); - const auto addr_got = rpc_address::from_ip_port(result); - EXPECT_TRUE(addr_got); - EXPECT_EQ(TEST_PORT_END, addr_got.port()); + const auto hp_got = host_port::from_string(result); + EXPECT_TRUE(hp_got); + EXPECT_EQ(TEST_PORT_END, hp_got.port()); }; ::dsn::task_ptr resp = ::dsn::rpc::call(addr, RPC_TEST_STRING_COMMAND, @@ -100,10 +101,10 @@ TEST(core, group_address_change_leader) auto typed_callback = [addr, &rpc_err](error_code err_code, const std::string &result) -> void { rpc_err = err_code; if (ERR_OK == err_code) { + const auto hp_got = host_port::from_string(result); LOG_INFO("talk to others callback, result: {}", result); - const auto addr_got = rpc_address::from_ip_port(result); - EXPECT_TRUE(addr_got); - EXPECT_EQ(TEST_PORT_END, addr_got.port()); + EXPECT_FALSE(hp_got.is_invalid()); + EXPECT_EQ(TEST_PORT_END, hp_got.port()); } }; @@ -207,8 +208,8 @@ TEST(core, group_address_no_response_2) EXPECT_EQ(ERR_OK, err); std::string result; ::dsn::unmarshall(resp, result); - const auto addr = ::dsn::rpc_address::from_ip_port(result); - EXPECT_EQ(TEST_PORT_END, addr.port()); + const auto hp_got = host_port::from_string(result); + EXPECT_EQ(TEST_PORT_END, hp_got.port()); }; rpc_reply_handler action_on_failure = @@ -224,8 +225,8 @@ TEST(core, group_address_no_response_2) TEST(core, send_to_invalid_address) { ::dsn::rpc_address group = build_group(); - /* here we assume 10.255.254.253:32766 is not assigned */ - group.group_address()->set_leader(dsn::rpc_address::from_ip_port("10.255.254.253", 32766)); + /* here we assume 127.0.0.1:32766 is not assigned */ + group.group_address()->set_leader(dsn::rpc_address::from_ip_port("127.0.0.1", 32766)); rpc_reply_handler action_on_succeed = [](error_code err, 
dsn::message_ex *, dsn::message_ex *resp) { diff --git a/src/runtime/test_utils.h b/src/runtime/test_utils.h index 5d06cc587d..4b54fde386 100644 --- a/src/runtime/test_utils.h +++ b/src/runtime/test_utils.h @@ -34,11 +34,11 @@ #include "utils/threadpool_code.h" #include "runtime/task/task_code.h" #include "common/gpid.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/serialization.h" #include "runtime/rpc/rpc_stream.h" #include "runtime/serverlet.h" #include "runtime/service_app.h" -#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_worker.h" @@ -93,20 +93,20 @@ class test_client : public ::dsn::serverlet, public ::dsn::service_ ::dsn::unmarshall(message, command); if (command == "expect_talk_to_others") { - dsn::rpc_address next_addr = dsn::service_app::primary_address(); - if (next_addr.port() != TEST_PORT_END) { - next_addr._addr.v4.port++; - LOG_INFO("test_client_server, talk_to_others: {}", next_addr); - dsn_rpc_forward(message, next_addr); + auto next_hp = dsn::service_app::primary_host_port(); + if (next_hp.port() != TEST_PORT_END) { + next_hp = dsn::host_port(next_hp.host(), next_hp.port() + 1); + LOG_INFO("test_client_server, talk_to_others: {}", next_hp); + dsn_rpc_forward(message, dsn::dns_resolver::instance().resolve_address(next_hp)); } else { - LOG_INFO("test_client_server, talk_to_me: {}", next_addr); - reply(message, std::string(next_addr.to_string())); + LOG_INFO("test_client_server, talk_to_me: {}", next_hp); + reply(message, next_hp.to_string()); } } else if (command == "expect_no_reply") { - if (dsn::service_app::primary_address().port() == TEST_PORT_END) { + if (dsn::service_app::primary_host_port().port() == TEST_PORT_END) { LOG_INFO("test_client_server, talk_with_reply: {}", - dsn::service_app::primary_address()); - reply(message, std::string(dsn::service_app::primary_address().to_string())); + dsn::service_app::primary_host_port()); + reply(message, dsn::service_app::primary_host_port().to_string()); } } else if (command.substr(0, 5) == "echo ") { reply(message, command.substr(5)); diff --git a/src/server/available_detector.h b/src/server/available_detector.h index 74a394406c..075c271dd2 100644 --- a/src/server/available_detector.h +++ b/src/server/available_detector.h @@ -27,7 +27,7 @@ #include "dsn.layer2_types.h" #include "perf_counter/perf_counter_wrapper.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "utils/synchronize.h" @@ -76,7 +76,7 @@ class available_detector // client to access server. pegasus_client *_client; std::shared_ptr _ddl_client; - std::vector _meta_list; + std::vector _meta_list; ::dsn::utils::ex_lock_nr _alert_lock; // for record partition fail times. 
std::vector>> _fail_count; diff --git a/src/server/config.min.ini b/src/server/config.min.ini index 228330531a..870cc171e3 100644 --- a/src/server/config.min.ini +++ b/src/server/config.min.ini @@ -43,7 +43,7 @@ logging_start_level = LOG_LEVEL_INFO [network] - primary_interface = lo + primary_interface = @LOCAL_HOSTNAME@ [block_service.local_service] type = local_service diff --git a/src/server/hotspot_partition_calculator.cpp b/src/server/hotspot_partition_calculator.cpp index b3d481ccf0..52e1bc5034 100644 --- a/src/server/hotspot_partition_calculator.cpp +++ b/src/server/hotspot_partition_calculator.cpp @@ -218,20 +218,20 @@ void hotspot_partition_calculator::send_detect_hotkey_request( std::vector partitions; _shell_context->ddl_client->list_app(app_name, app_id, partition_count, partitions); - auto target_address = partitions[partition_index].primary; dsn::replication::detect_hotkey_response resp; dsn::replication::detect_hotkey_request req; req.type = hotkey_type; req.action = action; req.pid = dsn::gpid(app_id, partition_index); - auto error = _shell_context->ddl_client->detect_hotkey(target_address, req, resp); + auto error = _shell_context->ddl_client->detect_hotkey( + partitions[partition_index].hp_primary, req, resp); - LOG_INFO("{} {} hotkey detection in {}.{}, server address: {}", + LOG_INFO("{} {} hotkey detection in {}.{}, server host_port: {}", (action == dsn::replication::detect_action::STOP) ? "Stop" : "Start", (hotkey_type == dsn::replication::hotkey_type::WRITE) ? "write" : "read", app_name, partition_index, - target_address); + partitions[partition_index].hp_primary); if (error != dsn::ERR_OK) { LOG_ERROR("Hotkey detect rpc sending failed, in {}.{}, error_hint:{}", diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 6523cad4d9..5f504ce428 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -32,7 +32,7 @@ #include "hotspot_partition_calculator.h" #include "pegasus/client.h" #include "result_writer.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_code.h" #include "shell/command_executor.h" @@ -71,12 +71,12 @@ DEFINE_TASK_CODE(LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, info_collector::info_collector() { - std::vector<::dsn::rpc_address> meta_servers; + std::vector<::dsn::host_port> meta_servers; replica_helper::load_meta_servers(meta_servers); _meta_servers.assign_group("meta-servers"); for (auto &ms : meta_servers) { - CHECK(_meta_servers.group_address()->add(ms), ""); + CHECK(_meta_servers.group_host_port()->add(ms), ""); } _cluster_name = dsn::get_current_cluster_name(); diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 649d0dee09..fe168ad7f1 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -29,7 +29,7 @@ #include "perf_counter/perf_counter.h" #include "perf_counter/perf_counter_wrapper.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "shell/command_helper.h" @@ -213,7 +213,7 @@ class info_collector private: dsn::task_tracker _tracker; - ::dsn::rpc_address _meta_servers; + ::dsn::host_port _meta_servers; std::string _cluster_name; std::shared_ptr _shell_context; ::dsn::task_ptr _app_stat_timer_task; diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp index e2321f2e8f..73afd5f0a6 100644 --- 
a/src/server/pegasus_server_impl_init.cpp +++ b/src/server/pegasus_server_impl_init.cpp @@ -46,7 +46,7 @@ #include "pegasus_value_schema.h" #include "replica_admin_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "server/capacity_unit_calculator.h" // IWYU pragma: keep #include "server/key_ttl_compaction_filter.h" #include "server/pegasus_read_service.h" @@ -629,7 +629,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_positives), METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_true_positives) { - _primary_address = dsn::rpc_address(dsn_primary_address()).to_string(); + _primary_address = dsn_primary_host_port().to_string(); _gpid = get_gpid(); _read_hotkey_collector = diff --git a/src/shell/command_executor.h b/src/shell/command_executor.h index cba3b0a5cf..20b7ad2ed6 100644 --- a/src/shell/command_executor.h +++ b/src/shell/command_executor.h @@ -22,9 +22,10 @@ #include #include #include -#include "client/replication_ddl_client.h" #include +#include "client/replication_ddl_client.h" + #include "sds/sds.h" struct command_executor; @@ -32,7 +33,7 @@ struct shell_context { std::string current_cluster_name; std::string current_app_name; - std::vector meta_list; + std::vector meta_list; std::unique_ptr ddl_client; pegasus::pegasus_client *pg_client; bool escape_all; diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index afc2579e39..af6a964c2b 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -625,20 +625,21 @@ inline void scan_data_next(scan_data_context *context) struct node_desc { std::string desc; - dsn::rpc_address address; - node_desc(const std::string &s, const dsn::rpc_address &n) : desc(s), address(n) {} + dsn::host_port hp; + node_desc(const std::string &s, const dsn::host_port &n) : desc(s), hp(n) {} }; + // type: all | replica-server | meta-server inline bool fill_nodes(shell_context *sc, const std::string &type, std::vector &nodes) { if (type == "all" || type == "meta-server") { - for (auto &addr : sc->meta_list) { - nodes.emplace_back("meta-server", addr); + for (auto &hp : sc->meta_list) { + nodes.emplace_back("meta-server", hp); } } if (type == "all" || type == "replica-server") { - std::map rs_nodes; + std::map rs_nodes; ::dsn::error_code err = sc->ddl_client->list_nodes(dsn::replication::node_status::NS_ALIVE, rs_nodes); if (err != ::dsn::ERR_OK) { @@ -674,8 +675,8 @@ inline std::vector get_metrics(const std::vector &n } \ } while (0) - SET_RESULT_AND_RETURN_IF_URL_NOT_OK(host, nodes[i].address.ipv4_str()); - SET_RESULT_AND_RETURN_IF_URL_NOT_OK(port, nodes[i].address.port()); + SET_RESULT_AND_RETURN_IF_URL_NOT_OK(host, nodes[i].hp.host().c_str()); + SET_RESULT_AND_RETURN_IF_URL_NOT_OK(port, nodes[i].hp.port()); SET_RESULT_AND_RETURN_IF_URL_NOT_OK( path, dsn::metrics_http_service::kMetricsQueryPath.c_str()); SET_RESULT_AND_RETURN_IF_URL_NOT_OK(query, query_string.c_str()); @@ -693,14 +694,14 @@ inline std::vector get_metrics(const std::vector &n do { \ if (dsn_unlikely(!result.error())) { \ std::cout << "ERROR: send http request to query " << fmt::format(what, ##__VA_ARGS__) \ - << " metrics from node " << node.address << " failed: " << result.error() \ + << " metrics from node " << node.hp << " failed: " << result.error() \ << std::endl; \ return true; \ } \ if (dsn_unlikely(result.status() != dsn::http_status_code::kOk)) { \ std::cout << "ERROR: send http request to 
query " << what << " metrics from node " \ - << node.address \ - << " failed: " << dsn::get_http_status_message(result.status()) << std::endl \ + << node.hp << " failed: " << dsn::get_http_status_message(result.status()) \ + << std::endl \ << result.body() << std::endl; \ return true; \ } \ @@ -711,7 +712,7 @@ inline std::vector get_metrics(const std::vector &n const auto &res = (expr); \ if (dsn_unlikely(!res)) { \ std::cout << "ERROR: parse " << fmt::format(what, ##__VA_ARGS__) \ - << " metrics response from node " << node.address << " failed: " << res \ + << " metrics response from node " << node.hp << " failed: " << res \ << std::endl; \ return true; \ } \ @@ -1071,7 +1072,11 @@ call_remote_command(shell_context *sc, } }; tasks[i] = dsn::dist::cmd::async_call_remote( - nodes[i].address, cmd, arguments, callback, std::chrono::milliseconds(5000)); + dsn::dns_resolver::instance().resolve_address(nodes[i].hp), + cmd, + arguments, + callback, + std::chrono::milliseconds(5000)); } for (int i = 0; i < nodes.size(); ++i) { tasks[i]->wait(); @@ -1445,7 +1450,7 @@ inline stat_var_map create_rates(row_data &row) // partitions should have their primary replicas on this node. inline std::unique_ptr create_table_aggregate_stats_calcs( const std::map> &table_partitions, - const dsn::rpc_address &node, + const dsn::host_port &node, const std::string &entity_type, std::vector &rows) { @@ -1470,7 +1475,7 @@ inline std::unique_ptr create_table_aggregate_stats_calcs row.app_id); for (const auto &partition : table->second) { - if (partition.primary != node) { + if (partition.hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. continue; } @@ -1491,7 +1496,7 @@ inline std::unique_ptr create_table_aggregate_stats_calcs inline std::unique_ptr create_partition_aggregate_stats_calcs(const int32_t table_id, const std::vector &partitions, - const dsn::rpc_address &node, + const dsn::host_port &node, const std::string &entity_type, std::vector &rows) { @@ -1501,7 +1506,7 @@ create_partition_aggregate_stats_calcs(const int32_t table_id, partition_stat_map increases; partition_stat_map rates; for (size_t i = 0; i < rows.size(); ++i) { - if (partitions[i].primary != node) { + if (partitions[i].hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. 
continue; } @@ -1686,24 +1691,22 @@ get_app_partitions(shell_context *sc, return true; } -inline bool decode_node_perf_counter_info(const dsn::rpc_address &node_addr, +inline bool decode_node_perf_counter_info(const dsn::host_port &hp, const std::pair &result, dsn::perf_counter_info &info) { if (!result.first) { - LOG_ERROR("query perf counter info from node {} failed", node_addr); + LOG_ERROR("query perf counter info from node {} failed", hp); return false; } dsn::blob bb(result.second.data(), 0, result.second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - LOG_ERROR( - "decode perf counter info from node {} failed, result = {}", node_addr, result.second); + LOG_ERROR("decode perf counter info from node {} failed, result = {}", hp, result.second); return false; } if (info.result != "OK") { - LOG_ERROR("query perf counter info from node {} returns error, error = {}", - node_addr, - info.result); + LOG_ERROR( + "query perf counter info from node {} returns error, error = {}", hp, info.result); return false; } return true; @@ -1742,7 +1745,7 @@ inline bool get_app_partition_stat(shell_context *sc, for (int i = 0; i < nodes.size(); ++i) { // decode info of perf-counters on node i dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(nodes[i].address, results[i], info)) { + if (!decode_node_perf_counter_info(nodes[i].hp, results[i], info)) { return false; } @@ -1757,7 +1760,7 @@ inline bool get_app_partition_stat(shell_context *sc, // only primary partition will be counted auto find = app_partitions.find(app_id_x); if (find != app_partitions.end() && - find->second[partition_index_x].primary == nodes[i].address) { + find->second[partition_index_x].hp_primary == nodes[i].hp) { row_data &row = rows[app_id_name[app_id_x]][partition_index_x]; row.row_name = std::to_string(partition_index_x); row.app_id = app_id_x; @@ -1817,7 +1820,7 @@ get_table_stats(shell_context *sc, uint32_t sample_interval_ms, std::vectoraggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], @@ -1867,7 +1870,7 @@ inline bool get_partition_stats(shell_context *sc, results_end[i], nodes[i], "ending row data requests for table(id={})", table_id); auto calcs = create_partition_aggregate_stats_calcs( - table_id, partitions, nodes[i].address, "replica", rows); + table_id, partitions, nodes[i].hp, "replica", rows); RETURN_SHELL_IF_PARSE_METRICS_FAILED( calcs->aggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], @@ -1928,14 +1931,14 @@ inline bool get_capacity_unit_stat(shell_context *sc, nodes_stat.resize(nodes.size()); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(node_addr, results[i], info)) { - LOG_WARNING("decode perf counter from node({}) failed, just ignore it", node_addr); + if (!decode_node_perf_counter_info(nodes[i].hp, results[i], info)) { + LOG_WARNING("decode perf counter from node({}) failed, just ignore it", nodes[i].hp); continue; } nodes_stat[i].timestamp = info.timestamp_str; - nodes_stat[i].node_address = node_addr.to_string(); + nodes_stat[i].node_address = + dsn::dns_resolver::instance().resolve_address(nodes[i].hp).to_string(); for (dsn::perf_counter_metric &m : info.counters) { int32_t app_id, pidx; std::string counter_name; @@ -1995,10 +1998,9 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s sc, nodes, "perf-counters-by-prefix", {"replica*app.pegasus*disk.storage.sst(MB)"}); for (int i = 0; i < 
nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(node_addr, results[i], info)) { - LOG_WARNING("decode perf counter from node({}) failed, just ignore it", node_addr); + if (!decode_node_perf_counter_info(nodes[i].hp, results[i], info)) { + LOG_WARNING("decode perf counter from node({}) failed, just ignore it", nodes[i].hp); continue; } for (dsn::perf_counter_metric &m : info.counters) { @@ -2013,7 +2015,7 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s if (find == app_partitions.end()) // app id not found continue; dsn::partition_configuration &pc = find->second[partition_index_x]; - if (pc.primary != node_addr) // not primary replica + if (pc.hp_primary != nodes[i].hp) // not primary replica continue; if (pc.partition_flags != 0) // already calculated continue; @@ -2035,11 +2037,15 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s inline configuration_proposal_action new_proposal_action(const dsn::rpc_address &target, const dsn::rpc_address &node, + const dsn::host_port &hp_target, + const dsn::host_port &hp_node, config_type::type type) { configuration_proposal_action act; act.__set_target(target); act.__set_node(node); + act.__set_hp_target(hp_target); + act.__set_hp_node(hp_node); act.__set_type(type); return act; } diff --git a/src/shell/command_utils.cpp b/src/shell/command_utils.cpp index afe08ffe87..fb3be3bced 100644 --- a/src/shell/command_utils.cpp +++ b/src/shell/command_utils.cpp @@ -22,21 +22,21 @@ #include "client/replication_ddl_client.h" #include "command_executor.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" bool validate_ip(shell_context *sc, const std::string &ip_str, - dsn::rpc_address &target_address, + dsn::host_port &target_hp, std::string &err_info) { - target_address = dsn::rpc_address::from_ip_port(ip_str); - if (!target_address) { - err_info = fmt::format("invalid ip:port={}, can't transform it into rpc_address", ip_str); + target_hp = dsn::host_port::from_string(ip_str); + if (!target_hp) { + err_info = fmt::format("invalid ip:port={}, can't transform it into host_port", ip_str); return false; } - std::map nodes; + std::map nodes; auto error = sc->ddl_client->list_nodes(dsn::replication::node_status::NS_INVALID, nodes); if (error != dsn::ERR_OK) { err_info = fmt::format("list nodes failed, error={}", error.to_string()); @@ -44,7 +44,7 @@ bool validate_ip(shell_context *sc, } for (const auto &node : nodes) { - if (target_address == node.first) { + if (target_hp == node.first) { return true; } } diff --git a/src/shell/command_utils.h b/src/shell/command_utils.h index f4a126df85..5e1095d9a1 100644 --- a/src/shell/command_utils.h +++ b/src/shell/command_utils.h @@ -30,7 +30,7 @@ #include "utils/strings.h" namespace dsn { -class rpc_address; +class host_port; } struct shell_context; @@ -67,7 +67,7 @@ inline bool validate_cmd(const argh::parser &cmd, bool validate_ip(shell_context *sc, const std::string &ip_str, - /*out*/ dsn::rpc_address &target_address, + /*out*/ dsn::host_port &target_hp, /*out*/ std::string &err_info); // Print messages to stderr and return false if `exp` is evaluated to false. 
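Note on the shell-side pattern applied in the hunks above and below: the change keeps dsn::host_port as the only node identity inside the shell (parsing user input, map keys, comparisons, printing) and converts to dsn::rpc_address through dsn::dns_resolver only at the RPC boundary. The following is a minimal sketch, not part of the patch; the helper name parse_and_resolve and its layout are assumptions for illustration, while host_port::from_string and dns_resolver::instance().resolve_address are the calls actually used in this change.

    #include <string>

    #include "runtime/rpc/dns_resolver.h"
    #include "runtime/rpc/rpc_address.h"
    #include "runtime/rpc/rpc_host_port.h"

    // Hypothetical helper: parse "host:port" once, keep the host_port for all
    // shell-side bookkeeping, and resolve to an rpc_address only when an RPC
    // actually needs one.
    inline bool parse_and_resolve(const std::string &s,
                                  /*out*/ dsn::host_port &hp,
                                  /*out*/ dsn::rpc_address &addr)
    {
        hp = dsn::host_port::from_string(s);
        if (!hp) {
            // An invalid "host:port" string yields an unset host_port, mirroring
            // the validate_ip() change in this patch.
            return false;
        }
        // DNS resolution happens only at the RPC boundary (as with
        // async_call_remote and new_proposal_action in this patch), not while
        // storing or comparing nodes.
        addr = dsn::dns_resolver::instance().resolve_address(hp);
        return true;
    }

Usage would mirror remote_command() and propose() below: the host_port stays in node_desc and the new hp_* request fields, while the resolved rpc_address is passed only to the legacy address fields.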
diff --git a/src/shell/commands/data_operations.cpp b/src/shell/commands/data_operations.cpp index 065d6a55dc..87c6110b9d 100644 --- a/src/shell/commands/data_operations.cpp +++ b/src/shell/commands/data_operations.cpp @@ -50,7 +50,7 @@ #include "pegasus_key_schema.h" #include "pegasus_utils.h" #include "rrdb/rrdb_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "shell/args.h" #include "shell/command_executor.h" @@ -2232,7 +2232,7 @@ inline dsn::metric_filters rdb_estimated_keys_filters(int32_t table_id) std::unique_ptr create_rdb_estimated_keys_stats_calcs(const int32_t table_id, const std::vector &partitions, - const dsn::rpc_address &node, + const dsn::host_port &node, const std::string &entity_type, std::vector &rows) { @@ -2240,7 +2240,7 @@ create_rdb_estimated_keys_stats_calcs(const int32_t table_id, partition_stat_map sums; for (size_t i = 0; i < rows.size(); ++i) { - if (partitions[i].primary != node) { + if (partitions[i].hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. continue; } @@ -2290,7 +2290,7 @@ bool get_rdb_estimated_keys_stats(shell_context *sc, results[i], nodes[i], "rdb_estimated_keys for table(id={})", table_id); auto calcs = create_rdb_estimated_keys_stats_calcs( - table_id, partitions, nodes[i].address, "replica", rows); + table_id, partitions, nodes[i].hp, "replica", rows); RETURN_SHELL_IF_PARSE_METRICS_FAILED(calcs->aggregate_metrics(results[i].body()), nodes[i], "rdb_estimated_keys for table(id={})", @@ -2885,13 +2885,13 @@ bool calculate_hash_value(command_executor *e, shell_context *sc, arguments args tp.add_row_name_and_data("partition_index", partition_index); if (partitions.size() > partition_index) { ::dsn::partition_configuration &pc = partitions[partition_index]; - tp.add_row_name_and_data("primary", pc.primary.to_string()); + tp.add_row_name_and_data("primary", pc.hp_primary.to_string()); std::ostringstream oss; - for (int i = 0; i < pc.secondaries.size(); ++i) { + for (int i = 0; i < pc.hp_secondaries.size(); ++i) { if (i != 0) oss << ","; - oss << pc.secondaries[i]; + oss << pc.hp_secondaries[i]; } tp.add_row_name_and_data("secondaries", oss.str()); } diff --git a/src/shell/commands/detect_hotkey.cpp b/src/shell/commands/detect_hotkey.cpp index 6d300d31d3..c78f906c67 100644 --- a/src/shell/commands/detect_hotkey.cpp +++ b/src/shell/commands/detect_hotkey.cpp @@ -24,7 +24,7 @@ #include "client/replication_ddl_client.h" #include "common/gpid.h" #include "replica_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/argh.h" #include "shell/command_executor.h" #include "shell/command_utils.h" @@ -100,10 +100,10 @@ bool detect_hotkey(command_executor *e, shell_context *sc, arguments args) return false; } - dsn::rpc_address target_address; + dsn::host_port target_hp; std::string err_info; std::string ip_str = cmd({"-d", "--address"}).str(); - if (!validate_ip(sc, ip_str, target_address, err_info)) { + if (!validate_ip(sc, ip_str, target_hp, err_info)) { fmt::print(stderr, "{}\n", err_info); return false; } @@ -118,7 +118,7 @@ bool detect_hotkey(command_executor *e, shell_context *sc, arguments args) } detect_hotkey_response resp; - auto err = sc->ddl_client->detect_hotkey(dsn::rpc_address(target_address), req, resp); + auto err = sc->ddl_client->detect_hotkey(target_hp, req, resp); if (err != dsn::ERR_OK) { fmt::print(stderr, "Hotkey detection rpc sending failed, in 
{}.{}, error_hint:{}\n", diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index b81c76f8aa..ee813cc02b 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -38,7 +38,9 @@ #include "common/replication_enums.h" #include "dsn.layer2_types.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -52,7 +54,6 @@ #include "utils/output_utils.h" #include "utils/ports.h" #include "utils/strings.h" -#include "utils/utils.h" DSN_DEFINE_uint32(shell, nodes_sample_interval_ms, 1000, "The interval between sampling metrics."); DSN_DEFINE_validator(nodes_sample_interval_ms, [](uint32_t value) -> bool { return value > 0; }); @@ -312,24 +313,23 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) status); } - std::map nodes; + std::map nodes; auto r = sc->ddl_client->list_nodes(s, nodes); if (r != dsn::ERR_OK) { std::cout << "list nodes failed, error=" << r << std::endl; return true; } - std::map tmp_map; + std::map tmp_map; int alive_node_count = 0; for (auto &kv : nodes) { if (kv.second == dsn::replication::node_status::NS_ALIVE) alive_node_count++; std::string status_str = dsn::enum_to_string(kv.second); status_str = status_str.substr(status_str.find("NS_") + 3); - std::string node_name = kv.first.to_string(); + auto node_name = kv.first.to_string(); if (resolve_ip) { - // TODO: put hostname_from_ip_port into common utils - dsn::utils::hostname_from_ip_port(node_name.c_str(), &node_name); + node_name = dsn::dns_resolver::instance().resolve_address(kv.first).to_string(); } tmp_map.emplace(kv.first, list_nodes_helper(node_name, status_str)); } @@ -353,14 +353,14 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) } for (const dsn::partition_configuration &p : partitions) { - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (const dsn::rpc_address &addr : p.secondaries) { - auto find = tmp_map.find(addr); + for (const auto &hp : p.hp_secondaries) { + auto find = tmp_map.find(hp); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -379,7 +379,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) const auto &results = get_metrics(nodes, resource_usage_filters().to_query_string()); for (size_t i = 0; i < nodes.size(); ++i) { - auto tmp_it = tmp_map.find(nodes[i].address); + auto tmp_it = tmp_map.find(nodes[i].hp); if (tmp_it == tmp_map.end()) { continue; } @@ -405,7 +405,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) const auto &results_end = get_metrics(nodes, query_string); for (size_t i = 0; i < nodes.size(); ++i) { - auto tmp_it = tmp_map.find(nodes[i].address); + auto tmp_it = tmp_map.find(nodes[i].hp); if (tmp_it == tmp_map.end()) { continue; } @@ -444,7 +444,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) const auto &results = get_metrics(nodes, profiler_latency_filters().to_query_string()); for (size_t i = 0; i < nodes.size(); ++i) { - auto tmp_it = tmp_map.find(nodes[i].address); + auto tmp_it = tmp_map.find(nodes[i].hp); if (tmp_it == tmp_map.end()) { continue; } @@ -641,7 +641,7 @@ bool remote_command(command_executor *e, 
shell_context *sc, arguments args) } for (std::string &token : tokens) { - const auto node = dsn::rpc_address::from_host_port(token); + const auto node = dsn::host_port::from_string(token); if (!node) { fprintf(stderr, "parse %s as a ip:port node failed\n", token.c_str()); return true; @@ -666,9 +666,9 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) node_desc &n = node_list[i]; std::string hostname; if (resolve_ip) { - dsn::utils::hostname_from_ip_port(n.address.to_string(), &hostname); + hostname = dsn::dns_resolver::instance().resolve_address(n.hp).to_string(); } else { - hostname = n.address.to_string(); + hostname = n.hp.to_string(); } fprintf(stderr, "CALL [%s] [%s] ", n.desc.c_str(), hostname.c_str()); if (results[i].first) { diff --git a/src/shell/commands/rebalance.cpp b/src/shell/commands/rebalance.cpp index 74c5f43490..853e00274e 100644 --- a/src/shell/commands/rebalance.cpp +++ b/src/shell/commands/rebalance.cpp @@ -31,7 +31,9 @@ #include "client/replication_ddl_client.h" #include "common/gpid.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -88,7 +90,7 @@ bool propose(command_executor *e, shell_context *sc, arguments args) dverify(args.argc >= 9); dsn::replication::configuration_balancer_request request; request.gpid.set_app_id(-1); - dsn::rpc_address target, node; + dsn::host_port target, node; std::string proposal_type = "CT_"; request.force = false; bool ans; @@ -112,12 +114,12 @@ proposal_type += optarg; break; case 't': - target = dsn::rpc_address::from_host_port(optarg); - PRINT_AND_RETURN_FALSE_IF_NOT(target, "parse {} as target_address failed\n", optarg); + target = dsn::host_port::from_string(optarg); + PRINT_AND_RETURN_FALSE_IF_NOT(target, "parse {} as target_host_port failed\n", optarg); break; case 'n': - node = dsn::rpc_address::from_host_port(optarg); - PRINT_AND_RETURN_FALSE_IF_NOT(node, "parse {} as node failed\n", optarg); + node = dsn::host_port::from_string(optarg); + PRINT_AND_RETURN_FALSE_IF_NOT(node, "parse {} as node failed\n", optarg); break; default: return false; @@ -132,7 +134,12 @@ type_from_string(_config_type_VALUES_TO_NAMES, proposal_type, config_type::CT_INVALID); PRINT_AND_RETURN_FALSE_IF_NOT( tp != config_type::CT_INVALID, "parse {} as config_type failed.\n", proposal_type); - request.action_list = {new_proposal_action(target, node, tp)}; + request.action_list = { + new_proposal_action(dsn::dns_resolver::instance().resolve_address(target), + dsn::dns_resolver::instance().resolve_address(node), + target, + node, + tp)}; dsn::error_code err = sc->ddl_client->send_balancer_proposal(request); std::cout << "send proposal response: " << err << std::endl; return true; @@ -152,7 +159,7 @@ bool balance(command_executor *e, shell_context *sc, arguments args) dsn::replication::configuration_balancer_request request; request.gpid.set_app_id(-1); std::string balance_type; - dsn::rpc_address from, to; + dsn::host_port from, to; bool ans; optind = 0; @@ -174,14 +181,14 @@ balance_type = optarg; break; case 'f': - from = dsn::rpc_address::from_host_port(optarg); + from = dsn::host_port::from_string(optarg); if (!from) { -
fprintf(stderr, "parse %s as from_address failed\n", optarg); + fprintf(stderr, "parse %s as from_host_port failed\n", optarg); return false; } break; case 't': - to = dsn::rpc_address::from_host_port(optarg); + to = dsn::host_port::from_string(optarg); if (!to) { fprintf(stderr, "parse %s as target_address failed\n", optarg); return false; @@ -194,20 +201,31 @@ bool balance(command_executor *e, shell_context *sc, arguments args) std::vector &actions = request.action_list; actions.reserve(4); + const auto &from_addr = dsn::dns_resolver::instance().resolve_address(from); + const auto &to_addr = dsn::dns_resolver::instance().resolve_address(to); if (balance_type == "move_pri") { + actions.emplace_back(new_proposal_action( + from_addr, from_addr, from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); actions.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - actions.emplace_back(new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + new_proposal_action(to_addr, to_addr, to, to, config_type::CT_UPGRADE_TO_PRIMARY)); } else if (balance_type == "copy_pri") { - actions.emplace_back(new_proposal_action(from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action( + from_addr, to_addr, from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action( + from_addr, from_addr, from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); actions.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - actions.emplace_back(new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + new_proposal_action(to_addr, to_addr, to, to, config_type::CT_UPGRADE_TO_PRIMARY)); } else if (balance_type == "copy_sec") { - actions.emplace_back( - new_proposal_action(dsn::rpc_address(), to, config_type::CT_ADD_SECONDARY_FOR_LB)); - actions.emplace_back( - new_proposal_action(dsn::rpc_address(), from, config_type::CT_DOWNGRADE_TO_INACTIVE)); + actions.emplace_back(new_proposal_action(dsn::rpc_address(), + to_addr, + dsn::host_port(), + to, + config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action(dsn::rpc_address(), + from_addr, + dsn::host_port(), + from, + config_type::CT_DOWNGRADE_TO_INACTIVE)); } else { fprintf(stderr, "parse %s as a balance type failed\n", balance_type.c_str()); return false; diff --git a/src/shell/commands/recovery.cpp b/src/shell/commands/recovery.cpp index 13f262dc50..15d1aded07 100644 --- a/src/shell/commands/recovery.cpp +++ b/src/shell/commands/recovery.cpp @@ -32,7 +32,8 @@ #include "common/gpid.h" #include "dsn.layer2_types.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/commands.h" @@ -106,7 +107,7 @@ bool recover(command_executor *e, shell_context *sc, arguments args) return false; } - std::vector node_list; + std::vector node_list; if (!node_list_str.empty()) { std::vector tokens; dsn::utils::split_args(node_list_str.c_str(), tokens, ','); @@ -116,7 +117,7 @@ bool recover(command_executor *e, shell_context *sc, arguments args) } for (std::string &token : tokens) { - const auto node = dsn::rpc_address::from_host_port(token); + const auto node = dsn::host_port::from_string(token); if (!node) { fprintf(stderr, "parse %s as a ip:port node failed\n", token.c_str()); return true; @@ -137,7 +138,7 @@ bool recover(command_executor *e, 
shell_context *sc, arguments args) boost::trim(str); if (str.empty() || str[0] == '#' || str[0] == ';') continue; - const auto node = dsn::rpc_address::from_host_port(str); + const auto node = dsn::host_port::from_string(str); if (!node) { fprintf(stderr, "parse %s at file %s line %d as ip:port failed\n", @@ -163,20 +164,18 @@ bool recover(command_executor *e, shell_context *sc, arguments args) return true; } -dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo); - -dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) +dsn::host_port diagnose_recommend(const ddd_partition_info &pinfo) { - if (pinfo.config.last_drops.size() < 2) - return dsn::rpc_address(); + if (pinfo.config.hp_last_drops.size() < 2) + return dsn::host_port(); - std::vector last_two_nodes(pinfo.config.last_drops.end() - 2, - pinfo.config.last_drops.end()); + std::vector last_two_nodes(pinfo.config.hp_last_drops.end() - 2, + pinfo.config.hp_last_drops.end()); std::vector last_dropped; for (auto &node : last_two_nodes) { auto it = std::find_if(pinfo.dropped.begin(), pinfo.dropped.end(), - [&node](const ddd_node_info &r) { return r.node == node; }); + [&node](const ddd_node_info &r) { return r.hp_node == node; }); if (it->is_alive && it->is_collected) last_dropped.push_back(*it); } @@ -184,7 +183,7 @@ dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) if (last_dropped.size() == 1) { const ddd_node_info &ninfo = last_dropped.back(); if (ninfo.last_committed_decree >= pinfo.config.last_committed_decree) - return ninfo.node; + return ninfo.hp_node; } else if (last_dropped.size() == 2) { const ddd_node_info &secondary = last_dropped.front(); const ddd_node_info &latest = last_dropped.back(); @@ -195,18 +194,18 @@ dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) if (latest.last_committed_decree == secondary.last_committed_decree && latest.last_committed_decree >= pinfo.config.last_committed_decree) - return latest.ballot >= secondary.ballot ? latest.node : secondary.node; + return latest.ballot >= secondary.ballot ? 
latest.hp_node : secondary.hp_node; if (latest.last_committed_decree > secondary.last_committed_decree && latest.last_committed_decree >= pinfo.config.last_committed_decree) - return latest.node; + return latest.hp_node; if (secondary.last_committed_decree > latest.last_committed_decree && secondary.last_committed_decree >= pinfo.config.last_committed_decree) - return secondary.node; + return secondary.hp_node; } - return dsn::rpc_address(); + return dsn::host_port(); } bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) @@ -287,37 +286,40 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) out << " config: ballot(" << pinfo.config.ballot << "), " << "last_committed(" << pinfo.config.last_committed_decree << ")" << std::endl; out << " ----" << std::endl; - dsn::rpc_address latest_dropped, secondary_latest_dropped; - if (pinfo.config.last_drops.size() > 0) - latest_dropped = pinfo.config.last_drops[pinfo.config.last_drops.size() - 1]; - if (pinfo.config.last_drops.size() > 1) - secondary_latest_dropped = pinfo.config.last_drops[pinfo.config.last_drops.size() - 2]; + dsn::host_port latest_dropped, secondary_latest_dropped; + if (pinfo.config.hp_last_drops.size() > 0) + latest_dropped = pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 1]; + if (pinfo.config.hp_last_drops.size() > 1) + secondary_latest_dropped = + pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 2]; int j = 0; for (const ddd_node_info &n : pinfo.dropped) { + dsn::host_port hp_node; + GET_HOST_PORT(n, node, hp_node); char time_buf[30] = {0}; ::dsn::utils::time_ms_to_string(n.drop_time_ms, time_buf); out << " dropped[" << j++ << "]: " - << "node(" << n.node << "), " + << "node(" << hp_node << "), " << "drop_time(" << time_buf << "), " << "alive(" << (n.is_alive ? "true" : "false") << "), " << "collected(" << (n.is_collected ? "true" : "false") << "), " << "ballot(" << n.ballot << "), " << "last_committed(" << n.last_committed_decree << "), " << "last_prepared(" << n.last_prepared_decree << ")"; - if (n.node == latest_dropped) + if (hp_node == latest_dropped) out << " <== the latest"; - else if (n.node == secondary_latest_dropped) + else if (hp_node == secondary_latest_dropped) out << " <== the secondary latest"; out << std::endl; } out << " ----" << std::endl; j = 0; - for (const ::dsn::rpc_address &r : pinfo.config.last_drops) { + for (const ::dsn::host_port &r : pinfo.config.hp_last_drops) { out << " last_drops[" << j++ << "]: " - << "node(" << r << ")"; - if (j == (int)pinfo.config.last_drops.size() - 1) + << "node(" << r.to_string() << ")"; + if (j == (int)pinfo.config.hp_last_drops.size() - 1) out << " <== the secondary latest"; - else if (j == (int)pinfo.config.last_drops.size()) + else if (j == (int)pinfo.config.hp_last_drops.size()) out << " <== the latest"; out << std::endl; } @@ -326,7 +328,7 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (diagnose) { out << " ----" << std::endl; - dsn::rpc_address primary = diagnose_recommend(pinfo); + auto primary = diagnose_recommend(pinfo); out << " recommend_primary: " << (primary.is_invalid() ? 
"none" : primary.to_string()); if (primary == latest_dropped) @@ -344,7 +346,7 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (c == 'y') { break; } else if (c == 'n') { - primary.set_invalid(); + primary.reset(); break; } else if (c == 's') { skip_this = true; @@ -358,9 +360,9 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (primary.is_invalid() && !skip_prompt && !skip_this) { do { std::cout << " > Please input the primary node: "; - std::string addr; - std::cin >> addr; - primary = dsn::rpc_address::from_host_port(addr); + std::string node; + std::cin >> node; + primary = dsn::host_port::from_string(node); if (primary) { break; } @@ -372,8 +374,9 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (!primary.is_invalid() && !skip_this) { dsn::replication::configuration_balancer_request request; request.gpid = pinfo.config.pid; - request.action_list = { - new_proposal_action(primary, primary, config_type::CT_ASSIGN_PRIMARY)}; + const auto &primary_hp = dsn::dns_resolver::instance().resolve_address(primary); + request.action_list = {new_proposal_action( + primary_hp, primary_hp, primary, primary, config_type::CT_ASSIGN_PRIMARY)}; request.force = false; dsn::error_code err = sc->ddl_client->send_balancer_proposal(request); out << " propose_request: propose -g " << request.gpid diff --git a/src/shell/commands/table_management.cpp b/src/shell/commands/table_management.cpp index 8fd1e76f92..262cd62562 100644 --- a/src/shell/commands/table_management.cpp +++ b/src/shell/commands/table_management.cpp @@ -38,7 +38,7 @@ #include "dsn.layer2_types.h" #include "meta_admin_types.h" #include "pegasus_utils.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -305,16 +305,15 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) const auto &results = get_metrics(nodes, sst_stat_filters(app_id).to_query_string()); - std::map> disk_map; - std::map> count_map; + std::map> disk_map; + std::map> count_map; for (size_t i = 0; i < nodes.size(); ++i) { RETURN_SHELL_IF_GET_METRICS_FAILED(results[i], nodes[i], "sst"); - RETURN_SHELL_IF_PARSE_METRICS_FAILED(parse_sst_stat(results[i].body(), - count_map[nodes[i].address], - disk_map[nodes[i].address]), - nodes[i], - "sst"); + RETURN_SHELL_IF_PARSE_METRICS_FAILED( + parse_sst_stat(results[i].body(), count_map[nodes[i].hp], disk_map[nodes[i].hp]), + nodes[i], + "sst"); } ::dsn::utils::table_printer tp_general("result"); @@ -338,10 +337,10 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); + replica_count += p.hp_secondaries.size(); std::string replica_count_str; { std::stringstream oss; @@ -349,10 +348,10 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) replica_count_str = oss.str(); } std::string primary_str("-"); - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { bool disk_found = false; double disk_value = 0; - auto f1 = disk_map.find(p.primary); + auto f1 = disk_map.find(p.hp_primary); if (f1 != disk_map.end()) { auto &sub_map = f1->second; auto f2 = sub_map.find(p.pid.get_partition_index()); @@ -367,7 +366,7 @@ 
bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; - auto f3 = count_map.find(p.primary); + auto f3 = count_map.find(p.hp_primary); if (f3 != count_map.end()) { auto &sub_map = f3->second; auto f3 = sub_map.find(p.pid.get_partition_index()); @@ -378,11 +377,11 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } std::stringstream oss; std::string hostname; - std::string ip = p.primary.to_string(); + const auto &ip = p.hp_primary.to_string(); if (resolve_ip && dsn::utils::hostname_from_ip_port(ip.c_str(), &hostname)) { oss << hostname << "("; } else { - oss << p.primary << "("; + oss << p.hp_primary << "("; }; if (disk_found) oss << disk_value; @@ -400,12 +399,12 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) { std::stringstream oss; oss << "["; - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; bool found = false; double value = 0; - auto f1 = disk_map.find(p.secondaries[j]); + auto f1 = disk_map.find(p.hp_secondaries[j]); if (f1 != disk_map.end()) { auto &sub_map = f1->second; auto f2 = sub_map.find(p.pid.get_partition_index()); @@ -418,7 +417,7 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; - auto f3 = count_map.find(p.secondaries[j]); + auto f3 = count_map.find(p.hp_secondaries[j]); if (f3 != count_map.end()) { auto &sub_map = f3->second; auto f3 = sub_map.find(p.pid.get_partition_index()); @@ -429,11 +428,11 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } std::string hostname; - std::string ip = p.secondaries[j].to_string(); + const auto &ip = p.hp_secondaries[j].to_string(); if (resolve_ip && dsn::utils::hostname_from_ip_port(ip.c_str(), &hostname)) { oss << hostname << "("; } else { - oss << p.secondaries[j] << "("; + oss << p.hp_secondaries[j] << "("; }; if (found) oss << value; diff --git a/src/test/function_test/config.ini b/src/test/function_test/config.ini index 21ede3ff24..34fb4a9383 100644 --- a/src/test/function_test/config.ini +++ b/src/test/function_test/config.ini @@ -75,8 +75,8 @@ rpc_timeout_milliseconds = 5000 lb_interval_ms = 3000 [pegasus.clusters] -onebox = 127.0.0.1:34601,127.0.0.1:34602,127.0.0.1:34603 -single_master_cluster = 127.0.0.1:34601 +onebox = @LOCAL_HOSTNAME@:34601,@LOCAL_HOSTNAME@:34602,@LOCAL_HOSTNAME@:34603 +single_master_cluster = @LOCAL_HOSTNAME@:34601 [pegasus.server] encrypt_data_at_rest = false diff --git a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp index 659d379053..56fbc75391 100644 --- a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp +++ b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp @@ -98,8 +98,9 @@ class detect_hotspot_test : public test_util dsn::replication::detect_hotkey_response resp; for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { req.pid = dsn::gpid(table_id_, partition_index); - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); if (!resp.hotkey_result.empty()) { find_hotkey = true; break; @@ -118,16 +119,18 @@ class detect_hotspot_test : public test_util req.action = dsn::replication::detect_action::STOP; for (int 
partition_index = 0; partition_index < partitions_.size(); partition_index++) { - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); } req.action = dsn::replication::detect_action::QUERY; for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { req.pid = dsn::gpid(table_id_, partition_index); - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } @@ -159,12 +162,12 @@ class detect_hotspot_test : public test_util dsn::replication::detect_hotkey_response resp; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::COARSE_DETECTING", resp.err_hint); @@ -175,7 +178,7 @@ class detect_hotspot_test : public test_util req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } diff --git a/src/test/function_test/recovery/test_recovery.cpp b/src/test/function_test/recovery/test_recovery.cpp index 9bfdfd993f..04d59933f4 100644 --- a/src/test/function_test/recovery/test_recovery.cpp +++ b/src/test/function_test/recovery/test_recovery.cpp @@ -18,8 +18,8 @@ */ #include +#include #include -#include #include #include #include @@ -32,8 +32,7 @@ #include "gtest/gtest.h" #include "include/pegasus/client.h" #include "pegasus/error.h" -#include "runtime/rpc/rpc_address.h" -#include "test/function_test/utils/global_env.h" +#include "runtime/rpc/rpc_host_port.h" #include "test/function_test/utils/test_util.h" #include "utils/error_code.h" #include "utils/rand.h" @@ -71,12 +70,15 @@ class recovery_test : public test_util // cluster has only one meta server, while "onebox" means the cluster has 3 meta servers. 
recovery_test() : test_util(std::map(), "single_master_cluster") {} - std::vector get_rpc_address_list(const std::vector &ports) + std::vector get_rpc_host_port_list(const std::vector ports) { - std::vector result; + std::vector result; result.reserve(ports.size()); for (const auto &port : ports) { - result.emplace_back(dsn::rpc_address(global_env::instance()._host_ip, port)); + char hostname[1024]; + gethostname(hostname, 1024); + dsn::host_port hp(hostname, port); + result.push_back(hp); } return result; } @@ -187,7 +189,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // send another recovery command @@ -216,7 +218,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // recovery only from 1 & 2 - std::vector nodes = get_rpc_address_list({34801, 34802}); + auto nodes = get_rpc_host_port_list({34801, 34802}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the app to ready @@ -247,7 +249,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the apps to ready @@ -277,7 +279,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the apps to ready diff --git a/src/test/function_test/utils/test_util.cpp b/src/test/function_test/utils/test_util.cpp index e757f75eb0..cf0b0fd491 100644 --- a/src/test/function_test/utils/test_util.cpp +++ b/src/test/function_test/utils/test_util.cpp @@ -89,6 +89,7 @@ void test_util::SetUp() ddl_client_ = std::make_shared(meta_list_); ASSERT_TRUE(ddl_client_ != nullptr); ddl_client_->set_max_wait_app_ready_secs(120); + ddl_client_->set_meta_servers_leader(); dsn::error_code ret = ddl_client_->create_app(table_name_, "pegasus", partition_count_, 3, kCreateEnvs, false); diff --git a/src/test/function_test/utils/test_util.h b/src/test/function_test/utils/test_util.h index a0d98ce19a..8e3a1663ce 100644 --- a/src/test/function_test/utils/test_util.h +++ b/src/test/function_test/utils/test_util.h @@ -28,7 +28,7 @@ #include #include "dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" // TODO(yingchun): it's too tricky, but I don't know how does it happen, we can fix it later. 
#define TRICKY_CODE_TO_AVOID_LINK_ERROR \ @@ -117,7 +117,7 @@ class test_util : public ::testing::Test int32_t partition_count_ = 8; std::vector partitions_; pegasus_client *client_ = nullptr; - std::vector meta_list_; + std::vector meta_list_; std::shared_ptr ddl_client_; }; } // namespace pegasus diff --git a/src/test/kill_test/kill_testor.cpp b/src/test/kill_test/kill_testor.cpp index b10abbe5fa..cadd2c3b1d 100644 --- a/src/test/kill_test/kill_testor.cpp +++ b/src/test/kill_test/kill_testor.cpp @@ -32,7 +32,6 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "kill_testor.h" -#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/flags.h" #include "utils/fmt_logging.h" @@ -113,22 +112,22 @@ dsn::error_code kill_testor::get_partition_info(bool debug_unhealthy, for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); + replica_count += p.hp_secondaries.size(); if (replica_count == p.max_replica_count) { healthy_partition_cnt++; } else { std::stringstream info; info << "gpid=" << p.pid.get_app_id() << "." << p.pid.get_partition_index() << ", "; - info << "primay=" << p.primary << ", "; + info << "primay=" << p.hp_primary << ", "; info << "secondaries=["; - for (int idx = 0; idx < p.secondaries.size(); idx++) { + for (int idx = 0; idx < p.hp_secondaries.size(); idx++) { if (idx != 0) - info << "," << p.secondaries[idx]; + info << "," << p.hp_secondaries[idx]; else - info << p.secondaries[idx]; + info << p.hp_secondaries[idx]; } info << "], "; info << "last_committed_decree=" << p.last_committed_decree; diff --git a/src/test/kill_test/kill_testor.h b/src/test/kill_test/kill_testor.h index ed1a873518..f66f31540f 100644 --- a/src/test/kill_test/kill_testor.h +++ b/src/test/kill_test/kill_testor.h @@ -24,7 +24,7 @@ #include #include "dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" namespace dsn { @@ -64,7 +64,7 @@ class kill_testor protected: shared_ptr ddl_client; - vector meta_list; + vector meta_list; std::vector partitions; }; diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index d20a10ccf6..d7ca3b5038 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -29,8 +29,8 @@ #include "http/http_method.h" #include "http/http_status_code.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/service_app.h" #include "runtime/service_engine.h" #include "runtime/task/task.h" @@ -516,7 +516,7 @@ void encode_port(dsn::metric_json_writer &writer) writer.Key(dsn::kMetricPortField.c_str()); const auto *const rpc = dsn::task::get_current_rpc2(); - ENCODE_OBJ_VAL(rpc != nullptr, rpc->primary_address().port()); + ENCODE_OBJ_VAL(rpc != nullptr, rpc->primary_host_port().port()); } void encode_timestamp_ns(dsn::metric_json_writer &writer)