From f36cb5804733a544a7a2a5d23871bc45a73de448 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 14 Apr 2023 20:08:47 +0800 Subject: [PATCH] feat(new_metrics): migrate server-level metrics for meta_service (#1437) https://github.com/apache/incubator-pegasus/issues/1331 Migrate metrics to the new framework for meta_service, including the number of disconnections with replica servers, and the numbers of unalive and alive replica servers. All of these metrics are server-level and maintained in the meta server. In the old perf counters, the number of disconnections was a volatile counter; it is changed to a non-volatile counter, while the other 2 metrics remain gauges. --- src/meta/meta_service.cpp | 43 ++++++++++++++++++++++++--------------- src/meta/meta_service.h | 8 ++++---- src/utils/metrics.h | 2 ++ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/meta/meta_service.cpp b/src/meta/meta_service.cpp index 5ea615c61d..c6282f82d5 100644 --- a/src/meta/meta_service.cpp +++ b/src/meta/meta_service.cpp @@ -54,7 +54,6 @@ #include "meta_service.h" #include "meta_split_service.h" #include "partition_split_types.h" -#include "perf_counter/perf_counter.h" #include "remote_cmd/remote_command.h" #include "runtime/ranger/ranger_resource_policy_manager.h" #include "runtime/rpc/rpc_holder.h" @@ -67,8 +66,24 @@ #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/string_conv.h" +#include "utils/string_view.h" #include "utils/strings.h" +METRIC_DEFINE_counter(server, + replica_server_disconnections, + dsn::metric_unit::kDisconnections, + "The number of disconnections with replica servers"); + +METRIC_DEFINE_gauge_int64(server, + unalive_replica_servers, + dsn::metric_unit::kServers, + "The number of unalive replica servers"); + +METRIC_DEFINE_gauge_int64(server, + alive_replica_servers, + dsn::metric_unit::kServers, + "The number of alive replica servers"); + namespace dsn { namespace dist { DSN_DECLARE_string(hosts_list); @@ -141,7 +156,13 
@@ DSN_DECLARE_string(cold_backup_root); } while (0) meta_service::meta_service() - : serverlet("meta_service"), _failure_detector(nullptr), _started(false), _recovering(false) + : serverlet("meta_service"), + _failure_detector(nullptr), + _started(false), + _recovering(false), + METRIC_VAR_INIT_server(replica_server_disconnections), + METRIC_VAR_INIT_server(unalive_replica_servers), + METRIC_VAR_INIT_server(alive_replica_servers) { _opts.initialize(); _meta_opts.initialize(); @@ -157,16 +178,6 @@ meta_service::meta_service() } } - _recent_disconnect_count.init_app_counter( - "eon.meta_service", - "recent_disconnect_count", - COUNTER_TYPE_VOLATILE_NUMBER, - "replica server disconnect count in the recent period"); - _unalive_nodes_count.init_app_counter( - "eon.meta_service", "unalive_nodes", COUNTER_TYPE_NUMBER, "current count of unalive nodes"); - _alive_nodes_count.init_app_counter( - "eon.meta_service", "alive_nodes", COUNTER_TYPE_NUMBER, "current count of alive nodes"); - _meta_op_status.store(meta_op_status::FREE); } @@ -241,9 +252,9 @@ void meta_service::set_node_state(const std::vector &nodes, bool is } } - _recent_disconnect_count->add(is_alive ? 0 : nodes.size()); - _unalive_nodes_count->set(_dead_set.size()); - _alive_nodes_count->set(_alive_set.size()); + METRIC_VAR_INCREMENT_BY(replica_server_disconnections, is_alive ? 
0 : nodes.size()); + METRIC_VAR_SET(unalive_replica_servers, _dead_set.size()); + METRIC_VAR_SET(alive_replica_servers, _alive_set.size()); if (!_started) { return; @@ -326,7 +337,7 @@ void meta_service::start_service() _alive_set.insert(kv.first); } - _alive_nodes_count->set(_alive_set.size()); + METRIC_VAR_SET(alive_replica_servers, _alive_set.size()); for (const dsn::rpc_address &node : _alive_set) { // sync alive set and the failure_detector diff --git a/src/meta/meta_service.h b/src/meta/meta_service.h index 435d0a0ac2..d13415b6f0 100644 --- a/src/meta/meta_service.h +++ b/src/meta/meta_service.h @@ -46,7 +46,6 @@ #include "meta_options.h" #include "meta_rpc_types.h" #include "meta_server_failure_detector.h" -#include "perf_counter/perf_counter_wrapper.h" #include "runtime/api_layer1.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" @@ -61,6 +60,7 @@ #include "utils/enum_helper.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" #include "utils/threadpool_code.h" #include "utils/zlocks.h" @@ -375,9 +375,9 @@ class meta_service : public serverlet std::string _cluster_root; - perf_counter_wrapper _recent_disconnect_count; - perf_counter_wrapper _unalive_nodes_count; - perf_counter_wrapper _alive_nodes_count; + METRIC_VAR_DECLARE_counter(replica_server_disconnections); + METRIC_VAR_DECLARE_gauge_int64(unalive_replica_servers); + METRIC_VAR_DECLARE_gauge_int64(alive_replica_servers); dsn::task_tracker _tracker; diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 00bcd4b62b..244b234760 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -650,6 +650,8 @@ enum class metric_unit : size_t kWrites, kChanges, kOperations, + kDisconnections, + kServers, kInvalidUnit, };