Skip to content

Commit

Permalink
feat(new_metrics): migrate server-level metrics for meta_service (#1437)
Browse files Browse the repository at this point in the history
#1331

Migrate metrics to the new framework for meta_service, including the number
of disconnections with replica servers, and the numbers of unalive and alive
replica servers. All of these metrics are server-level and are maintained in
the meta server.

In the old perf counters, the number of disconnections was a volatile
counter; it is changed to a non-volatile counter, while the other 2 metrics
keep the gauge type.
  • Loading branch information
empiredan authored and wangdan committed May 11, 2023
1 parent 517d73a commit d6a4edf
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 20 deletions.
43 changes: 27 additions & 16 deletions src/meta/meta_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
#include "meta_service.h"
#include "meta_split_service.h"
#include "partition_split_types.h"
#include "perf_counter/perf_counter.h"
#include "remote_cmd/remote_command.h"
#include "runtime/ranger/ranger_resource_policy_manager.h"
#include "runtime/rpc/rpc_holder.h"
Expand All @@ -68,8 +67,24 @@
#include "utils/flags.h"
#include "utils/fmt_logging.h"
#include "utils/string_conv.h"
#include "utils/string_view.h"
#include "utils/strings.h"

// Server-level counter of disconnections with replica servers.
// meta_service::set_node_state() increments it by the number of nodes passed in
// when they are reported as not alive (and by 0 otherwise).
METRIC_DEFINE_counter(server,
replica_server_disconnections,
dsn::metric_unit::kDisconnections,
"The number of disconnections with replica servers");

// Server-level gauge of unalive replica servers.
// meta_service::set_node_state() sets it to the size of _dead_set.
METRIC_DEFINE_gauge_int64(server,
unalive_replica_servers,
dsn::metric_unit::kServers,
"The number of unalive replica servers");

// Server-level gauge of alive replica servers.
// It is set to the size of _alive_set in both meta_service::set_node_state()
// and meta_service::start_service().
METRIC_DEFINE_gauge_int64(server,
alive_replica_servers,
dsn::metric_unit::kServers,
"The number of alive replica servers");

namespace dsn {
namespace dist {
DSN_DECLARE_string(hosts_list);
Expand Down Expand Up @@ -142,7 +157,13 @@ DSN_DECLARE_string(cold_backup_root);
} while (0)

meta_service::meta_service()
: serverlet("meta_service"), _failure_detector(nullptr), _started(false), _recovering(false)
: serverlet("meta_service"),
_failure_detector(nullptr),
_started(false),
_recovering(false),
METRIC_VAR_INIT_server(replica_server_disconnections),
METRIC_VAR_INIT_server(unalive_replica_servers),
METRIC_VAR_INIT_server(alive_replica_servers)
{
_opts.initialize();
_meta_opts.initialize();
Expand All @@ -158,16 +179,6 @@ meta_service::meta_service()
}
}

_recent_disconnect_count.init_app_counter(
"eon.meta_service",
"recent_disconnect_count",
COUNTER_TYPE_VOLATILE_NUMBER,
"replica server disconnect count in the recent period");
_unalive_nodes_count.init_app_counter(
"eon.meta_service", "unalive_nodes", COUNTER_TYPE_NUMBER, "current count of unalive nodes");
_alive_nodes_count.init_app_counter(
"eon.meta_service", "alive_nodes", COUNTER_TYPE_NUMBER, "current count of alive nodes");

_meta_op_status.store(meta_op_status::FREE);
}

Expand Down Expand Up @@ -242,9 +253,9 @@ void meta_service::set_node_state(const std::vector<rpc_address> &nodes, bool is
}
}

_recent_disconnect_count->add(is_alive ? 0 : nodes.size());
_unalive_nodes_count->set(_dead_set.size());
_alive_nodes_count->set(_alive_set.size());
METRIC_VAR_INCREMENT_BY(replica_server_disconnections, is_alive ? 0 : nodes.size());
METRIC_VAR_SET(unalive_replica_servers, _dead_set.size());
METRIC_VAR_SET(alive_replica_servers, _alive_set.size());

if (!_started) {
return;
Expand Down Expand Up @@ -327,7 +338,7 @@ void meta_service::start_service()
_alive_set.insert(kv.first);
}

_alive_nodes_count->set(_alive_set.size());
METRIC_VAR_SET(alive_replica_servers, _alive_set.size());

for (const dsn::rpc_address &node : _alive_set) {
// sync alive set and the failure_detector
Expand Down
8 changes: 4 additions & 4 deletions src/meta/meta_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
#include "meta_options.h"
#include "meta_rpc_types.h"
#include "meta_server_failure_detector.h"
#include "perf_counter/perf_counter_wrapper.h"
#include "runtime/api_layer1.h"
#include "runtime/rpc/network.h"
#include "runtime/rpc/rpc_address.h"
Expand All @@ -70,6 +69,7 @@
#include "utils/enum_helper.h"
#include "utils/error_code.h"
#include "utils/fmt_logging.h"
#include "utils/metrics.h"
#include "utils/threadpool_code.h"
#include "utils/zlocks.h"

Expand Down Expand Up @@ -383,9 +383,9 @@ class meta_service : public serverlet<meta_service>

std::string _cluster_root;

perf_counter_wrapper _recent_disconnect_count;
perf_counter_wrapper _unalive_nodes_count;
perf_counter_wrapper _alive_nodes_count;
METRIC_VAR_DECLARE_counter(replica_server_disconnections);
METRIC_VAR_DECLARE_gauge_int64(unalive_replica_servers);
METRIC_VAR_DECLARE_gauge_int64(alive_replica_servers);

dsn::task_tracker _tracker;

Expand Down
2 changes: 2 additions & 0 deletions src/utils/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,8 @@ enum class metric_unit : size_t
kWrites,
kChanges,
kOperations,
kDisconnections,
kServers,
kInvalidUnit,
};

Expand Down

0 comments on commit d6a4edf

Please sign in to comment.