Skip to content

Commit

Permalink
[#21178] docdb: Add metric for the max master follower heartbeat delay.
Browse files Browse the repository at this point in the history
Summary:
Adds a metric for the maximum master follower heartbeat delay. Ideally this metric would report the raw follower delays directly and clients could calculate the maximum themselves. However there is no metric entity we could use to export a metric from the leader master with the right fields. Adding a new metric entity for just this metric seems too involved given the current use-case (alert if the follower delay is too large). Another alternative would be adding a server entity metric to each master, and have followers report their own delay. But we are interested in the perspective from the master leader here and that approach could introduce surprising behaviour.
Jira: DB-10113

Test Plan:
```
./yb_build.sh --with-tests --cxx-test-filter-re tablet_health_manager-itest --cxx-test tablet_health_manager-itest --gtest_filter 'AreNodesSafeToTakeDownItest.GetFollowerUpdate*'
```

Reviewers: asrivastava, sanketh

Reviewed By: asrivastava

Subscribers: ybase, slingam

Differential Revision: https://phorge.dev.yugabyte.com/D38319
  • Loading branch information
druzac committed Sep 25, 2024
1 parent 87cffc6 commit 678d277
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 2 deletions.
65 changes: 63 additions & 2 deletions src/yb/integration-tests/tablet_health_manager-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//

#include <algorithm>
#include <sstream>
#include <string>

#include <gtest/gtest.h>
Expand All @@ -29,20 +30,35 @@
#include "yb/tools/yb-admin_client.h"

#include "yb/util/backoff_waiter.h"
#include "yb/util/curl_util.h"
#include "yb/util/monotime.h"
#include "yb/util/test_macros.h"
#include "yb/util/tsan_util.h"

DECLARE_int32(are_nodes_safe_to_take_down_timeout_buffer_ms);
DECLARE_double(leader_failure_max_missed_heartbeat_periods);
DECLARE_int32(raft_heartbeat_interval_ms);
DECLARE_int32(catalog_manager_bg_task_wait_ms);

using std::string;
using std::vector;

namespace yb {
namespace integration_tests {

template <typename T>
Result<T> parse(const std::string& s);

template <>
Result<uint64> parse(const std::string& s) {
std::size_t pos;
auto out = std::stoull(s, &pos);
if (pos != s.size()) {
return STATUS(InvalidArgument, "Couldn't parse string to unsigned integer: $0", s);
}
return out;
}

class AreNodesSafeToTakeDownItest : public YBTableTestBase {
protected:
void BeforeCreateTable() override {
Expand Down Expand Up @@ -90,6 +106,32 @@ class AreNodesSafeToTakeDownItest : public YBTableTestBase {
}

const int kFollowerLagBoundMs = 1000 * kTimeMultiplier;

template <typename T>
Result<T> GetLeaderMasterMetric(const std::string& metric_name) {
EasyCurl curl;
auto addr = external_mini_cluster_->GetLeaderMaster()->bound_http_hostport();
faststring out;
RETURN_NOT_OK(
curl.FetchURL(Format("http://$0:$1/prometheus-metrics", addr.host(), addr.port()), &out));
std::stringstream ss(out.ToString());
for (std::string line; std::getline(ss, line, '\n');) {
if (!line.starts_with(metric_name)) {
continue;
}
std::vector<std::string> words;
std::stringstream line_ss(line);
for (std::string word; std::getline(line_ss, word, ' ');) {
words.push_back(std::move(word));
}
if (words.size() < 2) {
return STATUS_FORMAT(IllegalState, "Bad prometheus metric line: $0", line);
}
return parse<T>(words[words.size() - 2]);
}
return STATUS_FORMAT(
NotFound, "Couldn't find metric $0 on prometheus-metrics page", metric_name);
}
};

TEST_F(AreNodesSafeToTakeDownItest, HealthyCluster) {
Expand Down Expand Up @@ -257,6 +299,9 @@ TEST_F(AreNodesSafeToTakeDownItest, GetFollowerUpdateDelay) {
for (const auto& heartbeat_delay : resp.heartbeat_delay()) {
ASSERT_LE(heartbeat_delay.last_heartbeat_delta_ms(), max_expected_heartbeat_time);
}
auto max_actual_heartbeat_delay =
ASSERT_RESULT(GetLeaderMasterMetric<uint64_t>("max_follower_heartbeat_delay"));
ASSERT_LE(max_actual_heartbeat_delay, max_expected_heartbeat_time);
}

// Stop one of the nodes and ensure its delay increases.
Expand All @@ -271,7 +316,7 @@ TEST_F(AreNodesSafeToTakeDownItest, GetFollowerUpdateDelayWithStoppedNode) {
ASSERT_NE(master_it, masters.end());
auto other_master = *master_it;
other_master->Shutdown();
auto sleep_time = FLAGS_raft_heartbeat_interval_ms * 3;
auto sleep_time = static_cast<uint64>(FLAGS_raft_heartbeat_interval_ms * 3);
SleepFor(MonoDelta::FromMilliseconds(sleep_time));
master::GetMasterHeartbeatDelaysRequestPB req;
master::GetMasterHeartbeatDelaysResponsePB resp;
Expand All @@ -286,7 +331,23 @@ TEST_F(AreNodesSafeToTakeDownItest, GetFollowerUpdateDelayWithStoppedNode) {
ASSERT_GE(heartbeat_delay.last_heartbeat_delta_ms(), sleep_time);
}
}

// Verify the heartbeat delay shows up in the metric max_follower_heartbeat_delay. This metric is
// updated by the catalog manager background task. The background task may not have run recently
// enough to pick up the delay so we retry a few times.
std::string failure_message;
ASSERT_OK(WaitFor(
[this, sleep_time, &failure_message]() -> Result<bool> {
auto max_actual_heartbeat_delay =
VERIFY_RESULT(GetLeaderMasterMetric<uint64_t>("max_follower_heartbeat_delay"));
auto done = max_actual_heartbeat_delay >= sleep_time;
if (!done) {
failure_message = Format(
"Heartbeat delay from metric is $0, expecting it to be at least $1",
max_actual_heartbeat_delay, sleep_time);
}
return done;
},
MonoDelta::FromMilliseconds(FLAGS_catalog_manager_bg_task_wait_ms * 3), failure_message));
ASSERT_OK(other_master->Restart());
}

Expand Down
28 changes: 28 additions & 0 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,15 @@ METRIC_DEFINE_gauge_uint32(cluster, num_tablet_servers_dead,
"heartbeat in the time interval defined by the gflag "
"FLAGS_tserver_unresponsive_timeout_ms.");

METRIC_DEFINE_gauge_uint64(cluster, max_follower_heartbeat_delay, "The maximum heartbeat delay "
"among all master followers",
yb::MetricUnit::kMilliseconds,
"The number of milliseconds since the master leader has ACK'd a "
"heartbeat from all peers of the system catalog tablet. The master "
"leader does not ACK heartbeats from peers that have fallen behind due "
"to WAL GC so such peers will cause this metric to increase "
"indefinitely.");

METRIC_DEFINE_counter(cluster, create_table_too_many_tablets,
"How many CreateTable requests have failed due to too many tablets", yb::MetricUnit::kRequests,
"The number of CreateTable request errors due to attempting to create too many tablets.");
Expand Down Expand Up @@ -976,6 +985,9 @@ Status CatalogManager::Init() {
metric_create_table_too_many_tablets_ =
METRIC_create_table_too_many_tablets.Instantiate(master_->metric_entity_cluster());

metric_max_follower_heartbeat_delay_ =
METRIC_max_follower_heartbeat_delay.Instantiate(master_->metric_entity_cluster(), 0);

cdc_state_table_ = std::make_unique<cdc::CDCStateTable>(master_->cdc_state_client_future());

RETURN_NOT_OK(xcluster_manager_->Init());
Expand Down Expand Up @@ -11679,6 +11691,22 @@ void CatalogManager::ReportMetrics() {
auto num_servers = master_->ts_manager()->NumDescriptors();
metric_num_tablet_servers_dead_->set_value(
narrow_cast<uint32_t>(num_servers - num_live_servers));

// Report the max master follower heartbeat delay.
auto consensus_result = tablet_peer()->GetConsensus();
if (consensus_result) {
MonoTime earliest_time = MonoTime::kUninitialized;
for (const auto& last_communication_time :
(*consensus_result)->GetFollowerCommunicationTimes()) {
earliest_time.MakeAtMost(last_communication_time.last_successful_communication);
}
int64_t time_in_ms =
earliest_time ? MonoTime::Now().GetDeltaSince(earliest_time).ToMilliseconds() : 0;
if (time_in_ms < 0) {
time_in_ms = 0;
}
metric_max_follower_heartbeat_delay_->set_value(static_cast<uint64_t>(time_in_ms));
}
}

void CatalogManager::ResetMetrics() {
Expand Down
2 changes: 2 additions & 0 deletions src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -2379,6 +2379,8 @@ class CatalogManager : public tserver::TabletPeerLookupIf,

scoped_refptr<Counter> metric_create_table_too_many_tablets_;

scoped_refptr<AtomicGauge<uint64_t>> metric_max_follower_heartbeat_delay_;

friend class ClusterLoadBalancer;

// Policy for load balancing tablets on tablet servers.
Expand Down
6 changes: 6 additions & 0 deletions src/yb/util/monotime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,12 @@ void MonoTime::MakeAtLeast(MonoTime rhs) {
}
}

void MonoTime::MakeAtMost(MonoTime rhs) {
if (rhs.Initialized() && (!Initialized() || value_ > rhs.value_)) {
value_ = rhs.value_;
}
}

// ------------------------------------------------------------------------------------------------

std::string FormatForComparisonFailureMessage(const MonoDelta& op, const MonoDelta& other) {
Expand Down
2 changes: 2 additions & 0 deletions src/yb/util/monotime.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ class MonoTime {
// Set this time to the given value if it is lower than that or uninitialized.
void MakeAtLeast(MonoTime rhs);

void MakeAtMost(MonoTime rhs);

TimePoint ToSteadyTimePoint() const {
return value_;
}
Expand Down

0 comments on commit 678d277

Please sign in to comment.