Skip to content

Commit

Permalink
[yugabyte#6772] Display tserver clock information in yb-master UI
Browse files Browse the repository at this point in the history
Display tserver clock information in yb-master UI

PR yugabyte#6776
  • Loading branch information
VishnuK007 authored and Alex Ball committed Mar 9, 2021
1 parent b256cf5 commit 5e6c317
Show file tree
Hide file tree
Showing 15 changed files with 164 additions and 45 deletions.
6 changes: 3 additions & 3 deletions src/yb/client/snapshot-txn-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ std::thread RandomClockSkewWalkThread(MiniCluster* cluster, std::atomic<bool>* s
auto* tserver = cluster->mini_tablet_server(i)->server();
auto* hybrid_clock = down_cast<server::HybridClock*>(tserver->clock());
auto skewed_clock =
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->TEST_clock());
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->physical_clock());
auto shift = RandomUniformInt(-10, 10);
std::chrono::milliseconds change(1 << std::abs(shift));
if (shift < 0) {
Expand Down Expand Up @@ -183,7 +183,7 @@ std::thread StrobeThread(MiniCluster* cluster, std::atomic<bool>* stop) {
auto* tserver = cluster->mini_tablet_server(i)->server();
auto* hybrid_clock = down_cast<server::HybridClock*>(tserver->clock());
auto skewed_clock =
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->TEST_clock());
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->physical_clock());
server::SkewedClock::DeltaTime time_delta;
if (iteration & 1) {
time_delta = server::SkewedClock::DeltaTime();
Expand Down Expand Up @@ -254,7 +254,7 @@ void SnapshotTxnTest::TestBankAccounts(BankAccountsOptions options, CoarseDurati
auto* tserver = cluster_->mini_tablet_server(0)->server();
auto* hybrid_clock = down_cast<server::HybridClock*>(tserver->clock());
auto skewed_clock =
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->TEST_clock());
std::static_pointer_cast<server::SkewedClock>(hybrid_clock->physical_clock());
auto old_delta = skewed_clock->SetDelta(duration);
std::this_thread::sleep_for(1s);
skewed_clock->SetDelta(old_delta);
Expand Down
2 changes: 1 addition & 1 deletion src/yb/integration-tests/master_path_handlers-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class MasterPathHandlersItest : public YBMiniClusterTestBase<MiniCluster> {
ASSERT_OK(cluster_->Start());

Endpoint master_http_endpoint = cluster_->leader_mini_master()->bound_http_addr();
master_http_url_ = "http://" + ToString(master_http_endpoint);
master_http_url_ = "http://" + AsString(master_http_endpoint);
}

void DoTearDown() override {
Expand Down
3 changes: 2 additions & 1 deletion src/yb/integration-tests/mini_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,8 @@ std::vector<server::SkewedClockDeltaChanger> SkewClocks(
auto* tserver = cluster->mini_tablet_server(i)->server();
auto* hybrid_clock = down_cast<server::HybridClock*>(tserver->clock());
delta_changers.emplace_back(
i * clock_skew, std::static_pointer_cast<server::SkewedClock>(hybrid_clock->TEST_clock()));
i * clock_skew, std::static_pointer_cast<server::SkewedClock>(
hybrid_clock->physical_clock()));
}
return delta_changers;
}
Expand Down
117 changes: 82 additions & 35 deletions src/yb/master/master-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <sstream>
#include <unordered_set>

#include "yb/common/hybrid_time.h"
#include "yb/common/partition.h"
#include "yb/common/schema.h"
#include "yb/consensus/consensus.pb.h"
Expand All @@ -60,6 +61,7 @@
#include "yb/server/webui_util.h"
#include "yb/util/curl_util.h"
#include "yb/util/string_case.h"
#include "yb/util/timestamp.h"
#include "yb/util/url-coding.h"
#include "yb/util/version_info.h"
#include "yb/util/version_info.pb.h"
Expand Down Expand Up @@ -213,25 +215,38 @@ void MasterPathHandlers::CallIfLeaderOrPrintRedirect(
}
}

inline void MasterPathHandlers::TServerTable(std::stringstream* output) {
inline void MasterPathHandlers::TServerTable(std::stringstream* output,
TServersViewType viewType) {
*output << "<table class='table table-striped'>\n";
*output << " <tr>\n"
<< " <th>Server</th>\n"
<< " <th>Time since </br>heartbeat</th>\n"
<< " <th>Status & Uptime</th>\n"
<< " <th>User Tablet-Peers / Leaders</th>\n"
<< " <th>RAM Used</th>\n"
<< " <th>Num SST Files</th>\n"
<< " <th>Total SST Files Size</th>\n"
<< " <th>Uncompressed SST </br>Files Size</th>\n"
<< " <th>Read ops/sec</th>\n"
<< " <th>Write ops/sec</th>\n"
<< " <th>Cloud</th>\n"
<< " <th>Status & Uptime</th>\n";

if (viewType == TServersViewType::kTServersClocksView) {
*output << " <th>Physical Time (UTC)</th>\n"
<< " <th>Hybrid Time (UTC)</th>\n";
} else {
DCHECK_EQ(viewType, TServersViewType::kTServersDefaultView);
*output << " <th>User Tablet-Peers / Leaders</th>\n"
<< " <th>RAM Used</th>\n"
<< " <th>Num SST Files</th>\n"
<< " <th>Total SST Files Size</th>\n"
<< " <th>Uncompressed SST </br>Files Size</th>\n"
<< " <th>Read ops/sec</th>\n"
<< " <th>Write ops/sec</th>\n";
}

*output << " <th>Cloud</th>\n"
<< " <th>Region</th>\n"
<< " <th>Zone</th>\n"
<< " <th>System Tablet-Peers / Leaders</th>\n"
<< " <th>Active Tablet-Peers</th>\n"
<< " </tr>\n";
<< " <th>Zone</th>\n";

if (viewType == TServersViewType::kTServersDefaultView) {
*output << " <th>System Tablet-Peers / Leaders</th>\n"
<< " <th>Active Tablet-Peers</th>\n";
}

*output << " </tr>\n";
}

namespace {
Expand Down Expand Up @@ -311,7 +326,8 @@ void MasterPathHandlers::TServerDisplay(const std::string& current_uuid,
std::vector<std::shared_ptr<TSDescriptor>>* descs,
TabletCountMap* tablet_map,
std::stringstream* output,
const int hide_dead_node_threshold_mins) {
const int hide_dead_node_threshold_mins,
TServersViewType viewType) {
// Copy vector to avoid changes to the reference descs passed
std::vector<std::shared_ptr<TSDescriptor>> local_descs(*descs);

Expand Down Expand Up @@ -339,22 +355,44 @@ void MasterPathHandlers::TServerDisplay(const std::string& current_uuid,

auto tserver = tablet_map->find(desc->permanent_uuid());
bool no_tablets = tserver == tablet_map->end();
*output << " <td>" << (no_tablets ? 0
: tserver->second.user_tablet_leaders + tserver->second.user_tablet_followers)
<< " / " << (no_tablets ? 0 : tserver->second.user_tablet_leaders) << "</td>";
*output << " <td>" << HumanizeBytes(desc->total_memory_usage()) << "</td>";
*output << " <td>" << desc->num_sst_files() << "</td>";
*output << " <td>" << HumanizeBytes(desc->total_sst_file_size()) << "</td>";
*output << " <td>" << HumanizeBytes(desc->uncompressed_sst_file_size()) << "</td>";
*output << " <td>" << desc->read_ops_per_sec() << "</td>";
*output << " <td>" << desc->write_ops_per_sec() << "</td>";

if (viewType == TServersViewType::kTServersClocksView) {
// Render physical time.
const Timestamp p_ts(desc->physical_time());
*output << " <td>" << p_ts.ToHumanReadableTime() << "</td>";

// Render the physical and logical components of the hybrid time.
const HybridTime ht = desc->hybrid_time();
const Timestamp h_ts(ht.GetPhysicalValueMicros());
*output << " <td>" << h_ts.ToHumanReadableTime();
if (ht.GetLogicalValue()) {
*output << " / Logical: " << ht.GetLogicalValue();
}
*output << "</td>";
} else {
DCHECK_EQ(viewType, TServersViewType::kTServersDefaultView);
*output << " <td>" << (no_tablets ? 0
: tserver->second.user_tablet_leaders + tserver->second.user_tablet_followers)
<< " / " << (no_tablets ? 0 : tserver->second.user_tablet_leaders) << "</td>";
*output << " <td>" << HumanizeBytes(desc->total_memory_usage()) << "</td>";
*output << " <td>" << desc->num_sst_files() << "</td>";
*output << " <td>" << HumanizeBytes(desc->total_sst_file_size()) << "</td>";
*output << " <td>" << HumanizeBytes(desc->uncompressed_sst_file_size()) << "</td>";
*output << " <td>" << desc->read_ops_per_sec() << "</td>";
*output << " <td>" << desc->write_ops_per_sec() << "</td>";
}

*output << " <td>" << reg.common().cloud_info().placement_cloud() << "</td>";
*output << " <td>" << reg.common().cloud_info().placement_region() << "</td>";
*output << " <td>" << reg.common().cloud_info().placement_zone() << "</td>";
*output << " <td>" << (no_tablets ? 0
: tserver->second.system_tablet_leaders + tserver->second.system_tablet_followers)
<< " / " << (no_tablets ? 0 : tserver->second.system_tablet_leaders) << "</td>";
*output << " <td>" << (no_tablets ? 0 : desc->num_live_replicas()) << "</td>";

if (viewType == TServersViewType::kTServersDefaultView) {
*output << " <td>" << (no_tablets ? 0
: tserver->second.system_tablet_leaders + tserver->second.system_tablet_followers)
<< " / " << (no_tablets ? 0 : tserver->second.system_tablet_leaders) << "</td>";
*output << " <td>" << (no_tablets ? 0 : desc->num_live_replicas()) << "</td>";
}

*output << " </tr>\n";
}
}
Expand Down Expand Up @@ -479,7 +517,8 @@ MasterPathHandlers::ZoneTabletCounts::CloudTree MasterPathHandlers::CalculateTab
}

void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& req,
Webserver::WebResponse* resp) {
Webserver::WebResponse* resp,
TServersViewType viewType) {
std::stringstream *output = &resp->output;
master_->catalog_manager()->AssertLeaderLockAcquiredForReading();

Expand Down Expand Up @@ -521,15 +560,16 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& req,
<< live_id << "</h3>\n";
}

TServerTable(output);
TServerDisplay(live_id, &descs, &tablet_map, output, hide_dead_node_threshold_override);
TServerTable(output, viewType);
TServerDisplay(live_id, &descs, &tablet_map, output, hide_dead_node_threshold_override,
viewType);

for (const auto& read_replica_uuid : read_replica_uuids) {
*output << "<h3 style=\"color:" << kYBDarkBlue << "\">Read Replica UUID: "
<< (read_replica_uuid.empty() ? kNoPlacementUUID : read_replica_uuid) << "</h3>\n";
TServerTable(output);
TServerDisplay(
read_replica_uuid, &descs, &tablet_map, output, hide_dead_node_threshold_override);
TServerTable(output, viewType);
TServerDisplay(read_replica_uuid, &descs, &tablet_map, output,
hide_dead_node_threshold_override, viewType);
}

ZoneTabletCounts::CloudTree counts_tree = CalculateTabletCountsTree(descs, tablet_map);
Expand Down Expand Up @@ -1782,11 +1822,18 @@ Status MasterPathHandlers::Register(Webserver* server) {
"/", "Home", std::bind(&MasterPathHandlers::RootHandler, this, _1, _2), is_styled,
is_on_nav_bar, "fa fa-home");
Webserver::PathHandlerCallback cb =
std::bind(&MasterPathHandlers::HandleTabletServers, this, _1, _2);
std::bind(&MasterPathHandlers::HandleTabletServers, this, _1, _2,
TServersViewType::kTServersDefaultView);
server->RegisterPathHandler(
"/tablet-servers", "Tablet Servers",
std::bind(&MasterPathHandlers::CallIfLeaderOrPrintRedirect, this, _1, _2, cb), is_styled,
is_on_nav_bar, "fa fa-server");
cb = std::bind(&MasterPathHandlers::HandleTabletServers, this, _1, _2,
TServersViewType::kTServersClocksView);
server->RegisterPathHandler(
"/tablet-server-clocks", "Tablet Server Clocks",
std::bind(&MasterPathHandlers::CallIfLeaderOrPrintRedirect, this, _1, _2, cb), is_styled,
false /* is_on_nav_bar */);
cb = std::bind(&MasterPathHandlers::HandleCatalogManager,
this, _1, _2, false /* only_user_tables */);
server->RegisterPathHandler(
Expand Down
11 changes: 8 additions & 3 deletions src/yb/master/master-path-handlers.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "yb/master/catalog_entity_info.h"
#include "yb/master/catalog_manager.h"
#include "yb/server/webserver.h"
#include "yb/util/enums.h"

namespace yb {

Expand All @@ -56,6 +57,8 @@ struct TabletReplica;
class TSDescriptor;
class TSRegistrationPB;

// Selects which set of columns the master's tablet-servers page renders:
// kTServersDefaultView shows the tablet/SST/ops columns, kTServersClocksView shows
// the physical-time and hybrid-time columns instead.
YB_DEFINE_ENUM(TServersViewType, (kTServersDefaultView)(kTServersClocksView));

// Web page support for the master.
class MasterPathHandlers {
public:
Expand Down Expand Up @@ -120,13 +123,14 @@ class MasterPathHandlers {

const string kNoPlacementUUID = "NONE";

static inline void TServerTable(std::stringstream* output);
static inline void TServerTable(std::stringstream* output, TServersViewType viewType);

void TServerDisplay(const std::string& current_uuid,
std::vector<std::shared_ptr<TSDescriptor>>* descs,
TabletCountMap* tmap,
std::stringstream* output,
const int hide_dead_node_threshold_override);
const int hide_dead_node_threshold_override,
TServersViewType viewType);

// Outputs a ZoneTabletCounts::CloudTree as an html table with a heading.
static void DisplayTabletZonesTable(
Expand All @@ -148,7 +152,8 @@ class MasterPathHandlers {
void RootHandler(const Webserver::WebRequest& req,
Webserver::WebResponse* resp);
void HandleTabletServers(const Webserver::WebRequest& req,
Webserver::WebResponse* resp);
Webserver::WebResponse* resp,
TServersViewType viewType);
void HandleCatalogManager(const Webserver::WebRequest& req,
Webserver::WebResponse* resp,
bool only_user_tables = false);
Expand Down
1 change: 1 addition & 0 deletions src/yb/master/master.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ void Master::DisplayGeneralInfoIcons(std::stringstream* output) {
// Tasks.
DisplayIconTile(output, "fa-check", "Tasks", "/tasks");
DisplayIconTile(output, "fa-clone", "Replica Info", "/tablet-replication");
DisplayIconTile(output, "fa-check", "TServer Clocks", "/tablet-server-clocks");
}

Status Master::StartAsync() {
Expand Down
6 changes: 6 additions & 0 deletions src/yb/master/master.proto
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,12 @@ message TSHeartbeatRequestPB {

// List of candidate tablets for split based on tablet splitting strategy and settings.
repeated TabletForSplitPB tablets_for_split = 9;

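// Physical clock reading on the tablet server when this heartbeat was built.
// Sent as 0 when the physical clock could not be read or the server clock is not a HybridClock.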
optional fixed64 ts_physical_time = 10;

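// Hybrid time on the tablet server when this heartbeat was built, encoded with
// HybridTime::ToUint64(). Sent as 0 when the server clock is not a HybridClock.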
optional fixed64 ts_hybrid_time = 11;
}

message TSHeartbeatResponsePB {
Expand Down
2 changes: 2 additions & 0 deletions src/yb/master/master_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,
ts_desc->UpdateHeartbeatTime();
ts_desc->set_num_live_replicas(req->num_live_tablets());
ts_desc->set_leader_count(req->leader_count());
ts_desc->set_physical_time(req->ts_physical_time());
ts_desc->set_hybrid_time(HybridTime::FromPB(req->ts_hybrid_time()));

// Adjust the table report limit per heartbeat so this can be dynamically changed.
if (ts_desc->HasCapability(CAPABILITY_TabletReportLimit)) {
Expand Down
26 changes: 26 additions & 0 deletions src/yb/master/ts_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <mutex>
#include <string>

#include "yb/common/hybrid_time.h"
#include "yb/gutil/gscoped_ptr.h"

#include "yb/master/master_fwd.h"
Expand All @@ -49,6 +50,7 @@
#include "yb/util/capabilities.h"
#include "yb/util/locks.h"
#include "yb/util/monotime.h"
#include "yb/util/physical_time.h"
#include "yb/util/status.h"
#include "yb/util/shared_ptr_tuple.h"
#include "yb/util/shared_lock.h"
Expand Down Expand Up @@ -177,6 +179,26 @@ class TSDescriptor {
return leader_count_;
}

// Stores the tablet server's physical clock reading, as reported in a heartbeat.
// Holds lock_ exclusively while writing physical_time_.
// NOTE(review): the MicrosTime type name suggests microseconds -- confirm against
// yb/util/physical_time.h.
void set_physical_time(MicrosTime physical_time) {
std::lock_guard<decltype(lock_)> l(lock_);
physical_time_ = physical_time;
}

// Returns the most recently stored physical clock reading for this tablet server.
// Reads physical_time_ under a shared lock on lock_.
MicrosTime physical_time() const {
SharedLock<decltype(lock_)> l(lock_);
return physical_time_;
}

// Stores the tablet server's hybrid time, as reported in a heartbeat.
// Holds lock_ exclusively while writing hybrid_time_.
void set_hybrid_time(HybridTime hybrid_time) {
std::lock_guard<decltype(lock_)> l(lock_);
hybrid_time_ = hybrid_time;
}

// Returns the most recently stored hybrid time for this tablet server.
// Reads hybrid_time_ under a shared lock on lock_.
HybridTime hybrid_time() const {
SharedLock<decltype(lock_)> l(lock_);
return hybrid_time_;
}

void set_total_memory_usage(uint64_t total_memory_usage) {
std::lock_guard<decltype(lock_)> l(lock_);
ts_metrics_.total_memory_usage = total_memory_usage;
Expand Down Expand Up @@ -337,6 +359,10 @@ class TSDescriptor {
// The last time a heartbeat was received for this node.
MonoTime last_heartbeat_;

// The physical and hybrid times this node reported in its most recent heartbeat.
MicrosTime physical_time_;
HybridTime hybrid_time_;

// Set to true once this instance has reported all of its tablets.
bool has_tablet_report_;

Expand Down
2 changes: 1 addition & 1 deletion src/yb/server/hybrid_clock.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class HybridClock : public Clock {
// Enables check whether clock skew within configured bounds.
static void EnableClockSkewControl();

const PhysicalClockPtr& TEST_clock() { return clock_; }
// Returns the physical clock (clock_) that this hybrid clock is built on.
// No longer test-only: besides tests that cast it to SkewedClock to inject skew,
// the tablet server heartbeater reads it to report physical time to the master.
const PhysicalClockPtr& physical_clock() { return clock_; }

private:
enum State {
Expand Down
17 changes: 17 additions & 0 deletions src/yb/tserver/heartbeater.cc
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,23 @@ Status Heartbeater::Thread::TryHeartbeat() {
req.set_config_index(server_->GetCurrentMasterIndex());
req.set_cluster_config_version(server_->cluster_config_version());

// Include the hybrid time of this tablet server in the heartbeat.
auto* hybrid_clock = dynamic_cast<server::HybridClock*>(server_->Clock());
if (hybrid_clock) {
req.set_ts_hybrid_time(hybrid_clock->Now().ToUint64());
// Also include the physical clock time of this tablet server in the heartbeat.
Result<PhysicalTime> now = hybrid_clock->physical_clock()->Now();
if (!now.ok()) {
YB_LOG_EVERY_N_SECS(WARNING, 10) << "Failed to read clock: " << now.status();
req.set_ts_physical_time(0);
} else {
req.set_ts_physical_time(now->time_point);
}
} else {
req.set_ts_hybrid_time(0);
req.set_ts_physical_time(0);
}

{
VLOG_WITH_PREFIX(2) << "Sending heartbeat:\n" << req.DebugString();
master::TSHeartbeatResponsePB resp;
Expand Down
Loading

0 comments on commit 5e6c317

Please sign in to comment.