Skip to content

Commit

Permalink
feat(new_metrics): show disk-level resource usage by shell nodes command based on new metrics (apache#1889)
Browse files (browse the repository at this point in the history)
  • Loading branch information
empiredan authored Feb 2, 2024
1 parent 41cba64 commit fd137bf
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 32 deletions.
16 changes: 7 additions & 9 deletions src/common/fs_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include <fmt/std.h> // IWYU pragma: keep
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>

Expand All @@ -41,6 +40,7 @@
#include "utils/fail_point.h"
#include "utils/filesystem.h"
#include "utils/fmt_logging.h"
#include "utils/math.h"
#include "utils/ports.h"

METRIC_DEFINE_entity(disk);
Expand Down Expand Up @@ -171,17 +171,16 @@ void dir_node::update_disk_stat()

disk_capacity_mb = dsi.capacity >> 20;
disk_available_mb = dsi.available >> 20;
disk_available_ratio = static_cast<int>(
disk_capacity_mb == 0 ? 0 : std::round(disk_available_mb * 100.0 / disk_capacity_mb));
disk_available_ratio = dsn::utils::calc_percentage<int>(disk_available_mb, disk_capacity_mb);

METRIC_SET(disk_capacity, disk_capacity_total_mb, disk_capacity_mb);
METRIC_SET(disk_capacity, disk_capacity_avail_mb, disk_available_mb);

// It's able to change status from NORMAL to SPACE_INSUFFICIENT, and vice versa.
disk_status::type old_status = status;
auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
? disk_status::SPACE_INSUFFICIENT
: disk_status::NORMAL;
const disk_status::type old_status = status;
const auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
? disk_status::SPACE_INSUFFICIENT
: disk_status::NORMAL;
if (old_status != new_status) {
status = new_status;
}
Expand Down Expand Up @@ -389,8 +388,7 @@ void fs_manager::update_disk_stat()
min_available_ratio = std::min(dn->disk_available_ratio, min_available_ratio);
max_available_ratio = std::max(dn->disk_available_ratio, max_available_ratio);
}
total_available_ratio = static_cast<int>(
total_capacity_mb == 0 ? 0 : std::round(total_available_mb * 100.0 / total_capacity_mb));
total_available_ratio = dsn::utils::calc_percentage<int>(total_available_mb, total_capacity_mb);

LOG_INFO("update disk space succeed: disk_count = {}, total_capacity_mb = {}, "
"total_available_mb = {}, total_available_ratio = {}%, min_available_ratio = {}%, "
Expand Down
9 changes: 5 additions & 4 deletions src/replica/test/replica_disk_test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <gtest/gtest.h>
#include "utils/fail_point.h"
#include "utils/fmt_logging.h"
#include "utils/math.h"

#include "replica/test/replica_test_base.h"

Expand Down Expand Up @@ -159,12 +160,12 @@ class replica_disk_test_base : public replica_test_base

void generate_mock_dir_nodes(int num)
{
int64_t disk_capacity_mb = num * 100;
const int64_t disk_capacity_mb = num * 100;
int count = 0;
while (count++ < num) {
int64_t disk_available_mb = count * 50;
int disk_available_ratio =
static_cast<int>(std::round((double)100 * disk_available_mb / disk_capacity_mb));
const int64_t disk_available_mb = count * 50;
const auto disk_available_ratio =
dsn::utils::calc_percentage<int>(disk_available_mb, disk_capacity_mb);
// create one mock dir_node and make sure disk_capacity_mb_ > disk_available_mb_
dir_node *node_disk = new dir_node("tag_" + std::to_string(count),
"./tag_" + std::to_string(count),
Expand Down
50 changes: 32 additions & 18 deletions src/shell/commands/node_management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include "utils/blob.h"
#include "utils/error_code.h"
#include "utils/errors.h"
#include "utils/math.h"
#include "utils/metrics.h"
#include "utils/output_utils.h"
#include "utils/ports.h"
Expand Down Expand Up @@ -98,11 +99,13 @@ dsn::metric_filters resource_usage_filters()
{
dsn::metric_filters filters;
filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField};
filters.entity_types = {"server", "replica"};
filters.entity_types = {"server", "replica", "disk"};
filters.entity_metrics = {"resident_mem_usage_mb",
"rdb_block_cache_mem_usage_bytes",
"rdb_memtable_mem_usage_bytes",
"rdb_index_and_filter_blocks_mem_usage_bytes"};
"rdb_index_and_filter_blocks_mem_usage_bytes",
"disk_capacity_total_mb",
"disk_capacity_avail_mb"};
return filters;
}

Expand All @@ -117,24 +120,48 @@ dsn::error_s parse_resource_usage(const std::string &json_string, list_nodes_hel
return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid json string");
}

int64_t total_capacity_mb = 0;
int64_t total_available_mb = 0;
stat.disk_available_min_ratio = 100;
for (const auto &entity : query_snapshot.entities) {
for (const auto &m : entity.metrics) {
if (entity.type == "server") {
if (entity.type == "server") {
for (const auto &m : entity.metrics) {
if (m.name == "resident_mem_usage_mb") {
stat.memused_res_mb += m.value;
} else if (m.name == "rdb_block_cache_mem_usage_bytes") {
stat.block_cache_bytes += m.value;
}
} else if (entity.type == "replica") {
}
} else if (entity.type == "replica") {
for (const auto &m : entity.metrics) {
if (m.name == "rdb_memtable_mem_usage_bytes") {
stat.mem_tbl_bytes += m.value;
} else if (m.name == "rdb_index_and_filter_blocks_mem_usage_bytes") {
stat.mem_idx_bytes += m.value;
}
}
} else if (entity.type == "disk") {
int64_t capacity_mb = 0;
int64_t available_mb = 0;
for (const auto &m : entity.metrics) {
if (m.name == "disk_capacity_total_mb") {
total_capacity_mb += m.value;
capacity_mb = m.value;
} else if (m.name == "disk_capacity_avail_mb") {
total_available_mb += m.value;
available_mb = m.value;
}
}

const auto available_ratio = dsn::utils::calc_percentage(available_mb, capacity_mb);
stat.disk_available_min_ratio =
std::min(stat.disk_available_min_ratio, available_ratio);
}
}

stat.disk_available_total_ratio =
dsn::utils::calc_percentage(total_available_mb, total_capacity_mb);

return dsn::error_s::ok();
}

Expand Down Expand Up @@ -281,10 +308,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args)

const auto &results = get_metrics(nodes, resource_usage_filters().to_query_string());

// TODO(wangdan): following replica-level and disk-level metrics would be replaced:
// "replica*eon.replica_stub*disk.available.total.ratio"
// "replica*eon.replica_stub*disk.available.min.ratio"

for (size_t i = 0; i < nodes.size(); ++i) {
auto tmp_it = tmp_map.find(nodes[i].address);
if (tmp_it == tmp_map.end()) {
Expand Down Expand Up @@ -312,15 +335,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
<< " failed: " << res << std::endl;
return true;
}

// TODO(wangdan): after migrated to new metrics, remove following code:
dsn::perf_counter_info info;
for (dsn::perf_counter_metric &m : info.counters) {
if (m.name.find("disk.available.total.ratio") != std::string::npos)
stat.disk_available_total_ratio += m.value;
else if (m.name.find("disk.available.min.ratio") != std::string::npos)
stat.disk_available_min_ratio += m.value;
}
}
}

Expand Down
1 change: 0 additions & 1 deletion src/utils/math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "math.h"

#include <math.h>
#include <algorithm>
#include <numeric>

Expand Down
17 changes: 17 additions & 0 deletions src/utils/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,29 @@
#pragma once

#include <vector>
#include <cmath>
#include <cstdint>
#include <type_traits>

#include "utils/ports.h"

namespace dsn {
namespace utils {

// NOTE(review): only the declaration is visible here; the definition lives out of line.
// Judging by the name and signature, this presumably returns the standard deviation of the
// values in `result_set` about their mean, with `partial_sample` choosing the sample (n - 1)
// rather than the population (n) divisor — confirm against the definition before relying on it.
double mean_stddev(const std::vector<uint32_t> &result_set, bool partial_sample);

// Expresses `numerator` as a percentage of `denominator`, rounded to the nearest whole
// number and converted to `TOutput`.
//
// The quotient is evaluated in floating point (`numerator * 100.0 / denominator`), rounded
// with std::round() (halfway cases are rounded away from zero), and finally cast to
// `TOutput`. A zero `denominator` yields 0 instead of performing the division.
//
// Both arguments share the single type `TInput`, so when it is deduced the two arguments
// must have exactly the same type. `TOutput` and `TInput` must both be arithmetic types;
// otherwise this template does not take part in overload resolution.
template <typename TOutput = int64_t,
          typename TInput = int64_t,
          typename = typename std::enable_if<std::is_arithmetic<TOutput>::value>::type,
          typename = typename std::enable_if<std::is_arithmetic<TInput>::value>::type>
TOutput calc_percentage(TInput numerator, TInput denominator)
{
    // Guard first: a percentage of an empty whole is reported as 0.
    if (dsn_unlikely(denominator == 0)) {
        return static_cast<TOutput>(0);
    }

    // Scale before dividing so integral inputs are promoted to floating point and the
    // fractional part survives until rounding.
    const auto scaled = numerator * 100.0;
    const auto rounded = std::round(scaled / denominator);
    return static_cast<TOutput>(rounded);
}

} // namespace utils
} // namespace dsn

0 comments on commit fd137bf

Please sign in to comment.