From fd137bf4a7a26248e05cc39f16b2802424d8767b Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 2 Feb 2024 17:51:54 +0800 Subject: [PATCH] feat(new_metrics): show disk-level resource usage by shell nodes command based on new metrics (#1889) --- src/common/fs_manager.cpp | 16 ++++---- src/replica/test/replica_disk_test_base.h | 9 ++-- src/shell/commands/node_management.cpp | 50 +++++++++++++++-------- src/utils/math.cpp | 1 - src/utils/math.h | 17 ++++++++ 5 files changed, 61 insertions(+), 32 deletions(-) diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp index 88b1da6603..89487b0c63 100644 --- a/src/common/fs_manager.cpp +++ b/src/common/fs_manager.cpp @@ -28,7 +28,6 @@ #include // IWYU pragma: keep #include -#include #include #include @@ -41,6 +40,7 @@ #include "utils/fail_point.h" #include "utils/filesystem.h" #include "utils/fmt_logging.h" +#include "utils/math.h" #include "utils/ports.h" METRIC_DEFINE_entity(disk); @@ -171,17 +171,16 @@ void dir_node::update_disk_stat() disk_capacity_mb = dsi.capacity >> 20; disk_available_mb = dsi.available >> 20; - disk_available_ratio = static_cast( - disk_capacity_mb == 0 ? 0 : std::round(disk_available_mb * 100.0 / disk_capacity_mb)); + disk_available_ratio = dsn::utils::calc_percentage(disk_available_mb, disk_capacity_mb); METRIC_SET(disk_capacity, disk_capacity_total_mb, disk_capacity_mb); METRIC_SET(disk_capacity, disk_capacity_avail_mb, disk_available_mb); // It's able to change status from NORMAL to SPACE_INSUFFICIENT, and vice versa. - disk_status::type old_status = status; - auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio - ? disk_status::SPACE_INSUFFICIENT - : disk_status::NORMAL; + const disk_status::type old_status = status; + const auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio + ? 
disk_status::SPACE_INSUFFICIENT + : disk_status::NORMAL; if (old_status != new_status) { status = new_status; } @@ -389,8 +388,7 @@ void fs_manager::update_disk_stat() min_available_ratio = std::min(dn->disk_available_ratio, min_available_ratio); max_available_ratio = std::max(dn->disk_available_ratio, max_available_ratio); } - total_available_ratio = static_cast( - total_capacity_mb == 0 ? 0 : std::round(total_available_mb * 100.0 / total_capacity_mb)); + total_available_ratio = dsn::utils::calc_percentage(total_available_mb, total_capacity_mb); LOG_INFO("update disk space succeed: disk_count = {}, total_capacity_mb = {}, " "total_available_mb = {}, total_available_ratio = {}%, min_available_ratio = {}%, " diff --git a/src/replica/test/replica_disk_test_base.h b/src/replica/test/replica_disk_test_base.h index e20883cd76..37ed1de52d 100644 --- a/src/replica/test/replica_disk_test_base.h +++ b/src/replica/test/replica_disk_test_base.h @@ -20,6 +20,7 @@ #include #include "utils/fail_point.h" #include "utils/fmt_logging.h" +#include "utils/math.h" #include "replica/test/replica_test_base.h" @@ -159,12 +160,12 @@ class replica_disk_test_base : public replica_test_base void generate_mock_dir_nodes(int num) { - int64_t disk_capacity_mb = num * 100; + const int64_t disk_capacity_mb = num * 100; int count = 0; while (count++ < num) { - int64_t disk_available_mb = count * 50; - int disk_available_ratio = - static_cast(std::round((double)100 * disk_available_mb / disk_capacity_mb)); + const int64_t disk_available_mb = count * 50; + const auto disk_available_ratio = + dsn::utils::calc_percentage(disk_available_mb, disk_capacity_mb); // create one mock dir_node and make sure disk_capacity_mb_ > disk_available_mb_ dir_node *node_disk = new dir_node("tag_" + std::to_string(count), "./tag_" + std::to_string(count), diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index fee5346aac..b2b761a584 100644 --- 
a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -47,6 +47,7 @@ #include "utils/blob.h" #include "utils/error_code.h" #include "utils/errors.h" +#include "utils/math.h" #include "utils/metrics.h" #include "utils/output_utils.h" #include "utils/ports.h" @@ -98,11 +99,13 @@ dsn::metric_filters resource_usage_filters() { dsn::metric_filters filters; filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField}; - filters.entity_types = {"server", "replica"}; + filters.entity_types = {"server", "replica", "disk"}; filters.entity_metrics = {"resident_mem_usage_mb", "rdb_block_cache_mem_usage_bytes", "rdb_memtable_mem_usage_bytes", - "rdb_index_and_filter_blocks_mem_usage_bytes"}; + "rdb_index_and_filter_blocks_mem_usage_bytes", + "disk_capacity_total_mb", + "disk_capacity_avail_mb"}; return filters; } @@ -117,24 +120,48 @@ dsn::error_s parse_resource_usage(const std::string &json_string, list_nodes_hel return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid json string"); } + int64_t total_capacity_mb = 0; + int64_t total_available_mb = 0; + stat.disk_available_min_ratio = 100; for (const auto &entity : query_snapshot.entities) { - for (const auto &m : entity.metrics) { - if (entity.type == "server") { + if (entity.type == "server") { + for (const auto &m : entity.metrics) { if (m.name == "resident_mem_usage_mb") { stat.memused_res_mb += m.value; } else if (m.name == "rdb_block_cache_mem_usage_bytes") { stat.block_cache_bytes += m.value; } - } else if (entity.type == "replica") { + } + } else if (entity.type == "replica") { + for (const auto &m : entity.metrics) { if (m.name == "rdb_memtable_mem_usage_bytes") { stat.mem_tbl_bytes += m.value; } else if (m.name == "rdb_index_and_filter_blocks_mem_usage_bytes") { stat.mem_idx_bytes += m.value; } } + } else if (entity.type == "disk") { + int64_t capacity_mb = 0; + int64_t available_mb = 0; + for (const auto &m : entity.metrics) { + if (m.name == "disk_capacity_total_mb") { 
+ total_capacity_mb += m.value; + capacity_mb = m.value; + } else if (m.name == "disk_capacity_avail_mb") { + total_available_mb += m.value; + available_mb = m.value; + } + } + + const auto available_ratio = dsn::utils::calc_percentage(available_mb, capacity_mb); + stat.disk_available_min_ratio = + std::min(stat.disk_available_min_ratio, available_ratio); } } + stat.disk_available_total_ratio = + dsn::utils::calc_percentage(total_available_mb, total_capacity_mb); + return dsn::error_s::ok(); } @@ -281,10 +308,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) const auto &results = get_metrics(nodes, resource_usage_filters().to_query_string()); - // TODO(wangdan): following replica-level and disk-level metrics would be replaced: - // "replica*eon.replica_stub*disk.available.total.ratio" - // "replica*eon.replica_stub*disk.available.min.ratio" - for (size_t i = 0; i < nodes.size(); ++i) { auto tmp_it = tmp_map.find(nodes[i].address); if (tmp_it == tmp_map.end()) { @@ -312,15 +335,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) << " failed: " << res << std::endl; return true; } - - // TODO(wangdan): after migrated to new metrics, remove following code: - dsn::perf_counter_info info; - for (dsn::perf_counter_metric &m : info.counters) { - if (m.name.find("disk.available.total.ratio") != std::string::npos) - stat.disk_available_total_ratio += m.value; - else if (m.name.find("disk.available.min.ratio") != std::string::npos) - stat.disk_available_min_ratio += m.value; - } } } diff --git a/src/utils/math.cpp b/src/utils/math.cpp index e6394ca0e0..783a8a786d 100644 --- a/src/utils/math.cpp +++ b/src/utils/math.cpp @@ -17,7 +17,6 @@ #include "math.h" -#include #include #include diff --git a/src/utils/math.h b/src/utils/math.h index d79f21073b..351dab86d0 100644 --- a/src/utils/math.h +++ b/src/utils/math.h @@ -18,12 +18,29 @@ #pragma once #include +#include #include +#include + +#include "utils/ports.h" namespace dsn { 
namespace utils {

// NOTE(review): the element type of `result_set` was lost when this text was extracted;
// uint32_t is assumed here -- confirm against the definition in math.cpp.
double mean_stddev(const std::vector<uint32_t> &result_set, bool partial_sample);

// Returns `numerator` as a percentage of `denominator`, rounded to the nearest whole
// number with std::round (halfway cases round away from zero) and converted to TOutput.
//
// Returns 0 when `denominator` is 0 rather than dividing by zero; callers that sum up
// capacities may legitimately pass a zero total.
//
// Both TOutput and TInput must be arithmetic types, otherwise the template is removed
// from overload resolution.
//
// NOTE(review): the defaults of TOutput/TInput were lost in extraction. Call sites pass
// no explicit template arguments, so TOutput must have a default; int64_t is assumed for
// both -- confirm against the upstream header.
template <typename TOutput = int64_t,
          typename TInput = int64_t,
          typename = typename std::enable_if<std::is_arithmetic<TOutput>::value>::type,
          typename = typename std::enable_if<std::is_arithmetic<TInput>::value>::type>
TOutput calc_percentage(TInput numerator, TInput denominator)
{
    if (denominator == 0) {
        return static_cast<TOutput>(0);
    }

    // The floating-point literal forces the arithmetic out of integer math, so integral
    // inputs are not truncated before rounding.
    return static_cast<TOutput>(std::round(numerator * 100.0 / denominator));
}

} // namespace utils