From c99cfd64284dcaa24f3be730f478fdb70ec2703a Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 23 Aug 2024 15:54:56 +0800 Subject: [PATCH] feat(new_metrics): support `server_stat` command showing some important server-level metrics (part 1) (#2085) As the 1st part that support `server_stat` command, both built-in metrics, the usage of virtual and physical memory would be shown. --- .clang-tidy | 2 +- .github/workflows/module_labeler_conf.yml | 3 +- build_tools/clang_tidy.py | 2 +- src/shell/command_helper.h | 102 ++++++++--- src/shell/commands/data_operations.cpp | 3 +- src/shell/commands/node_management.cpp | 207 +++++++++++++++++++--- src/shell/commands/table_management.cpp | 22 +-- src/shell/main.cpp | 11 +- src/utils/error_code.h | 2 + src/utils/metrics.h | 90 ++++++++-- 10 files changed, 364 insertions(+), 80 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 2e072d7d84..1cdfca281d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,7 +18,7 @@ # https://releases.llvm.org/14.0.0/tools/clang/tools/extra/docs/clang-tidy/index.html CheckOptions: [] -Checks: 'abseil-*,boost-*,bugprone-*,cert-*,clang-analyzer-*,concurrency-*,cppcoreguidelines-*,darwin-*,fuchsia-*,google-*,hicpp-*,linuxkernel-*,llvm-*,misc-*,modernize-*,performance-*,portability-*,readability-*,-cppcoreguidelines-pro-type-union-access,-llvm-include-order,-modernize-use-trailing-return-type,-cppcoreguidelines-avoid-non-const-global-variables,-fuchsia-statically-constructed-objects,-fuchsia-overloaded-operator,-bugprone-easily-swappable-parameters,-cppcoreguidelines-non-private-member-variables-in-classes,-misc-non-private-member-variables-in-classes,-cppcoreguidelines-pro-bounds-array-to-pointer-decay,-hicpp-no-array-decay,-hicpp-named-parameter,-readability-named-parameter,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-cert-err58-cpp,-cppcoreguidelines-avoid-c-arrays,-hicpp-avoid-c-arrays,-modernize-avoid-c-arrays,-cppcoreguidelines-owning-memory,-cppcoreguidelines-pro-type-const-cast,-readability-identifier-length,-fuchsia-default-arguments-calls,-google-readability-avoid-underscore-in-googletest-name,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers' +Checks: 'abseil-*,boost-*,bugprone-*,cert-*,clang-analyzer-*,concurrency-*,cppcoreguidelines-*,darwin-*,fuchsia-*,google-*,hicpp-*,linuxkernel-*,llvm-*,misc-*,modernize-*,performance-*,portability-*,readability-*,-cppcoreguidelines-pro-type-union-access,-llvm-include-order,-misc-definitions-in-headers,-modernize-use-trailing-return-type,-cppcoreguidelines-macro-usage,-modernize-replace-disallow-copy-and-assign-macro,-bugprone-macro-parentheses,-cppcoreguidelines-avoid-non-const-global-variables,-fuchsia-statically-constructed-objects,-fuchsia-overloaded-operator,-bugprone-easily-swappable-parameters,-cppcoreguidelines-non-private-member-variables-in-classes,-misc-non-private-member-variables-in-classes,-cppcoreguidelines-pro-bounds-array-to-pointer-decay,-hicpp-no-array-decay,-hicpp-named-parameter,-readability-named-parameter,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-cert-err58-cpp,-cppcoreguidelines-avoid-c-arrays,-hicpp-avoid-c-arrays,-modernize-avoid-c-arrays,-cppcoreguidelines-owning-memory,-cppcoreguidelines-pro-type-const-cast,-concurrency-mt-unsafe,-readability-identifier-length,-fuchsia-default-arguments-calls,-google-readability-avoid-underscore-in-googletest-name,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers' ExtraArgs: ExtraArgsBefore: [] FormatStyle: none diff --git a/.github/workflows/module_labeler_conf.yml b/.github/workflows/module_labeler_conf.yml index da0ecf3905..5994abb568 100644 --- a/.github/workflows/module_labeler_conf.yml +++ b/.github/workflows/module_labeler_conf.yml @@ -16,7 +16,8 @@ # under the License. --- permissions: - contents: write + contents: 'write' + pull-requests: 'write' github: - .github/**/* diff --git a/build_tools/clang_tidy.py b/build_tools/clang_tidy.py index d4c8964a0c..ed4f4d52a9 100755 --- a/build_tools/clang_tidy.py +++ b/build_tools/clang_tidy.py @@ -60,7 +60,7 @@ def tidy_on_path(path): "clang-tidy", "-p0", "-path", BUILD_PATH, - "-checks=-cppcoreguidelines-pro-type-union-access,-llvm-include-order,-modernize-use-trailing-return-type,-cppcoreguidelines-avoid-non-const-global-variables,-fuchsia-statically-constructed-objects,-fuchsia-overloaded-operator,-bugprone-easily-swappable-parameters,-cppcoreguidelines-non-private-member-variables-in-classes,-misc-non-private-member-variables-in-classes,-cppcoreguidelines-pro-bounds-array-to-pointer-decay,-hicpp-no-array-decay,-hicpp-named-parameter,-readability-named-parameter,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-cert-err58-cpp,-cppcoreguidelines-avoid-c-arrays,-hicpp-avoid-c-arrays,-modernize-avoid-c-arrays,-cppcoreguidelines-owning-memory,-cppcoreguidelines-pro-type-const-cast,-readability-identifier-length,-fuchsia-default-arguments-calls,-google-readability-avoid-underscore-in-googletest-name,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers", + "-checks=-cppcoreguidelines-pro-type-union-access,-llvm-include-order,-misc-definitions-in-headers,-modernize-use-trailing-return-type,-cppcoreguidelines-macro-usage,-modernize-replace-disallow-copy-and-assign-macro,-bugprone-macro-parentheses,-cppcoreguidelines-avoid-non-const-global-variables,-fuchsia-statically-constructed-objects,-fuchsia-overloaded-operator,-bugprone-easily-swappable-parameters,-cppcoreguidelines-non-private-member-variables-in-classes,-misc-non-private-member-variables-in-classes,-cppcoreguidelines-pro-bounds-array-to-pointer-decay,-hicpp-no-array-decay,-hicpp-named-parameter,-readability-named-parameter,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-cert-err58-cpp,-cppcoreguidelines-avoid-c-arrays,-hicpp-avoid-c-arrays,-modernize-avoid-c-arrays,-cppcoreguidelines-owning-memory,-cppcoreguidelines-pro-type-const-cast,-concurrency-mt-unsafe,-readability-identifier-length,-fuchsia-default-arguments-calls,-google-readability-avoid-underscore-in-googletest-name,-cppcoreguidelines-avoid-magic-numbers,-readability-magic-numbers", "-extra-arg=-language=c++", "-extra-arg=-std=c++17", "-extra-arg=-Ithirdparty/output/include"] diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 912fae9172..3e3b11ca45 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -709,30 +709,67 @@ inline std::vector get_metrics(const std::vector &n return results; } +// Adapt the result returned by `get_metrics` into the structure that could be processed by +// `remote_command`. +template +inline dsn::error_s process_get_metrics_result(const dsn::http_result &result, + const node_desc &node, + const char *what, + Args &&...args) +{ + if (dsn_unlikely(!result.error())) { + return FMT_ERR(result.error().code(), + "ERROR: query {} metrics from node {} failed, msg={}", + fmt::format(what, std::forward(args)...), + node.hp, + result.error()); + } + + if (dsn_unlikely(result.status() != dsn::http_status_code::kOk)) { + return FMT_ERR(dsn::ERR_HTTP_ERROR, + "ERROR: query {} metrics from node {} failed, http_status={}, msg={}", + fmt::format(what, std::forward(args)...), + node.hp, + dsn::get_http_status_message(result.status()), + result.body()); + } + + return dsn::error_s::ok(); +} + #define RETURN_SHELL_IF_GET_METRICS_FAILED(result, node, what, ...) \ do { \ - if (dsn_unlikely(!result.error())) { \ - std::cout << "ERROR: send http request to query " << fmt::format(what, ##__VA_ARGS__) \ - << " metrics from node " << node.hp << " failed: " << result.error() \ - << std::endl; \ - return true; \ - } \ - if (dsn_unlikely(result.status() != dsn::http_status_code::kOk)) { \ - std::cout << "ERROR: send http request to query " << what << " metrics from node " \ - << node.hp << " failed: " << dsn::get_http_status_message(result.status()) \ - << std::endl \ - << result.body() << std::endl; \ + const auto &res = process_get_metrics_result(result, node, what, ##__VA_ARGS__); \ + if (dsn_unlikely(!res)) { \ + fmt::println(res.description()); \ return true; \ } \ } while (0) +// Adapt the result of some parsing operations on the metrics returned by `get_metrics` into the +// structure that could be processed by `remote_command`. +template +inline dsn::error_s process_parse_metrics_result(const dsn::error_s &result, + const node_desc &node, + const char *what, + Args &&...args) +{ + if (dsn_unlikely(!result)) { + return FMT_ERR(result.code(), + "ERROR: {} metrics response from node {} failed, msg={}", + fmt::format(what, std::forward(args)...), + node.hp, + result); + } + + return dsn::error_s::ok(); +} + #define RETURN_SHELL_IF_PARSE_METRICS_FAILED(expr, node, what, ...) \ do { \ - const auto &res = (expr); \ + const auto &res = process_parse_metrics_result(expr, node, what, ##__VA_ARGS__); \ if (dsn_unlikely(!res)) { \ - std::cout << "ERROR: parse " << fmt::format(what, ##__VA_ARGS__) \ - << " metrics response from node " << node.hp << " failed: " << res \ - << std::endl; \ + fmt::println(res.description()); \ return true; \ } \ } while (0) @@ -832,12 +869,20 @@ class aggregate_stats_calcs } // Create the aggregations as needed. + DEF_CALC_CREATOR(assignments) DEF_CALC_CREATOR(sums) DEF_CALC_CREATOR(increases) DEF_CALC_CREATOR(rates) #undef DEF_CALC_CREATOR +#define CALC_ASSIGNMENT_STATS(entities) \ + do { \ + if (_assignments) { \ + RETURN_NOT_OK(_assignments->assign(entities)); \ + } \ + } while (0) + #define CALC_ACCUM_STATS(entities) \ do { \ if (_sums) { \ @@ -845,24 +890,38 @@ class aggregate_stats_calcs } \ } while (0) - // Perform the chosen accum aggregations on the fetched metrics. + // Perform the chosen aggregations (both assignment and accum) on the fetched metrics. dsn::error_s aggregate_metrics(const std::string &json_string) { DESERIALIZE_METRIC_QUERY_BRIEF_SNAPSHOT(value, json_string, query_snapshot); + return aggregate_metrics(query_snapshot); + } + + dsn::error_s aggregate_metrics(const dsn::metric_query_brief_value_snapshot &query_snapshot) + { + CALC_ASSIGNMENT_STATS(query_snapshot.entities); CALC_ACCUM_STATS(query_snapshot.entities); return dsn::error_s::ok(); } - // Perform all of the chosen aggregations (both accum and delta) on the fetched metrics. + // Perform the chosen aggregations (assignement, accum, delta and rate) on the fetched metrics. dsn::error_s aggregate_metrics(const std::string &json_string_start, const std::string &json_string_end) { DESERIALIZE_METRIC_QUERY_BRIEF_2_SAMPLES( json_string_start, json_string_end, query_snapshot_start, query_snapshot_end); - // Apply ending sample to the accum aggregations. + return aggregate_metrics(query_snapshot_start, query_snapshot_end); + } + + dsn::error_s + aggregate_metrics(const dsn::metric_query_brief_value_snapshot &query_snapshot_start, + const dsn::metric_query_brief_value_snapshot &query_snapshot_end) + { + // Apply ending sample to the assignment and accum aggregations. + CALC_ASSIGNMENT_STATS(query_snapshot_end.entities); CALC_ACCUM_STATS(query_snapshot_end.entities); const std::array deltas_list = {&_increases, &_rates}; @@ -884,9 +943,12 @@ class aggregate_stats_calcs #undef CALC_ACCUM_STATS +#undef CALC_ASSIGNMENT_STATS + private: DISALLOW_COPY_AND_ASSIGN(aggregate_stats_calcs); + std::unique_ptr _assignments; std::unique_ptr _sums; std::unique_ptr _increases; std::unique_ptr _rates; @@ -1940,7 +2002,7 @@ get_table_stats(shell_context *sc, uint32_t sample_interval_ms, std::vectoraggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], - "row data requests"); + "aggregate row data requests"); } return true; @@ -1990,7 +2052,7 @@ inline bool get_partition_stats(shell_context *sc, RETURN_SHELL_IF_PARSE_METRICS_FAILED( calcs->aggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], - "row data requests for table(id={})", + "aggregate row data requests for table(id={})", table_id); } diff --git a/src/shell/commands/data_operations.cpp b/src/shell/commands/data_operations.cpp index 150f33bede..2c36739c27 100644 --- a/src/shell/commands/data_operations.cpp +++ b/src/shell/commands/data_operations.cpp @@ -62,7 +62,6 @@ #include "utils/blob.h" #include "utils/defer.h" #include "utils/error_code.h" -#include "utils/errors.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/metrics.h" @@ -2294,7 +2293,7 @@ bool get_rdb_estimated_keys_stats(shell_context *sc, create_rdb_estimated_keys_stats_calcs(table_id, pcs, nodes[i].hp, "replica", rows); RETURN_SHELL_IF_PARSE_METRICS_FAILED(calcs->aggregate_metrics(results[i].body()), nodes[i], - "rdb_estimated_keys for table(id={})", + "aggregate rdb_estimated_keys for table(id={})", table_id); } diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index 32cdc091b3..57b649f881 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -40,6 +40,7 @@ #include #include "client/replication_ddl_client.h" +#include "common/json_helper.h" #include "common/replication_enums.h" #include "dsn.layer2_types.h" #include "meta_admin_types.h" @@ -49,6 +50,7 @@ #include "shell/command_helper.h" #include "shell/command_utils.h" #include "shell/commands.h" +#include "utils/blob.h" #include "utils/error_code.h" #include "utils/errors.h" #include "utils/flags.h" @@ -232,19 +234,173 @@ dsn::metric_filters rw_requests_filters() return filters; } +dsn::metric_filters server_stat_filters() +{ + dsn::metric_filters filters; + filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField}; + filters.entity_types = {"server"}; + filters.entity_metrics = {"virtual_mem_usage_mb", "resident_mem_usage_mb"}; + return filters; +} + +struct meta_server_stats +{ + meta_server_stats() = default; + + double virt_mem_mb{0.0}; + double res_mem_mb{0.0}; + + DEFINE_JSON_SERIALIZATION(virt_mem_mb, res_mem_mb) +}; + +std::pair +aggregate_meta_server_stats(const node_desc &node, + const dsn::metric_query_brief_value_snapshot &query_snapshot) +{ + aggregate_stats_calcs calcs; + meta_server_stats stats; + calcs.create_assignments( + "server", + stat_var_map({{"virtual_mem_usage_mb", &stats.virt_mem_mb}, + {"resident_mem_usage_mb", &stats.res_mem_mb}})); + + auto command_result = process_parse_metrics_result( + calcs.aggregate_metrics(query_snapshot), node, "aggregate meta server stats"); + if (!command_result) { + // Metrics failed to be aggregated. + return std::make_pair(false, command_result.description()); + } + + return std::make_pair(true, + dsn::json::json_forwarder::encode(stats).to_string()); +} + +struct replica_server_stats +{ + replica_server_stats() = default; + + double virt_mem_mb{0.0}; + double res_mem_mb{0.0}; + + DEFINE_JSON_SERIALIZATION(virt_mem_mb, res_mem_mb) +}; + +std::pair +aggregate_replica_server_stats(const node_desc &node, + const dsn::metric_query_brief_value_snapshot &query_snapshot_start, + const dsn::metric_query_brief_value_snapshot &query_snapshot_end) +{ + aggregate_stats_calcs calcs; + meta_server_stats stats; + calcs.create_assignments( + "server", + stat_var_map({{"virtual_mem_usage_mb", &stats.virt_mem_mb}, + {"resident_mem_usage_mb", &stats.res_mem_mb}})); + + auto command_result = process_parse_metrics_result( + calcs.aggregate_metrics(query_snapshot_start, query_snapshot_end), + node, + "aggregate replica server stats"); + if (!command_result) { + // Metrics failed to be aggregated. + return std::make_pair(false, command_result.description()); + } + + return std::make_pair(true, + dsn::json::json_forwarder::encode(stats).to_string()); +} + +std::vector> get_server_stats(const std::vector &nodes, + uint32_t sample_interval_ms) +{ + // Ask target node (meta or replica server) for the metrics of server stats. + const auto &query_string = server_stat_filters().to_query_string(); + const auto &results_start = get_metrics(nodes, query_string); + std::this_thread::sleep_for(std::chrono::milliseconds(sample_interval_ms)); + const auto &results_end = get_metrics(nodes, query_string); + + std::vector> command_results; + command_results.reserve(nodes.size()); + for (size_t i = 0; i < nodes.size(); ++i) { + +#define SKIP_IF_PROCESS_RESULT_FALSE() \ + if (!command_result) { \ + command_results.emplace_back(command_result, command_result.description()); \ + continue; \ + } + +#define PROCESS_GET_METRICS_RESULT(result, what, ...) \ + { \ + auto command_result = process_get_metrics_result(result, nodes[i], what, ##__VA_ARGS__); \ + SKIP_IF_PROCESS_RESULT_FALSE() \ + } + + // Skip the metrics that failed to be fetched. + PROCESS_GET_METRICS_RESULT(results_start[i], "starting server stats") + PROCESS_GET_METRICS_RESULT(results_end[i], "ending server stats") + +#undef PROCESS_GET_METRICS_RESULT + + dsn::metric_query_brief_value_snapshot query_snapshot_start; + dsn::metric_query_brief_value_snapshot query_snapshot_end; + { + // Skip the metrics that failed to be deserialized. + auto command_result = process_parse_metrics_result( + deserialize_metric_query_2_samples(results_start[i].body(), + results_end[i].body(), + query_snapshot_start, + query_snapshot_end), + nodes[i], + "deserialize server stats"); + SKIP_IF_PROCESS_RESULT_FALSE() + } + +#undef SKIP_IF_PROCESS_RESULT_FALSE + + if (query_snapshot_end.role == "meta") { + command_results.push_back(aggregate_meta_server_stats(nodes[i], query_snapshot_end)); + continue; + } + + if (query_snapshot_end.role == "replica") { + command_results.push_back( + aggregate_replica_server_stats(nodes[i], query_snapshot_start, query_snapshot_end)); + continue; + } + + command_results.emplace_back( + false, fmt::format("role {} is unsupported", query_snapshot_end.role)); + } + + return command_results; +} + +std::vector> call_nodes(shell_context *sc, + const std::vector &nodes, + const std::string &command, + const std::vector &arguments, + uint32_t sample_interval_ms) +{ + if (command == "server_stat") { + return get_server_stats(nodes, sample_interval_ms); + } + + return call_remote_command(sc, nodes, command, arguments); +} + } // anonymous namespace -bool ls_nodes(command_executor *e, shell_context *sc, arguments args) +bool ls_nodes(command_executor *, shell_context *sc, arguments args) { - static struct option long_options[] = {{"detailed", no_argument, 0, 'd'}, - {"resolve_ip", no_argument, 0, 'r'}, - {"resource_usage", no_argument, 0, 'u'}, - {"qps", no_argument, 0, 'q'}, - {"json", no_argument, 0, 'j'}, - {"status", required_argument, 0, 's'}, - {"output", required_argument, 0, 'o'}, - {"sample_interval_ms", required_argument, 0, 't'}, - {0, 0, 0, 0}}; + static struct option long_options[] = {{"detailed", no_argument, nullptr, 'd'}, + {"resolve_ip", no_argument, nullptr, 'r'}, + {"resource_usage", no_argument, nullptr, 'u'}, + {"qps", no_argument, nullptr, 'q'}, + {"json", no_argument, nullptr, 'j'}, + {"status", required_argument, nullptr, 's'}, + {"output", required_argument, nullptr, 'o'}, + {"sample_interval_ms", required_argument, nullptr, 'i'}, + {nullptr, 0, nullptr, 0}}; std::string status; std::string output_file; @@ -259,7 +415,9 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) optind = 0; while (true) { int option_index = 0; - int c = getopt_long(args.argc, args.argv, "druqjs:o:t:", long_options, &option_index); + // TODO(wangdan): getopt_long() is not thread-safe (clang-tidy[concurrency-mt-unsafe]), + // could use https://github.com/p-ranav/argparse instead. + int c = getopt_long(args.argc, args.argv, "druqjs:o:i:", long_options, &option_index); if (c == -1) { // -1 means all command-line options have been parsed. break; @@ -288,7 +446,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) case 'o': output_file = optarg; break; - case 't': + case 'i': RETURN_FALSE_IF_SAMPLE_INTERVAL_MS_INVALID(); break; default: @@ -388,7 +546,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) auto &stat = tmp_it->second; RETURN_SHELL_IF_PARSE_METRICS_FAILED( - parse_resource_usage(results[i].body(), stat), nodes[i], "resource"); + parse_resource_usage(results[i].body(), stat), nodes[i], "parse resource usage"); } } @@ -430,7 +588,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) RETURN_SHELL_IF_PARSE_METRICS_FAILED( calcs.aggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], - "rw requests"); + "aggregate rw requests"); } } @@ -452,8 +610,9 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) RETURN_SHELL_IF_GET_METRICS_FAILED(results[i], nodes[i], "profiler latency"); auto &stat = tmp_it->second; - RETURN_SHELL_IF_PARSE_METRICS_FAILED( - parse_profiler_latency(results[i].body(), stat), nodes[i], "profiler latency"); + RETURN_SHELL_IF_PARSE_METRICS_FAILED(parse_profiler_latency(results[i].body(), stat), + nodes[i], + "parse profiler latency"); } } @@ -568,6 +727,7 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) // [-t all|meta-server|replica-server] // [-r|--resolve_ip] // [-l host:port,host:port...] + // [-i|--sample_interval_ms num] argh::parser cmd(args.argc, args.argv, argh::parser::PREFER_PARAM_FOR_UNREG_OPTION); std::string command; @@ -593,9 +753,8 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) } // Initialize the command. - const std::map kCmdsMapping({{"server_info", "server-info"}, - {"server_stat", "server-stat"}, - {"flush_log", "flush-log"}}); + const std::map kCmdsMapping( + {{"server_info", "server-info"}, {"flush_log", "flush-log"}}); const auto &it = kCmdsMapping.find(pos_arg.str()); if (it != kCmdsMapping.end()) { // Use the mapped command. @@ -652,10 +811,16 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) nlohmann::json info; info["command"] = fmt::format("{} {}", command, fmt::join(pos_args, " ")); - const auto results = call_remote_command(sc, nodes, command, pos_args); + + uint32_t sample_interval_ms = 0; + PARSE_OPT_UINT( + sample_interval_ms, FLAGS_nodes_sample_interval_ms, {"-i", "--sample_interval_ms"}); + + const auto &results = call_nodes(sc, nodes, command, pos_args, sample_interval_ms); + CHECK_EQ(results.size(), nodes.size()); + int succeed = 0; int failed = 0; - CHECK_EQ(results.size(), nodes.size()); for (int i = 0; i < nodes.size(); ++i) { nlohmann::json node_info; node_info["role"] = nodes[i].desc; diff --git a/src/shell/commands/table_management.cpp b/src/shell/commands/table_management.cpp index b75e27f56a..f93939ff68 100644 --- a/src/shell/commands/table_management.cpp +++ b/src/shell/commands/table_management.cpp @@ -312,7 +312,7 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) RETURN_SHELL_IF_PARSE_METRICS_FAILED( parse_sst_stat(results[i].body(), count_map[nodes[i].hp], disk_map[nodes[i].hp]), nodes[i], - "sst"); + "parse sst stats"); } ::dsn::utils::table_printer tp_general("result"); @@ -452,15 +452,15 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) return true; } -bool app_stat(command_executor *e, shell_context *sc, arguments args) +bool app_stat(command_executor *, shell_context *sc, arguments args) { - static struct option long_options[] = {{"app_name", required_argument, 0, 'a'}, - {"only_qps", no_argument, 0, 'q'}, - {"only_usage", no_argument, 0, 'u'}, - {"json", no_argument, 0, 'j'}, - {"output", required_argument, 0, 'o'}, - {"sample_interval_ms", required_argument, 0, 't'}, - {0, 0, 0, 0}}; + static struct option long_options[] = {{"app_name", required_argument, nullptr, 'a'}, + {"only_qps", no_argument, nullptr, 'q'}, + {"only_usage", no_argument, nullptr, 'u'}, + {"json", no_argument, nullptr, 'j'}, + {"output", required_argument, nullptr, 'o'}, + {"sample_interval_ms", required_argument, nullptr, 'i'}, + {nullptr, 0, nullptr, 0}}; std::string app_name; std::string out_file; @@ -472,7 +472,7 @@ bool app_stat(command_executor *e, shell_context *sc, arguments args) optind = 0; while (true) { int option_index = 0; - int c = getopt_long(args.argc, args.argv, "a:qujo:t:", long_options, &option_index); + int c = getopt_long(args.argc, args.argv, "a:qujo:i:", long_options, &option_index); if (c == -1) { // -1 means all command-line options have been parsed. break; @@ -494,7 +494,7 @@ bool app_stat(command_executor *e, shell_context *sc, arguments args) case 'o': out_file = optarg; break; - case 't': + case 'i': RETURN_FALSE_IF_SAMPLE_INTERVAL_MS_INVALID(); break; default: diff --git a/src/shell/main.cpp b/src/shell/main.cpp index a6df779fe9..41dd95d144 100644 --- a/src/shell/main.cpp +++ b/src/shell/main.cpp @@ -101,7 +101,7 @@ static command_executor commands[] = { "get the node status for this cluster", "[-d|--detailed] [-j|--json] [-r|--resolve_ip] [-u|--resource_usage] " "[-o|--output file_name] [-s|--status all|alive|unalive] [-q|--qps] " - "[-t|--sample_interval_ms num]", + "[-i|--sample_interval_ms num]", ls_nodes, }, { @@ -372,8 +372,8 @@ static command_executor commands[] = { { "remote_command", "send remote command to servers", - "[-t all|meta-server|replica-server] [-r|--resolve_ip] [-l host:port,host:port...]" - " [arguments...]", + "[-t all|meta-server|replica-server] [-r|--resolve_ip] [-l host:port,host:port...] " + "[-i|--sample_interval_ms num] [arguments...]", remote_command, }, { @@ -385,14 +385,15 @@ static command_executor commands[] = { { "server_stat", "get stat of servers", - "[-t all|meta-server|replica-server] [-l host:port,host:port...] [-r|--resolve_ip]", + "[-t all|meta-server|replica-server] [-l host:port,host:port...] [-r|--resolve_ip] " + "[-i|--sample_interval_ms num]", server_stat, }, { "app_stat", "get stat of apps", "[-a|--app_name str] [-q|--only_qps] [-u|--only_usage] [-j|--json] " - "[-o|--output file_name] [-t|--sample_interval_ms num]", + "[-o|--output file_name] [-i|--sample_interval_ms num]", app_stat, }, { diff --git a/src/utils/error_code.h b/src/utils/error_code.h index 023ec2b253..dfdc680304 100644 --- a/src/utils/error_code.h +++ b/src/utils/error_code.h @@ -185,6 +185,8 @@ DEFINE_ERR_CODE(ERR_CURL_FAILED) DEFINE_ERR_CODE(ERR_DUP_EXIST) +DEFINE_ERR_CODE(ERR_HTTP_ERROR) + } // namespace dsn USER_DEFINED_STRUCTURE_FORMATTER(::dsn::error_code); diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 9d9b0671d3..b399d9313a 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -42,9 +42,11 @@ #include #include "common/json_helper.h" +#include "gutil/map_util.h" #include "http/http_server.h" #include "utils/alloc.h" #include "utils/autoref_ptr.h" +#include "utils/blob.h" #include "utils/casts.h" #include "utils/enum_helper.h" #include "utils/error_code.h" @@ -52,7 +54,6 @@ #include "utils/fmt_logging.h" #include "utils/long_adder.h" #include "utils/macros.h" -#include "gutil/map_util.h" #include "utils/nth_element.h" #include "utils/ports.h" #include "utils/singleton.h" @@ -1662,7 +1663,7 @@ class auto_count struct metric_brief_##field##_snapshot \ { \ std::string name; \ - double field; \ + double field = 0.0; \ \ DEFINE_JSON_SERIALIZATION(name, field) \ } @@ -1700,31 +1701,84 @@ DEF_ALL_METRIC_BRIEF_SNAPSHOTS(value); DEF_ALL_METRIC_BRIEF_SNAPSHOTS(p99); -#define DESERIALIZE_METRIC_QUERY_BRIEF_SNAPSHOT(field, json_string, query_snapshot) \ - dsn::metric_query_brief_##field##_snapshot query_snapshot; \ +// Deserialize the json string into the snapshot. +template +inline error_s deserialize_metric_snapshot(const std::string &json_string, + TMetricSnapshot &snapshot) +{ + dsn::blob bb(json_string.data(), 0, json_string.size()); + if (dsn_unlikely(!dsn::json::json_forwarder::decode(bb, snapshot))) { + return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid json string: {}", json_string); + } + + return error_s::ok(); +} + +#define DESERIALIZE_METRIC_SNAPSHOT(json_string, query_snapshot) \ do { \ - dsn::blob bb(json_string.data(), 0, json_string.size()); \ - if (dsn_unlikely( \ - !dsn::json::json_forwarder::decode( \ - bb, query_snapshot))) { \ - return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid json string: {}", json_string); \ + const auto &res = deserialize_metric_snapshot(json_string, query_snapshot); \ + if (dsn_unlikely(!res)) { \ + return res; \ } \ } while (0) +// Deserialize the json string into the snapshot specially for metric query which is declared +// internally. +#define DESERIALIZE_METRIC_QUERY_BRIEF_SNAPSHOT(field, json_string, query_snapshot) \ + dsn::metric_query_brief_##field##_snapshot query_snapshot; \ + DESERIALIZE_METRIC_SNAPSHOT(json_string, query_snapshot) + +// Deserialize both json string samples into respective snapshots. +template +inline error_s deserialize_metric_2_samples(const std::string &json_string_start, + const std::string &json_string_end, + TMetricSnapshot &snapshot_start, + TMetricSnapshot &snapshot_end) +{ + DESERIALIZE_METRIC_SNAPSHOT(json_string_start, snapshot_start); + DESERIALIZE_METRIC_SNAPSHOT(json_string_end, snapshot_end); + return error_s::ok(); +} + +// Deserialize both json string samples into respective snapshots specially for metric queries. +template +inline error_s deserialize_metric_query_2_samples(const std::string &json_string_start, + const std::string &json_string_end, + TMetricQuerySnapshot &snapshot_start, + TMetricQuerySnapshot &snapshot_end) +{ + const auto &res = deserialize_metric_2_samples( + json_string_start, json_string_end, snapshot_start, snapshot_end); + if (!res) { + return res; + } + + if (snapshot_end.timestamp_ns <= snapshot_start.timestamp_ns) { + return FMT_ERR(dsn::ERR_INVALID_DATA, + "duration for metric samples should be > 0: timestamp_ns_start={}, " + "timestamp_ns_end={}", + snapshot_start.timestamp_ns, + snapshot_end.timestamp_ns); + } + + return error_s::ok(); +} + +// Deserialize both json string samples into respective snapshots specially for metric queries +// which are declared internally. +// // Currently only Gauge and Counter are considered to have "increase" and "rate", which means // samples are needed. Thus brief `value` field is enough. #define DESERIALIZE_METRIC_QUERY_BRIEF_2_SAMPLES( \ json_string_start, json_string_end, query_snapshot_start, query_snapshot_end) \ - DESERIALIZE_METRIC_QUERY_BRIEF_SNAPSHOT(value, json_string_start, query_snapshot_start); \ - DESERIALIZE_METRIC_QUERY_BRIEF_SNAPSHOT(value, json_string_end, query_snapshot_end); \ + dsn::metric_query_brief_value_snapshot query_snapshot_start; \ + dsn::metric_query_brief_value_snapshot query_snapshot_end; \ \ do { \ - if (query_snapshot_end.timestamp_ns <= query_snapshot_start.timestamp_ns) { \ - return FMT_ERR(dsn::ERR_INVALID_DATA, \ - "duration for metric samples should be > 0: timestamp_ns_start={}, " \ - "timestamp_ns_end={}", \ - query_snapshot_start.timestamp_ns, \ - query_snapshot_end.timestamp_ns); \ + const auto &res = deserialize_metric_query_2_samples( \ + json_string_start, json_string_end, query_snapshot_start, query_snapshot_end); \ + if (dsn_unlikely(!res)) { \ + return res; \ } \ } while (0) @@ -1756,7 +1810,7 @@ inline error_s parse_metric_attribute(const metric_entity::attr_map &attrs, return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid {}: {}", name, *value_ptr); } - return dsn::error_s::ok(); + return error_s::ok(); } inline error_s parse_metric_table_id(const metric_entity::attr_map &attrs, int32_t &table_id)