From 14c8a102d37febb76ec37796580b7feceb0ca5da Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 30 Mar 2023 15:14:55 +0800 Subject: [PATCH] feat(new_metrics): migrate server-level metrics for nfs (#1421) https://github.com/apache/incubator-pegasus/issues/1329 Migrate server-level metrics to the new framework for both the server and client sides of nfs, including the data size in bytes that is requested by client or read from local file in server, the number of nfs copy requests that fail for client or server, the data size in bytes that is written to local file in client, and the number of failed writes to local file in client. In the old perf counters all of these metrics were volatile counters, while in the new metrics framework all of them become accumulated counters. --- src/nfs/nfs_client_impl.cpp | 56 ++++++++++++++++------------- src/nfs/nfs_client_impl.h | 10 +++--- src/nfs/nfs_server_impl.cpp | 34 ++++++++++-------- src/nfs/nfs_server_impl.h | 6 ++-- src/nfs/test/CMakeLists.txt | 2 +- src/server/pegasus_server_write.cpp | 1 - 6 files changed, 60 insertions(+), 49 deletions(-) diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp index 910bae8ec0..8d1c9f4143 100644 --- a/src/nfs/nfs_client_impl.cpp +++ b/src/nfs/nfs_client_impl.cpp @@ -31,15 +31,36 @@ #include "nfs/nfs_code_definition.h" #include "nfs/nfs_node.h" -#include "perf_counter/perf_counter.h" #include "utils/blob.h" #include "utils/command_manager.h" #include "utils/filesystem.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/string_conv.h" +#include "utils/string_view.h" #include "utils/token_buckets.h" +METRIC_DEFINE_counter(server, + nfs_client_copy_bytes, + dsn::metric_unit::kBytes, + "The accumulated data size in bytes requested by client during nfs copy"); + +METRIC_DEFINE_counter(server, + nfs_client_failed_copy_requests, + dsn::metric_unit::kRequests, + "The number of failed nfs copy requests (requested by client)"); + +METRIC_DEFINE_counter( + server, + 
nfs_client_write_bytes, + dsn::metric_unit::kBytes, + "The accumulated data size in bytes that are written to local file in client"); + +METRIC_DEFINE_counter(server, + nfs_client_failed_writes, + dsn::metric_unit::kWrites, + "The number of failed writes to local file in client"); + namespace dsn { namespace service { static uint32_t current_max_copy_rate_megabytes = 0; @@ -96,27 +117,12 @@ nfs_client_impl::nfs_client_impl() _concurrent_local_write_count(0), _buffered_local_write_count(0), _copy_requests_low(FLAGS_max_file_copy_request_count_per_file), - _high_priority_remaining_time(FLAGS_high_priority_speed_rate) + _high_priority_remaining_time(FLAGS_high_priority_speed_rate), + METRIC_VAR_INIT_server(nfs_client_copy_bytes), + METRIC_VAR_INIT_server(nfs_client_failed_copy_requests), + METRIC_VAR_INIT_server(nfs_client_write_bytes), + METRIC_VAR_INIT_server(nfs_client_failed_writes) { - _recent_copy_data_size.init_app_counter("eon.nfs_client", - "recent_copy_data_size", - COUNTER_TYPE_VOLATILE_NUMBER, - "nfs client copy data size in the recent period"); - _recent_copy_fail_count.init_app_counter( - "eon.nfs_client", - "recent_copy_fail_count", - COUNTER_TYPE_VOLATILE_NUMBER, - "nfs client copy fail count count in the recent period"); - _recent_write_data_size.init_app_counter("eon.nfs_client", - "recent_write_data_size", - COUNTER_TYPE_VOLATILE_NUMBER, - "nfs client write data size in the recent period"); - _recent_write_fail_count.init_app_counter( - "eon.nfs_client", - "recent_write_fail_count", - COUNTER_TYPE_VOLATILE_NUMBER, - "nfs client write fail count count in the recent period"); - _copy_token_buckets = std::make_unique(); register_cli_commands(); @@ -337,7 +343,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err, } if (err != ::dsn::ERR_OK) { - _recent_copy_fail_count->increment(); + METRIC_VAR_INCREMENT(nfs_client_failed_copy_requests); if (!fc->user_req->is_finished) { if (reqc->retry_count > 0) { @@ -373,7 +379,7 @@ void 
nfs_client_impl::end_copy(::dsn::error_code err, } else { - _recent_copy_data_size->add(resp.size); + METRIC_VAR_INCREMENT_BY(nfs_client_copy_bytes, resp.size); reqc->response = resp; reqc->is_ready_for_write = true; @@ -507,7 +513,7 @@ void nfs_client_impl::end_write(error_code err, size_t sz, const copy_request_ex bool completed = false; if (err != ERR_OK) { - _recent_write_fail_count->increment(); + METRIC_VAR_INCREMENT(nfs_client_failed_writes); LOG_ERROR("[nfs_service] local write failed, dir = {}, file = {}, err = {}", fc->user_req->file_size_req.dst_dir, @@ -515,7 +521,7 @@ void nfs_client_impl::end_write(error_code err, size_t sz, const copy_request_ex err); completed = true; } else { - _recent_write_data_size->add(sz); + METRIC_VAR_INCREMENT_BY(nfs_client_write_bytes, sz); file_wrapper_ptr temp_holder; zauto_lock l(fc->user_req->user_req_lock); diff --git a/src/nfs/nfs_client_impl.h b/src/nfs/nfs_client_impl.h index 88f70aad47..0c15fc8b31 100644 --- a/src/nfs/nfs_client_impl.h +++ b/src/nfs/nfs_client_impl.h @@ -41,7 +41,6 @@ #include "aio/file_io.h" #include "nfs_code_definition.h" #include "nfs_types.h" -#include "perf_counter/perf_counter_wrapper.h" #include "runtime/rpc/rpc_address.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" @@ -50,6 +49,7 @@ #include "utils/autoref_ptr.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" #include "utils/zlocks.h" namespace dsn { @@ -311,10 +311,10 @@ class nfs_client_impl zlock _local_writes_lock; std::deque _local_writes; - perf_counter_wrapper _recent_copy_data_size; - perf_counter_wrapper _recent_copy_fail_count; - perf_counter_wrapper _recent_write_data_size; - perf_counter_wrapper _recent_write_fail_count; + METRIC_VAR_DECLARE_counter(nfs_client_copy_bytes); + METRIC_VAR_DECLARE_counter(nfs_client_failed_copy_requests); + METRIC_VAR_DECLARE_counter(nfs_client_write_bytes); + METRIC_VAR_DECLARE_counter(nfs_client_failed_writes); std::unique_ptr 
_nfs_max_copy_rate_megabytes_cmd; diff --git a/src/nfs/nfs_server_impl.cpp b/src/nfs/nfs_server_impl.cpp index fc7f9b6421..238dbb594c 100644 --- a/src/nfs/nfs_server_impl.cpp +++ b/src/nfs/nfs_server_impl.cpp @@ -34,16 +34,29 @@ #include #include "nfs/nfs_code_definition.h" -#include "perf_counter/perf_counter.h" #include "runtime/api_layer1.h" #include "runtime/task/async_calls.h" #include "utils/TokenBucket.h" #include "utils/env.h" +#include "utils/autoref_ptr.h" #include "utils/filesystem.h" #include "utils/flags.h" #include "utils/string_conv.h" +#include "utils/string_view.h" #include "utils/utils.h" +METRIC_DEFINE_counter( + server, + nfs_server_copy_bytes, + dsn::metric_unit::kBytes, + "The accumulated data size in bytes that are read from local file in server during nfs copy"); + +METRIC_DEFINE_counter( + server, + nfs_server_failed_copy_requests, + dsn::metric_unit::kRequests, + "The number of nfs copy requests (received by server) that fail to read local file in server"); + namespace dsn { class disk_file; @@ -59,7 +72,10 @@ DSN_TAG_VARIABLE(max_send_rate_megabytes_per_disk, FT_MUTABLE); DSN_DECLARE_int32(file_close_timer_interval_ms_on_server); DSN_DECLARE_int32(file_close_expire_time_ms); -nfs_service_impl::nfs_service_impl() : ::dsn::serverlet("nfs") +nfs_service_impl::nfs_service_impl() + : ::dsn::serverlet("nfs"), + METRIC_VAR_INIT_server(nfs_server_copy_bytes), + METRIC_VAR_INIT_server(nfs_server_failed_copy_requests) { _file_close_timer = ::dsn::tasking::enqueue_timer( LPC_NFS_FILE_CLOSE_TIMER, @@ -67,16 +83,6 @@ nfs_service_impl::nfs_service_impl() : ::dsn::serverlet("nfs") [this] { close_file(); }, std::chrono::milliseconds(FLAGS_file_close_timer_interval_ms_on_server)); - _recent_copy_data_size.init_app_counter("eon.nfs_server", - "recent_copy_data_size", - COUNTER_TYPE_VOLATILE_NUMBER, - "nfs server copy data size in the recent period"); - _recent_copy_fail_count.init_app_counter( - "eon.nfs_server", - "recent_copy_fail_count", - 
COUNTER_TYPE_VOLATILE_NUMBER, - "nfs server copy fail count count in the recent period"); - _send_token_buckets = std::make_unique(); register_cli_commands(); } @@ -156,9 +162,9 @@ void nfs_service_impl::internal_read_callback(error_code err, size_t sz, callbac if (err != ERR_OK) { LOG_ERROR("[nfs_service] read file {} failed, err = {}", cp.file_path, err); - _recent_copy_fail_count->increment(); + METRIC_VAR_INCREMENT(nfs_server_failed_copy_requests); } else { - _recent_copy_data_size->add(sz); + METRIC_VAR_INCREMENT_BY(nfs_server_copy_bytes, sz); } ::dsn::service::copy_response resp; diff --git a/src/nfs/nfs_server_impl.h b/src/nfs/nfs_server_impl.h index ece68ecb33..f2733b1a6d 100644 --- a/src/nfs/nfs_server_impl.h +++ b/src/nfs/nfs_server_impl.h @@ -35,7 +35,6 @@ #include "aio/file_io.h" #include "nfs_code_definition.h" #include "nfs_types.h" -#include "perf_counter/perf_counter_wrapper.h" #include "runtime/serverlet.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" @@ -43,6 +42,7 @@ #include "utils/command_manager.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" #include "utils/token_buckets.h" #include "utils/zlocks.h" @@ -131,8 +131,8 @@ class nfs_service_impl : public ::dsn::serverlet std::unique_ptr _send_token_buckets; // rate limiter of send to remote - perf_counter_wrapper _recent_copy_data_size; - perf_counter_wrapper _recent_copy_fail_count; + METRIC_VAR_DECLARE_counter(nfs_server_copy_bytes); + METRIC_VAR_DECLARE_counter(nfs_server_failed_copy_requests); std::unique_ptr _nfs_max_send_rate_megabytes_cmd; diff --git a/src/nfs/test/CMakeLists.txt b/src/nfs/test/CMakeLists.txt index 0160389925..ff591b7e21 100644 --- a/src/nfs/test/CMakeLists.txt +++ b/src/nfs/test/CMakeLists.txt @@ -33,7 +33,7 @@ set(MY_PROJ_SRC "") # "GLOB" for non-recursive search set(MY_SRC_SEARCH_MODE "GLOB") -set(MY_PROJ_LIBS dsn_nfs dsn_runtime gtest dsn_aio rocksdb test_utils) +set(MY_PROJ_LIBS dsn_nfs dsn_runtime 
gtest dsn_aio dsn_http rocksdb test_utils) set(MY_BOOST_LIBS Boost::system Boost::filesystem) diff --git a/src/server/pegasus_server_write.cpp b/src/server/pegasus_server_write.cpp index d0a30b76cf..0b938b64a4 100644 --- a/src/server/pegasus_server_write.cpp +++ b/src/server/pegasus_server_write.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include