Skip to content

Commit

Permalink
feat(new_metrics): migrate server-level metrics for nfs (#1421)
Browse files Browse the repository at this point in the history
#1329

Migrate server-level metrics to the new framework for both the server and client
sides of nfs, including the data size in bytes that is requested by the client or
read from local files on the server, the number of nfs copy requests that fail for
the client or the server, the data size in bytes that is written to local files on
the client, and the number of failed writes to local files on the client.

In the old perf-counter framework, all of these metrics were volatile counters,
while in the new metrics framework all of them become accumulated counters.
  • Loading branch information
empiredan committed Dec 11, 2023
1 parent 2d362f1 commit 14c8a10
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 49 deletions.
56 changes: 31 additions & 25 deletions src/nfs/nfs_client_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,36 @@

#include "nfs/nfs_code_definition.h"
#include "nfs/nfs_node.h"
#include "perf_counter/perf_counter.h"
#include "utils/blob.h"
#include "utils/command_manager.h"
#include "utils/filesystem.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
#include "utils/string_conv.h"
#include "utils/string_view.h"
#include "utils/token_buckets.h"

// Metric definitions for the nfs client side. Each one is registered with
// `server` as the first macro argument and is bound to an nfs_client_impl
// member through METRIC_VAR_INIT_server(...) in the constructor's
// initializer list.

// Bytes received through nfs copy: incremented by `resp.size` in
// nfs_client_impl::end_copy().
// NOTE(review): the increment sits in an `else` branch whose condition is
// outside the visible hunk — presumably the success path; confirm.
METRIC_DEFINE_counter(server,
nfs_client_copy_bytes,
dsn::metric_unit::kBytes,
"The accumulated data size in bytes requested by client during nfs copy");

// Failed copy requests: incremented once in nfs_client_impl::end_copy()
// whenever `err != ERR_OK`.
METRIC_DEFINE_counter(server,
nfs_client_failed_copy_requests,
dsn::metric_unit::kRequests,
"The number of failed nfs copy requests (requested by client)");

// Bytes written locally: incremented by `sz` in nfs_client_impl::end_write()
// when the write finished with ERR_OK.
METRIC_DEFINE_counter(
server,
nfs_client_write_bytes,
dsn::metric_unit::kBytes,
"The accumulated data size in bytes that are written to local file in client");

// Failed local writes: incremented once in nfs_client_impl::end_write()
// whenever `err != ERR_OK`.
METRIC_DEFINE_counter(server,
nfs_client_failed_writes,
dsn::metric_unit::kWrites,
"The number of failed writes to local file in client");

namespace dsn {
namespace service {
static uint32_t current_max_copy_rate_megabytes = 0;
Expand Down Expand Up @@ -96,27 +117,12 @@ nfs_client_impl::nfs_client_impl()
_concurrent_local_write_count(0),
_buffered_local_write_count(0),
_copy_requests_low(FLAGS_max_file_copy_request_count_per_file),
_high_priority_remaining_time(FLAGS_high_priority_speed_rate)
_high_priority_remaining_time(FLAGS_high_priority_speed_rate),
METRIC_VAR_INIT_server(nfs_client_copy_bytes),
METRIC_VAR_INIT_server(nfs_client_failed_copy_requests),
METRIC_VAR_INIT_server(nfs_client_write_bytes),
METRIC_VAR_INIT_server(nfs_client_failed_writes)
{
_recent_copy_data_size.init_app_counter("eon.nfs_client",
"recent_copy_data_size",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs client copy data size in the recent period");
_recent_copy_fail_count.init_app_counter(
"eon.nfs_client",
"recent_copy_fail_count",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs client copy fail count count in the recent period");
_recent_write_data_size.init_app_counter("eon.nfs_client",
"recent_write_data_size",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs client write data size in the recent period");
_recent_write_fail_count.init_app_counter(
"eon.nfs_client",
"recent_write_fail_count",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs client write fail count count in the recent period");

_copy_token_buckets = std::make_unique<utils::token_buckets>();

register_cli_commands();
Expand Down Expand Up @@ -337,7 +343,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err,
}

if (err != ::dsn::ERR_OK) {
_recent_copy_fail_count->increment();
METRIC_VAR_INCREMENT(nfs_client_failed_copy_requests);

if (!fc->user_req->is_finished) {
if (reqc->retry_count > 0) {
Expand Down Expand Up @@ -373,7 +379,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err,
}

else {
_recent_copy_data_size->add(resp.size);
METRIC_VAR_INCREMENT_BY(nfs_client_copy_bytes, resp.size);

reqc->response = resp;
reqc->is_ready_for_write = true;
Expand Down Expand Up @@ -507,15 +513,15 @@ void nfs_client_impl::end_write(error_code err, size_t sz, const copy_request_ex

bool completed = false;
if (err != ERR_OK) {
_recent_write_fail_count->increment();
METRIC_VAR_INCREMENT(nfs_client_failed_writes);

LOG_ERROR("[nfs_service] local write failed, dir = {}, file = {}, err = {}",
fc->user_req->file_size_req.dst_dir,
fc->file_name,
err);
completed = true;
} else {
_recent_write_data_size->add(sz);
METRIC_VAR_INCREMENT_BY(nfs_client_write_bytes, sz);

file_wrapper_ptr temp_holder;
zauto_lock l(fc->user_req->user_req_lock);
Expand Down
10 changes: 5 additions & 5 deletions src/nfs/nfs_client_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
#include "aio/file_io.h"
#include "nfs_code_definition.h"
#include "nfs_types.h"
#include "perf_counter/perf_counter_wrapper.h"
#include "runtime/rpc/rpc_address.h"
#include "runtime/task/async_calls.h"
#include "runtime/task/task.h"
Expand All @@ -50,6 +49,7 @@
#include "utils/autoref_ptr.h"
#include "utils/error_code.h"
#include "utils/fmt_logging.h"
#include "utils/metrics.h"
#include "utils/zlocks.h"

namespace dsn {
Expand Down Expand Up @@ -311,10 +311,10 @@ class nfs_client_impl
zlock _local_writes_lock;
std::deque<copy_request_ex_ptr> _local_writes;

perf_counter_wrapper _recent_copy_data_size;
perf_counter_wrapper _recent_copy_fail_count;
perf_counter_wrapper _recent_write_data_size;
perf_counter_wrapper _recent_write_fail_count;
METRIC_VAR_DECLARE_counter(nfs_client_copy_bytes);
METRIC_VAR_DECLARE_counter(nfs_client_failed_copy_requests);
METRIC_VAR_DECLARE_counter(nfs_client_write_bytes);
METRIC_VAR_DECLARE_counter(nfs_client_failed_writes);

std::unique_ptr<command_deregister> _nfs_max_copy_rate_megabytes_cmd;

Expand Down
34 changes: 20 additions & 14 deletions src/nfs/nfs_server_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,29 @@
#include <vector>

#include "nfs/nfs_code_definition.h"
#include "perf_counter/perf_counter.h"
#include "runtime/api_layer1.h"
#include "runtime/task/async_calls.h"
#include "utils/TokenBucket.h"
#include "utils/env.h"
#include "utils/autoref_ptr.h"
#include "utils/filesystem.h"
#include "utils/flags.h"
#include "utils/string_conv.h"
#include "utils/string_view.h"
#include "utils/utils.h"

// Metric definitions for the nfs server side. Both are registered with
// `server` as the first macro argument and are bound to nfs_service_impl
// members through METRIC_VAR_INIT_server(...) in the constructor's
// initializer list.

// Bytes read for nfs copy: incremented by `sz` in
// nfs_service_impl::internal_read_callback() when the read finished with
// ERR_OK.
METRIC_DEFINE_counter(
server,
nfs_server_copy_bytes,
dsn::metric_unit::kBytes,
"The accumulated data size in bytes that are read from local file in server during nfs copy");

// Failed reads for nfs copy: incremented once in
// nfs_service_impl::internal_read_callback() whenever `err != ERR_OK`
// (right after the read failure is logged).
METRIC_DEFINE_counter(
server,
nfs_server_failed_copy_requests,
dsn::metric_unit::kRequests,
"The number of nfs copy requests (received by server) that fail to read local file in server");

namespace dsn {
class disk_file;

Expand All @@ -59,24 +72,17 @@ DSN_TAG_VARIABLE(max_send_rate_megabytes_per_disk, FT_MUTABLE);
DSN_DECLARE_int32(file_close_timer_interval_ms_on_server);
DSN_DECLARE_int32(file_close_expire_time_ms);

nfs_service_impl::nfs_service_impl() : ::dsn::serverlet<nfs_service_impl>("nfs")
nfs_service_impl::nfs_service_impl()
: ::dsn::serverlet<nfs_service_impl>("nfs"),
METRIC_VAR_INIT_server(nfs_server_copy_bytes),
METRIC_VAR_INIT_server(nfs_server_failed_copy_requests)
{
_file_close_timer = ::dsn::tasking::enqueue_timer(
LPC_NFS_FILE_CLOSE_TIMER,
&_tracker,
[this] { close_file(); },
std::chrono::milliseconds(FLAGS_file_close_timer_interval_ms_on_server));

_recent_copy_data_size.init_app_counter("eon.nfs_server",
"recent_copy_data_size",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs server copy data size in the recent period");
_recent_copy_fail_count.init_app_counter(
"eon.nfs_server",
"recent_copy_fail_count",
COUNTER_TYPE_VOLATILE_NUMBER,
"nfs server copy fail count count in the recent period");

_send_token_buckets = std::make_unique<dsn::utils::token_buckets>();
register_cli_commands();
}
Expand Down Expand Up @@ -156,9 +162,9 @@ void nfs_service_impl::internal_read_callback(error_code err, size_t sz, callbac

if (err != ERR_OK) {
LOG_ERROR("[nfs_service] read file {} failed, err = {}", cp.file_path, err);
_recent_copy_fail_count->increment();
METRIC_VAR_INCREMENT(nfs_server_failed_copy_requests);
} else {
_recent_copy_data_size->add(sz);
METRIC_VAR_INCREMENT_BY(nfs_server_copy_bytes, sz);
}

::dsn::service::copy_response resp;
Expand Down
6 changes: 3 additions & 3 deletions src/nfs/nfs_server_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@
#include "aio/file_io.h"
#include "nfs_code_definition.h"
#include "nfs_types.h"
#include "perf_counter/perf_counter_wrapper.h"
#include "runtime/serverlet.h"
#include "runtime/task/task.h"
#include "runtime/task/task_tracker.h"
#include "utils/blob.h"
#include "utils/command_manager.h"
#include "utils/error_code.h"
#include "utils/fmt_logging.h"
#include "utils/metrics.h"
#include "utils/token_buckets.h"
#include "utils/zlocks.h"

Expand Down Expand Up @@ -131,8 +131,8 @@ class nfs_service_impl : public ::dsn::serverlet<nfs_service_impl>
std::unique_ptr<dsn::utils::token_buckets>
_send_token_buckets; // rate limiter of send to remote

perf_counter_wrapper _recent_copy_data_size;
perf_counter_wrapper _recent_copy_fail_count;
METRIC_VAR_DECLARE_counter(nfs_server_copy_bytes);
METRIC_VAR_DECLARE_counter(nfs_server_failed_copy_requests);

std::unique_ptr<command_deregister> _nfs_max_send_rate_megabytes_cmd;

Expand Down
2 changes: 1 addition & 1 deletion src/nfs/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ set(MY_PROJ_SRC "")
# "GLOB" for non-recursive search
set(MY_SRC_SEARCH_MODE "GLOB")

set(MY_PROJ_LIBS dsn_nfs dsn_runtime gtest dsn_aio rocksdb test_utils)
set(MY_PROJ_LIBS dsn_nfs dsn_runtime gtest dsn_aio dsn_http rocksdb test_utils)

set(MY_BOOST_LIBS Boost::system Boost::filesystem)

Expand Down
1 change: 0 additions & 1 deletion src/server/pegasus_server_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

#include <fmt/core.h>
#include <rocksdb/status.h>
#include <stdio.h>
#include <thrift/transport/TTransportException.h>
#include <algorithm>
#include <utility>
Expand Down

0 comments on commit 14c8a10

Please sign in to comment.