Skip to content

Commit

Permalink
feat(new_metrics): remove all table-level perf-counters for each repl…
Browse files Browse the repository at this point in the history
…ica (#1531)

#1327

Remove all table-level metrics that are measured by perf-counters. Later,
table-level metrics will be aggregated by the Go Collector, if necessary.
  • Loading branch information
empiredan committed Dec 11, 2023
1 parent 71e5162 commit 8e3b1f0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 63 deletions.
70 changes: 12 additions & 58 deletions src/replica/replica.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@
#include "replica.h"

#include <fmt/core.h>
#include <fmt/ostream.h>
#include <inttypes.h>
#include <rocksdb/status.h>
#include <functional>
#include <iosfwd>
#include <set>
#include <vector>

#include "backup/replica_backup_manager.h"
#include "bulk_load/replica_bulk_loader.h"
Expand All @@ -43,8 +47,6 @@
#include "duplication/replica_follower.h"
#include "mutation.h"
#include "mutation_log.h"
#include "perf_counter/perf_counter.h"
#include "perf_counter/perf_counters.h"
#include "replica/prepare_list.h"
#include "replica/replica_context.h"
#include "replica/replication_app_base.h"
Expand All @@ -53,8 +55,6 @@
#include "replica_stub.h"
#include "runtime/rpc/rpc_message.h"
#include "runtime/security/access_controller.h"
#include "runtime/task/task_code.h"
#include "runtime/task/task_spec.h"
#include "split/replica_split_manager.h"
#include "utils/filesystem.h"
#include "utils/fmt_logging.h"
Expand Down Expand Up @@ -338,9 +338,6 @@ replica::replica(replica_stub *stub,
_disk_migrator = std::make_unique<replica_disk_migrator>(this);
_replica_follower = std::make_unique<replica_follower>(this);

// init table level latency perf counters
init_table_level_latency_counters();

if (need_restore) {
// add an extra env for restore
_extra_envs.insert(
Expand Down Expand Up @@ -442,7 +439,6 @@ void replica::on_client_read(dsn::message_ex *request, bool ignore_throttling)
METRIC_VAR_INCREMENT(backup_requests);
}

uint64_t start_time_ns = dsn_now_ns();
CHECK(_app, "");
auto storage_error = _app->on_request(request);
if (dsn_unlikely(storage_error != ERR_OK)) {
Expand All @@ -460,12 +456,6 @@ void replica::on_client_read(dsn::message_ex *request, bool ignore_throttling)
}
return;
}

// If the corresponding perf counter exist, count the duration of this operation.
// rpc code of request is already checked in message_ex::rpc_code, so it will always be legal
if (_counters_table_level_latency[request->rpc_code()] != nullptr) {
_counters_table_level_latency[request->rpc_code()]->set(dsn_now_ns() - start_time_ns);
}
}

void replica::response_client_read(dsn::message_ex *request, error_code error)
Expand Down Expand Up @@ -563,26 +553,16 @@ void replica::execute_mutation(mutation_ptr &mu)
handle_local_failure(err);
}

if (status() == partition_status::PS_PRIMARY) {
ADD_CUSTOM_POINT(mu->_tracer, "completed");
mutation_ptr next = _primary_states.write_queue.check_possible_work(
static_cast<int>(_prepare_list->max_decree() - d));

if (next) {
init_prepare(next, false);
}
if (status() != partition_status::PS_PRIMARY) {
return;
}

// update table level latency perf-counters for primary partition
if (partition_status::PS_PRIMARY == status()) {
uint64_t now_ns = dsn_now_ns();
for (auto update : mu->data.updates) {
// If the corresponding perf counter exist, count the duration of this operation.
// code in update will always be legal
if (_counters_table_level_latency[update.code] != nullptr) {
_counters_table_level_latency[update.code]->set(now_ns - update.start_time_ns);
}
}
ADD_CUSTOM_POINT(mu->_tracer, "completed");
auto next = _primary_states.write_queue.check_possible_work(
static_cast<int>(_prepare_list->max_decree() - d));

if (next != nullptr) {
init_prepare(next, false);
}
}

Expand Down Expand Up @@ -698,32 +678,6 @@ manual_compaction_status::type replica::get_manual_compact_status() const
return _app->query_compact_status();
}

// Replicas on the server which serves for the same table will share the same perf-counter.
// For example counter `table.level.RPC_RRDB_RRDB_MULTI_PUT.latency(ns)@test_table` is shared by
// all the replicas for `test_table`.
void replica::init_table_level_latency_counters()
{
int max_task_code = task_code::max();
_counters_table_level_latency.resize(max_task_code + 1);

for (int code = 0; code <= max_task_code; code++) {
_counters_table_level_latency[code] = nullptr;
if (get_storage_rpc_req_codes().find(task_code(code)) !=
get_storage_rpc_req_codes().end()) {
std::string counter_str = fmt::format(
"table.level.{}.latency(ns)@{}", task_code(code).to_string(), _app_info.app_name);
_counters_table_level_latency[code] =
dsn::perf_counters::instance()
.get_app_counter("eon.replica",
counter_str.c_str(),
COUNTER_TYPE_NUMBER_PERCENTILES,
counter_str.c_str(),
true)
.get();
}
}
}

void replica::on_detect_hotkey(const detect_hotkey_request &req, detect_hotkey_response &resp)
{
_app->on_detect_hotkey(req, resp);
Expand Down
5 changes: 0 additions & 5 deletions src/replica/replica.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common/replication_other_types.h"
#include "dsn.layer2_types.h"
Expand Down Expand Up @@ -69,7 +68,6 @@ class rocksdb_wrapper_test;

namespace dsn {
class gpid;
class perf_counter;
class rpc_address;

namespace dist {
Expand Down Expand Up @@ -478,8 +476,6 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba

manual_compaction_status::type get_manual_compact_status() const;

void init_table_level_latency_counters();

void on_detect_hotkey(const detect_hotkey_request &req, /*out*/ detect_hotkey_response &resp);

uint32_t query_data_version() const;
Expand Down Expand Up @@ -659,7 +655,6 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
METRIC_VAR_DECLARE_counter(splitting_rejected_read_requests);
METRIC_VAR_DECLARE_counter(bulk_load_ingestion_rejected_write_requests);
METRIC_VAR_DECLARE_counter(dup_rejected_non_idempotent_write_requests);
std::vector<perf_counter *> _counters_table_level_latency;

METRIC_VAR_DECLARE_counter(learn_count);
METRIC_VAR_DECLARE_counter(learn_rounds);
Expand Down

0 comments on commit 8e3b1f0

Please sign in to comment.