feat(new_metrics): remove all table-level perf-counters for each repl…

…ica (#1531) #1327 Remove all table-level metrics that are measured by perf-counters. Later, table-level metrics would be aggregated by Go Collector, if necessary.
apache · Dec 11, 2023 · 8e3b1f0 · 8e3b1f0
1 parent 71e5162
commit 8e3b1f0
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 63 deletions.
diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp
@@ -27,9 +27,13 @@
 #include "replica.h"
 
 #include <fmt/core.h>
+#include <fmt/ostream.h>
+#include <inttypes.h>
 #include <rocksdb/status.h>
 #include <functional>
+#include <iosfwd>
 #include <set>
+#include <vector>
 
 #include "backup/replica_backup_manager.h"
 #include "bulk_load/replica_bulk_loader.h"
@@ -43,8 +47,6 @@
 #include "duplication/replica_follower.h"
 #include "mutation.h"
 #include "mutation_log.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counters.h"
 #include "replica/prepare_list.h"
 #include "replica/replica_context.h"
 #include "replica/replication_app_base.h"
@@ -53,8 +55,6 @@
 #include "replica_stub.h"
 #include "runtime/rpc/rpc_message.h"
 #include "runtime/security/access_controller.h"
-#include "runtime/task/task_code.h"
-#include "runtime/task/task_spec.h"
 #include "split/replica_split_manager.h"
 #include "utils/filesystem.h"
 #include "utils/fmt_logging.h"
@@ -338,9 +338,6 @@ replica::replica(replica_stub *stub,
     _disk_migrator = std::make_unique<replica_disk_migrator>(this);
     _replica_follower = std::make_unique<replica_follower>(this);
 
-    // init table level latency perf counters
-    init_table_level_latency_counters();
-
     if (need_restore) {
         // add an extra env for restore
         _extra_envs.insert(
@@ -442,7 +439,6 @@ void replica::on_client_read(dsn::message_ex *request, bool ignore_throttling)
         METRIC_VAR_INCREMENT(backup_requests);
     }
 
-    uint64_t start_time_ns = dsn_now_ns();
     CHECK(_app, "");
     auto storage_error = _app->on_request(request);
     if (dsn_unlikely(storage_error != ERR_OK)) {
@@ -460,12 +456,6 @@ void replica::on_client_read(dsn::message_ex *request, bool ignore_throttling)
         }
         return;
     }
-
-    // If the corresponding perf counter exist, count the duration of this operation.
-    // rpc code of request is already checked in message_ex::rpc_code, so it will always be legal
-    if (_counters_table_level_latency[request->rpc_code()] != nullptr) {
-        _counters_table_level_latency[request->rpc_code()]->set(dsn_now_ns() - start_time_ns);
-    }
 }
 
 void replica::response_client_read(dsn::message_ex *request, error_code error)
@@ -563,26 +553,16 @@ void replica::execute_mutation(mutation_ptr &mu)
         handle_local_failure(err);
     }
 
-    if (status() == partition_status::PS_PRIMARY) {
-        ADD_CUSTOM_POINT(mu->_tracer, "completed");
-        mutation_ptr next = _primary_states.write_queue.check_possible_work(
-            static_cast<int>(_prepare_list->max_decree() - d));
-
-        if (next) {
-            init_prepare(next, false);
-        }
+    if (status() != partition_status::PS_PRIMARY) {
+        return;
     }
 
-    // update table level latency perf-counters for primary partition
-    if (partition_status::PS_PRIMARY == status()) {
-        uint64_t now_ns = dsn_now_ns();
-        for (auto update : mu->data.updates) {
-            // If the corresponding perf counter exist, count the duration of this operation.
-            // code in update will always be legal
-            if (_counters_table_level_latency[update.code] != nullptr) {
-                _counters_table_level_latency[update.code]->set(now_ns - update.start_time_ns);
-            }
-        }
+    ADD_CUSTOM_POINT(mu->_tracer, "completed");
+    auto next = _primary_states.write_queue.check_possible_work(
+        static_cast<int>(_prepare_list->max_decree() - d));
+
+    if (next != nullptr) {
+        init_prepare(next, false);
     }
 }
 
@@ -698,32 +678,6 @@ manual_compaction_status::type replica::get_manual_compact_status() const
     return _app->query_compact_status();
 }
 
-// Replicas on the server which serves for the same table will share the same perf-counter.
-// For example counter `table.level.RPC_RRDB_RRDB_MULTI_PUT.latency(ns)@test_table` is shared by
-// all the replicas for `test_table`.
-void replica::init_table_level_latency_counters()
-{
-    int max_task_code = task_code::max();
-    _counters_table_level_latency.resize(max_task_code + 1);
-
-    for (int code = 0; code <= max_task_code; code++) {
-        _counters_table_level_latency[code] = nullptr;
-        if (get_storage_rpc_req_codes().find(task_code(code)) !=
-            get_storage_rpc_req_codes().end()) {
-            std::string counter_str = fmt::format(
-                "table.level.{}.latency(ns)@{}", task_code(code).to_string(), _app_info.app_name);
-            _counters_table_level_latency[code] =
-                dsn::perf_counters::instance()
-                    .get_app_counter("eon.replica",
-                                     counter_str.c_str(),
-                                     COUNTER_TYPE_NUMBER_PERCENTILES,
-                                     counter_str.c_str(),
-                                     true)
-                    .get();
-        }
-    }
-}
-
 void replica::on_detect_hotkey(const detect_hotkey_request &req, detect_hotkey_response &resp)
 {
     _app->on_detect_hotkey(req, resp);

diff --git a/src/replica/replica.h b/src/replica/replica.h
@@ -34,7 +34,6 @@
 #include <memory>
 #include <string>
 #include <utility>
-#include <vector>
 
 #include "common/replication_other_types.h"
 #include "dsn.layer2_types.h"
@@ -69,7 +68,6 @@ class rocksdb_wrapper_test;
 
 namespace dsn {
 class gpid;
-class perf_counter;
 class rpc_address;
 
 namespace dist {
@@ -478,8 +476,6 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
 
     manual_compaction_status::type get_manual_compact_status() const;
 
-    void init_table_level_latency_counters();
-
     void on_detect_hotkey(const detect_hotkey_request &req, /*out*/ detect_hotkey_response &resp);
 
     uint32_t query_data_version() const;
@@ -659,7 +655,6 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
     METRIC_VAR_DECLARE_counter(splitting_rejected_read_requests);
     METRIC_VAR_DECLARE_counter(bulk_load_ingestion_rejected_write_requests);
     METRIC_VAR_DECLARE_counter(dup_rejected_non_idempotent_write_requests);
-    std::vector<perf_counter *> _counters_table_level_latency;
 
     METRIC_VAR_DECLARE_counter(learn_count);
     METRIC_VAR_DECLARE_counter(learn_rounds);