From 813164dcacfbc6c0182e7be825fc14a6121887d1 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Thu, 7 Nov 2019 14:26:35 +0800 Subject: [PATCH 01/18] get_app_partition_stat --- src/shell/command_helper.h | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 278566829b..1f7ace8d52 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -657,6 +657,67 @@ inline bool decode_node_perf_counter_info(const dsn::rpc_address &node_addr, return true; } +// rows: key-app name, value-perf counters for each partition +inline bool get_app_partition_stat(shell_context *sc, + std::map> &rows) +{ + // get apps and nodes + std::vector<::dsn::app_info> apps; + std::vector nodes; + if (!get_apps_and_nodes(sc, apps, nodes)) { + return false; + } + + // get app_id --> app_name + std::map app_id_name; + for (::dsn::app_info &app : apps) { + app_id_name[app.app_id] = app.app_name; + rows[app.app_name].resize(app.partition_count); + } + + // get app_id --> partitions + std::map> app_partitions; + if (!get_app_partitions(sc, apps, app_partitions)) { + return false; + } + + // get all of the perf counters with format ".*@.*" + ::dsn::command command; + command.cmd = "perf-counters"; + char tmp[256]; + sprintf(tmp, ".*@.*"); + command.arguments.emplace_back(tmp); + std::vector> results; + call_remote_command(sc, nodes, command, results); + + for (int i = 0; i < nodes.size(); ++i) { + // decode info of perf-counters on node i + dsn::perf_counter_info info; + if (!decode_node_perf_counter_info(nodes[i].address, results[i], info)) { + return false; + } + + for (dsn::perf_counter_metric &m : info.counters) { + // get app_id/partition_id/counter_name from the name of perf-counter + int32_t app_id_x, partition_index_x; + std::string counter_name; + if (!parse_app_pegasus_perf_counter_name( + m.name, app_id_x, partition_index_x, counter_name)) { + continue; + } + + // only on primary partition will be counted + auto find = app_partitions.find(app_id_x); + if (find != app_partitions.end() && + find->second[partition_index_x].primary == nodes[i].address) { + update_app_pegasus_perf_counter( + rows[app_id_name[app_id_x]][partition_index_x], counter_name, m.value); + } + } + } + return true; +} + inline bool get_app_stat(shell_context *sc, const std::string &app_name, std::vector &rows) { From c66556bccff792d8181efb95edb45fe8c24a7b19 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Thu, 7 Nov 2019 15:29:53 +0800 Subject: [PATCH 02/18] hotspot --- src/base/pegasus_const.h | 1 + src/server/info_collector.cpp | 101 ++++++++-------------------------- src/server/info_collector.h | 43 ++++++++++++++- src/shell/command_helper.h | 75 ++++++++++++++++++++++++- 4 files changed, 141 insertions(+), 79 deletions(-) diff --git a/src/base/pegasus_const.h b/src/base/pegasus_const.h index 6d096f7b83..6889bf53d6 100644 --- a/src/base/pegasus_const.h +++ b/src/base/pegasus_const.h @@ -11,6 +11,7 @@ namespace pegasus { const int SCAN_CONTEXT_ID_VALID_MIN = 0; const int SCAN_CONTEXT_ID_COMPLETED = -1; const int SCAN_CONTEXT_ID_NOT_EXIST = -2; +const int HOTSPOT_MAX_MIN_RATIO_THRESHOLD = 10; extern const std::string ROCKSDB_ENV_RESTORE_FORCE_RESTORE; extern const std::string ROCKSDB_ENV_RESTORE_POLICY_NAME; diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 15a86b8452..8b73e32aed 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -130,88 +130,33 @@ void info_collector::stop() { _tracker.cancel_outstanding_tasks(); } void info_collector::on_app_stat() { ddebug("start to stat apps"); - std::vector rows; - if (!get_app_stat(&_shell_context, "", rows)) { + std::map> all_rows; + if (!get_app_partition_stat(&_shell_context, all_rows)) { derror("call get_app_stat() failed"); return; } - std::vector read_qps; - std::vector write_qps; - rows.resize(rows.size() + 1); - read_qps.resize(rows.size()); - write_qps.resize(rows.size()); - row_data &all = rows.back(); - all.row_name = "_all_"; - for (int i = 0; i < rows.size() - 1; ++i) { - row_data &row = rows[i]; - all.get_qps += row.get_qps; - all.multi_get_qps += row.multi_get_qps; - all.put_qps += row.put_qps; - all.multi_put_qps += row.multi_put_qps; - all.remove_qps += row.remove_qps; - all.multi_remove_qps += row.multi_remove_qps; - all.incr_qps += row.incr_qps; - all.check_and_set_qps += row.check_and_set_qps; - all.check_and_mutate_qps += row.check_and_mutate_qps; - all.scan_qps += row.scan_qps; - all.recent_read_cu += row.recent_read_cu; - all.recent_write_cu += row.recent_write_cu; - all.recent_expire_count += row.recent_expire_count; - all.recent_filter_count += row.recent_filter_count; - all.recent_abnormal_count += row.recent_abnormal_count; - all.recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; - all.recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; - all.storage_mb += row.storage_mb; - all.storage_count += row.storage_count; - all.rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; - all.rdb_block_cache_total_count += row.rdb_block_cache_total_count; - all.rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; - all.rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; - read_qps[i] = row.get_qps + row.multi_get_qps + row.scan_qps; - write_qps[i] = row.put_qps + row.multi_put_qps + row.remove_qps + row.multi_remove_qps + - row.incr_qps + row.check_and_set_qps + row.check_and_mutate_qps; - } - read_qps[read_qps.size() - 1] = all.get_qps + all.multi_get_qps + all.scan_qps; - write_qps[read_qps.size() - 1] = all.put_qps + all.multi_put_qps + all.remove_qps + - all.multi_remove_qps + all.incr_qps + all.check_and_set_qps + - all.check_and_mutate_qps; - for (int i = 0; i < rows.size(); ++i) { - row_data &row = rows[i]; - AppStatCounters *counters = get_app_counters(row.row_name); - counters->get_qps->set(row.get_qps); - counters->multi_get_qps->set(row.multi_get_qps); - counters->put_qps->set(row.put_qps); - counters->multi_put_qps->set(row.multi_put_qps); - counters->remove_qps->set(row.remove_qps); - counters->multi_remove_qps->set(row.multi_remove_qps); - counters->incr_qps->set(row.incr_qps); - counters->check_and_set_qps->set(row.check_and_set_qps); - counters->check_and_mutate_qps->set(row.check_and_mutate_qps); - counters->scan_qps->set(row.scan_qps); - counters->recent_read_cu->set(row.recent_read_cu); - counters->recent_write_cu->set(row.recent_write_cu); - counters->recent_expire_count->set(row.recent_expire_count); - counters->recent_filter_count->set(row.recent_filter_count); - counters->recent_abnormal_count->set(row.recent_abnormal_count); - counters->recent_write_throttling_delay_count->set(row.recent_write_throttling_delay_count); - counters->recent_write_throttling_reject_count->set( - row.recent_write_throttling_reject_count); - counters->storage_mb->set(row.storage_mb); - counters->storage_count->set(row.storage_count); - counters->rdb_block_cache_hit_rate->set( - std::abs(row.rdb_block_cache_total_count) < 1e-6 - ? 0 - : row.rdb_block_cache_hit_count / row.rdb_block_cache_total_count * 1000000); - counters->rdb_index_and_filter_blocks_mem_usage->set( - row.rdb_index_and_filter_blocks_mem_usage); - counters->rdb_memtable_mem_usage->set(row.rdb_memtable_mem_usage); - counters->read_qps->set(read_qps[i]); - counters->write_qps->set(write_qps[i]); + + row_data all_data; + all_data.row_name = "_all_"; + for (auto app_rows : all_rows) { + // get data for app + row_data app_data; + for (auto partition_row : app_rows.second) { + app_data.merge(partition_row); + } + get_app_counters(app_rows.first)->set(app_data); + + // get data for all of these rows + all_data.merge(app_data); } + + // get perf-counters for all of the apps + AppStatCounters *all_counters = get_app_counters(all_data.row_name); + all_counters->set(all_data); ddebug("stat apps succeed, app_count = %d, total_read_qps = %.2f, total_write_qps = %.2f", - (int)(rows.size() - 1), - read_qps[read_qps.size() - 1], - write_qps[read_qps.size() - 1]); + (int)(all_rows.size() - 1), + all_counters->read_qps.get(), + all_counters->write_qps.get()); } info_collector::AppStatCounters *info_collector::get_app_counters(const std::string &app_name) @@ -257,6 +202,8 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str INIT_COUNTER(rdb_memtable_mem_usage); INIT_COUNTER(read_qps); INIT_COUNTER(write_qps); + INIT_COUNTER(qps_max_min_ratio); + INIT_COUNTER(cu_max_min_ratio); _app_stat_counters[app_name] = counters; return counters; } diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 69e55e3904..4fe427299a 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -17,6 +17,7 @@ #include #include #include +#include #include "../shell/commands.h" @@ -30,6 +31,45 @@ class info_collector public: struct AppStatCounters { + void set(const row_data &row) + { + get_qps->set(row.get_qps); + multi_get_qps->set(row.multi_get_qps); + put_qps->set(row.put_qps); + multi_put_qps->set(row.multi_put_qps); + remove_qps->set(row.remove_qps); + multi_remove_qps->set(row.multi_remove_qps); + incr_qps->set(row.incr_qps); + check_and_set_qps->set(row.check_and_set_qps); + check_and_mutate_qps->set(row.check_and_mutate_qps); + scan_qps->set(row.scan_qps); + recent_read_cu->set(row.recent_read_cu); + recent_write_cu->set(row.recent_write_cu); + recent_expire_count->set(row.recent_expire_count); + recent_filter_count->set(row.recent_filter_count); + recent_abnormal_count->set(row.recent_abnormal_count); + recent_write_throttling_delay_count->set(row.recent_write_throttling_delay_count); + recent_write_throttling_reject_count->set(row.recent_write_throttling_reject_count); + storage_mb->set(row.storage_mb); + storage_count->set(row.storage_count); + rdb_block_cache_hit_rate->set(std::abs(row.rdb_block_cache_total_count) < 1e-6 + ? 0 + : row.rdb_block_cache_hit_count / + row.rdb_block_cache_total_count * 1000000); + rdb_index_and_filter_blocks_mem_usage->set(row.rdb_index_and_filter_blocks_mem_usage); + rdb_memtable_mem_usage->set(row.rdb_memtable_mem_usage); + read_qps->set(row.get_read_qps()); + write_qps->set(row.get_write_qps()); + + double qps_ratio = row.max_qps / row.min_qps; + double cu_ratio = row.max_cu / row.min_cu; + qps_max_min_ratio->set(qps_ratio); + cu_max_min_ratio->set(cu_ratio); + if (qps_ratio >= HOTSPOT_MAX_MIN_RATIO_THRESHOLD || cu_ratio >= HOTSPOT_MAX_MIN_RATIO_THRESHOLD) { + ddebug("the ratio of max/min is larger than 10 for qps or cu."); + } + } + ::dsn::perf_counter_wrapper get_qps; ::dsn::perf_counter_wrapper multi_get_qps; ::dsn::perf_counter_wrapper put_qps; @@ -50,11 +90,12 @@ class info_collector ::dsn::perf_counter_wrapper storage_mb; ::dsn::perf_counter_wrapper storage_count; ::dsn::perf_counter_wrapper rdb_block_cache_hit_rate; - ::dsn::perf_counter_wrapper rdb_block_cache_mem_usage; ::dsn::perf_counter_wrapper rdb_index_and_filter_blocks_mem_usage; ::dsn::perf_counter_wrapper rdb_memtable_mem_usage; ::dsn::perf_counter_wrapper read_qps; ::dsn::perf_counter_wrapper write_qps; + ::dsn::perf_counter_wrapper qps_max_min_ratio; + ::dsn::perf_counter_wrapper cu_max_min_ratio; }; info_collector(); diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 1f7ace8d52..f7b20256d9 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -512,9 +512,72 @@ inline bool parse_app_pegasus_perf_counter_name(const std::string &name, struct row_data { + void merge(const row_data &row) + { + get_qps += row.get_qps; + multi_get_qps += row.multi_get_qps; + put_qps += row.put_qps; + multi_put_qps += row.multi_put_qps; + remove_qps += row.remove_qps; + multi_remove_qps += row.multi_remove_qps; + incr_qps += row.incr_qps; + check_and_set_qps += row.check_and_set_qps; + check_and_mutate_qps += row.check_and_mutate_qps; + scan_qps += row.scan_qps; + recent_read_cu += row.recent_read_cu; + recent_write_cu += row.recent_write_cu; + recent_expire_count += row.recent_expire_count; + recent_filter_count += row.recent_filter_count; + recent_abnormal_count += row.recent_abnormal_count; + recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; + recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; + storage_mb += row.storage_mb; + storage_count += row.storage_count; + rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; + rdb_block_cache_total_count += row.rdb_block_cache_total_count; + rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; + rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; + + // get max_qps、min_qps and the partition id of max_qps + double row_total_qps = row.get_total_qps(); + if (max_qps < row_total_qps) { + max_qps = row_total_qps; + max_qps_partition_id = row.partition_id; + } else if (min_qps > row_total_qps) { + min_qps = row_total_qps; + } + + // get max_cu、min_cu and the partition id of max_cu + double row_total_cu = row.get_total_cu(); + if (max_cu < row_total_cu) { + max_cu = row_total_cu; + max_cu_partition_id = row.partition_id; + } else if (min_cu > row_total_cu) { + min_cu = row_total_cu; + } + } + + double get_read_qps() const { + return get_qps + multi_get_qps + scan_qps; + } + + double get_write_qps() const { + return put_qps + multi_put_qps + remove_qps + multi_remove_qps + incr_qps + + check_and_set_qps + check_and_mutate_qps; + } + + double get_total_qps() const { + return this->get_read_qps() + this->get_write_qps(); + } + + double get_total_cu() const { + return recent_read_cu + recent_write_cu; + } + std::string row_name; int32_t app_id = 0; int32_t partition_count = 0; + int32_t partition_id = 0; double get_qps = 0; double multi_get_qps = 0; double put_qps = 0; @@ -538,6 +601,14 @@ struct row_data double rdb_block_cache_total_count = 0; double rdb_index_and_filter_blocks_mem_usage = 0; double rdb_memtable_mem_usage = 0; + + // used when merging + double max_qps = 0; + double min_qps = 1; // set min_qps to 1, in order to avoid the divisor to be zero + double max_cu = 0; + double min_cu = 1; + int max_qps_partition_id = 0; + int max_cu_partition_id = 0; }; inline bool @@ -778,7 +849,9 @@ get_app_stat(shell_context *sc, const std::string &app_name, std::vector Date: Thu, 7 Nov 2019 20:02:01 +0800 Subject: [PATCH 03/18] modify get_app_partition_stat --- src/shell/command_helper.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 1f7ace8d52..0ddc2bd6e5 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -710,8 +710,11 @@ inline bool get_app_partition_stat(shell_context *sc, auto find = app_partitions.find(app_id_x); if (find != app_partitions.end() && find->second[partition_index_x].primary == nodes[i].address) { - update_app_pegasus_perf_counter( - rows[app_id_name[app_id_x]][partition_index_x], counter_name, m.value); + const std::string app_name = app_id_name[app_id_x]; + row_data &row = rows[app_name][partition_index_x]; + row.row_name = std::to_string(partition_index_x); + row.app_id = app_id_x; + update_app_pegasus_perf_counter(row, counter_name, m.value); } } } From 8aee24dfd35aa1cc70d5e54f3dd420168380efa0 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 10:43:07 +0800 Subject: [PATCH 04/18] hotspot --- src/server/info_collector.cpp | 27 ++-- src/server/info_collector.h | 223 +++++++++++++++++++++++++++------- src/shell/command_helper.h | 63 +--------- 3 files changed, 197 insertions(+), 116 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 8b73e32aed..6040d9ca69 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -136,27 +136,24 @@ void info_collector::on_app_stat() return; } - row_data all_data; - all_data.row_name = "_all_"; + row_statistics all_stat("_all_"); for (auto app_rows : all_rows) { - // get data for app - row_data app_data; + // get statistics data for app + row_statistics app_stat(app_rows.first); for (auto partition_row : app_rows.second) { - app_data.merge(partition_row); + app_stat.calc(partition_row); } - get_app_counters(app_rows.first)->set(app_data); + get_app_counters(app_rows.first)->set(app_stat); - // get data for all of these rows - all_data.merge(app_data); + // get row data statistics for all of the apps + all_stat.merge(app_stat); } + get_app_counters(all_stat.app_name)->set(all_stat); - // get perf-counters for all of the apps - AppStatCounters *all_counters = get_app_counters(all_data.row_name); - all_counters->set(all_data); ddebug("stat apps succeed, app_count = %d, total_read_qps = %.2f, total_write_qps = %.2f", (int)(all_rows.size() - 1), - all_counters->read_qps.get(), - all_counters->write_qps.get()); + all_stat.get_read_qps(), + all_stat.get_write_qps()); } info_collector::AppStatCounters *info_collector::get_app_counters(const std::string &app_name) @@ -202,8 +199,8 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str INIT_COUNTER(rdb_memtable_mem_usage); INIT_COUNTER(read_qps); INIT_COUNTER(write_qps); - INIT_COUNTER(qps_max_min_ratio); - INIT_COUNTER(cu_max_min_ratio); + INIT_COUNTER(qps_max_min_scale); + INIT_COUNTER(cu_max_min_scale); _app_stat_counters[app_name] = counters; return counters; } diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 26f276c397..e1c1d7f9b9 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -24,54 +24,195 @@ namespace pegasus { namespace server { class result_writer; -static const int HOTSPOT_MAX_MIN_RATIO_THRESHOLD = 10; +static const int HOTSPOT_MAX_MIN_SCALE_THRESHOLD = 10; class info_collector { public: + struct row_statistics + { + row_statistics(const std::string &app_name) : app_name(app_name) {} + + double get_read_qps() const { return get_qps + multi_get_qps + scan_qps; } + + double get_write_qps() const + { + return put_qps + multi_put_qps + remove_qps + multi_remove_qps + incr_qps + + check_and_set_qps + check_and_mutate_qps; + } + + void calc(const row_data &row) + { + get_qps += row.get_qps; + multi_get_qps += row.multi_get_qps; + put_qps += row.put_qps; + multi_put_qps += row.multi_put_qps; + remove_qps += row.remove_qps; + multi_remove_qps += row.multi_remove_qps; + incr_qps += row.incr_qps; + check_and_set_qps += row.check_and_set_qps; + check_and_mutate_qps += row.check_and_mutate_qps; + scan_qps += row.scan_qps; + recent_read_cu += row.recent_read_cu; + recent_write_cu += row.recent_write_cu; + recent_expire_count += row.recent_expire_count; + recent_filter_count += row.recent_filter_count; + recent_abnormal_count += row.recent_abnormal_count; + recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; + recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; + storage_mb += row.storage_mb; + storage_count += row.storage_count; + rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; + rdb_block_cache_total_count += row.rdb_block_cache_total_count; + rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; + rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; + + // get max_total_qps、min_total_qps and the id of this partition which has max_total_qps + double row_total_qps = row.get_total_qps(); + if (max_total_qps < row_total_qps) { + max_total_qps = row_total_qps; + max_qps_partition_id = row.row_name; + } else if (min_total_qps > row_total_qps) { + min_total_qps = row_total_qps; + } + + // get max_total_cu、min_total_cu and the id of this partition which has max_total_cu + double row_total_cu = row.get_total_cu(); + if (max_total_cu < row_total_cu) { + max_total_cu = row_total_cu; + max_cu_partition_id = row.row_name; + } else if (min_total_cu > row_total_cu) { + min_total_cu = row_total_cu; + } + } + + void merge(const row_statistics &row_stat) + { + get_qps += row_stat.get_qps; + multi_get_qps += row_stat.multi_get_qps; + put_qps += row_stat.put_qps; + multi_put_qps += row_stat.multi_put_qps; + remove_qps += row_stat.remove_qps; + multi_remove_qps += row_stat.multi_remove_qps; + incr_qps += row_stat.incr_qps; + check_and_set_qps += row_stat.check_and_set_qps; + check_and_mutate_qps += row_stat.check_and_mutate_qps; + scan_qps += row_stat.scan_qps; + recent_read_cu += row_stat.recent_read_cu; + recent_write_cu += row_stat.recent_write_cu; + recent_expire_count += row_stat.recent_expire_count; + recent_filter_count += row_stat.recent_filter_count; + recent_abnormal_count += row_stat.recent_abnormal_count; + recent_write_throttling_delay_count += row_stat.recent_write_throttling_delay_count; + recent_write_throttling_reject_count += row_stat.recent_write_throttling_reject_count; + storage_mb += row_stat.storage_mb; + storage_count += row_stat.storage_count; + rdb_block_cache_hit_count += row_stat.rdb_block_cache_hit_count; + rdb_block_cache_total_count += row_stat.rdb_block_cache_total_count; + rdb_index_and_filter_blocks_mem_usage += row_stat.rdb_index_and_filter_blocks_mem_usage; + rdb_memtable_mem_usage += row_stat.rdb_memtable_mem_usage; + + // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app + if (this->app_name == row_stat.app_name) { + // get max_total_qps、min_total_qps and the id of this partition which has + // max_total_qps + if (max_total_qps < row_stat.max_total_qps) { + max_total_qps = row_stat.max_total_qps; + max_qps_partition_id = row_stat.max_qps_partition_id; + } + min_total_qps = std::min(min_total_qps, row_stat.min_total_qps); + + // get max_total_cu、min_total_cu and the id of this partition which has + // max_total_cu + if (max_total_cu < row_stat.max_total_cu) { + max_total_cu = row_stat.max_total_cu; + max_cu_partition_id = row_stat.max_cu_partition_id; + } + min_total_cu = std::min(min_total_cu, row_stat.min_total_cu); + } + } + + std::string app_name; + double get_qps = 0; + double multi_get_qps = 0; + double put_qps = 0; + double multi_put_qps = 0; + double remove_qps = 0; + double multi_remove_qps = 0; + double incr_qps = 0; + double check_and_set_qps = 0; + double check_and_mutate_qps = 0; + double scan_qps = 0; + double recent_read_cu = 0; + double recent_write_cu = 0; + double recent_expire_count = 0; + double recent_filter_count = 0; + double recent_abnormal_count = 0; + double recent_write_throttling_delay_count = 0; + double recent_write_throttling_reject_count = 0; + double storage_mb = 0; + double storage_count = 0; + double rdb_block_cache_hit_count = 0; + double rdb_block_cache_total_count = 0; + double rdb_index_and_filter_blocks_mem_usage = 0; + double rdb_memtable_mem_usage = 0; + + // used when merging + double max_total_qps = 0; + double min_total_qps = INT_MAX; + double max_total_cu = 0; + double min_total_cu = INT_MAX; + std::string max_qps_partition_id = 0; + std::string max_cu_partition_id = 0; + }; + struct AppStatCounters { - void set(const row_data &row) + void set(const row_statistics &row_stat) { - get_qps->set(row.get_qps); - multi_get_qps->set(row.multi_get_qps); - put_qps->set(row.put_qps); - multi_put_qps->set(row.multi_put_qps); - remove_qps->set(row.remove_qps); - multi_remove_qps->set(row.multi_remove_qps); - incr_qps->set(row.incr_qps); - check_and_set_qps->set(row.check_and_set_qps); - check_and_mutate_qps->set(row.check_and_mutate_qps); - scan_qps->set(row.scan_qps); - recent_read_cu->set(row.recent_read_cu); - recent_write_cu->set(row.recent_write_cu); - recent_expire_count->set(row.recent_expire_count); - recent_filter_count->set(row.recent_filter_count); - recent_abnormal_count->set(row.recent_abnormal_count); - recent_write_throttling_delay_count->set(row.recent_write_throttling_delay_count); - recent_write_throttling_reject_count->set(row.recent_write_throttling_reject_count); - storage_mb->set(row.storage_mb); - storage_count->set(row.storage_count); - rdb_block_cache_hit_rate->set(std::abs(row.rdb_block_cache_total_count) < 1e-6 + get_qps->set(row_stat.get_qps); + multi_get_qps->set(row_stat.multi_get_qps); + put_qps->set(row_stat.put_qps); + multi_put_qps->set(row_stat.multi_put_qps); + remove_qps->set(row_stat.remove_qps); + multi_remove_qps->set(row_stat.multi_remove_qps); + incr_qps->set(row_stat.incr_qps); + check_and_set_qps->set(row_stat.check_and_set_qps); + check_and_mutate_qps->set(row_stat.check_and_mutate_qps); + scan_qps->set(row_stat.scan_qps); + recent_read_cu->set(row_stat.recent_read_cu); + recent_write_cu->set(row_stat.recent_write_cu); + recent_expire_count->set(row_stat.recent_expire_count); + recent_filter_count->set(row_stat.recent_filter_count); + recent_abnormal_count->set(row_stat.recent_abnormal_count); + recent_write_throttling_delay_count->set(row_stat.recent_write_throttling_delay_count); + recent_write_throttling_reject_count->set( + row_stat.recent_write_throttling_reject_count); + storage_mb->set(row_stat.storage_mb); + storage_count->set(row_stat.storage_count); + rdb_block_cache_hit_rate->set(std::abs(row_stat.rdb_block_cache_total_count) < 1e-6 ? 0 - : row.rdb_block_cache_hit_count / - row.rdb_block_cache_total_count * 1000000); - rdb_index_and_filter_blocks_mem_usage->set(row.rdb_index_and_filter_blocks_mem_usage); - rdb_memtable_mem_usage->set(row.rdb_memtable_mem_usage); - read_qps->set(row.get_read_qps()); - write_qps->set(row.get_write_qps()); - - double qps_ratio = row.max_qps / row.min_qps; - double cu_ratio = row.max_cu / row.min_cu; - qps_max_min_ratio->set(qps_ratio); - cu_max_min_ratio->set(cu_ratio); - if (qps_ratio >= HOTSPOT_MAX_MIN_RATIO_THRESHOLD) { - ddebug("the ratio of max/min is larger than 10 for qps(partition id: %s)", - row.max_qps_partition_id.c_str()); + : row_stat.rdb_block_cache_hit_count / + row_stat.rdb_block_cache_total_count * 1000000); + rdb_index_and_filter_blocks_mem_usage->set( + row_stat.rdb_index_and_filter_blocks_mem_usage); + rdb_memtable_mem_usage->set(row_stat.rdb_memtable_mem_usage); + read_qps->set(row_stat.get_read_qps()); + write_qps->set(row_stat.get_write_qps()); + + double qps_scale = row_stat.max_total_qps / std::max(row_stat.min_total_qps, 1.0); + double cu_scale = row_stat.max_total_cu / std::max(row_stat.min_total_cu, 1.0); + qps_max_min_scale->set(qps_scale); + cu_max_min_scale->set(cu_scale); + if (qps_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { + ddebug("There is a hotspot about qps in %s(%s))", + row_stat.app_name.c_str(), + row_stat.max_qps_partition_id.c_str()); } - if (cu_ratio >= HOTSPOT_MAX_MIN_RATIO_THRESHOLD) { - ddebug("the ratio of max/min is larger than 10 for cu(partition id: %s)", - row.max_cu_partition_id.c_str()); + if (cu_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { + ddebug("There is a hotspot about cu in %s(%s))", + row_stat.app_name.c_str(), + row_stat.max_cu_partition_id.c_str()); } } @@ -99,8 +240,8 @@ class info_collector ::dsn::perf_counter_wrapper rdb_memtable_mem_usage; ::dsn::perf_counter_wrapper read_qps; ::dsn::perf_counter_wrapper write_qps; - ::dsn::perf_counter_wrapper qps_max_min_ratio; - ::dsn::perf_counter_wrapper cu_max_min_ratio; + ::dsn::perf_counter_wrapper qps_max_min_scale; + ::dsn::perf_counter_wrapper cu_max_min_scale; }; info_collector(); diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 343e2f38fb..8a45c71084 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -512,61 +512,12 @@ inline bool parse_app_pegasus_perf_counter_name(const std::string &name, struct row_data { - void merge(const row_data &row) + double get_total_qps() const { - get_qps += row.get_qps; - multi_get_qps += row.multi_get_qps; - put_qps += row.put_qps; - multi_put_qps += row.multi_put_qps; - remove_qps += row.remove_qps; - multi_remove_qps += row.multi_remove_qps; - incr_qps += row.incr_qps; - check_and_set_qps += row.check_and_set_qps; - check_and_mutate_qps += row.check_and_mutate_qps; - scan_qps += row.scan_qps; - recent_read_cu += row.recent_read_cu; - recent_write_cu += row.recent_write_cu; - recent_expire_count += row.recent_expire_count; - recent_filter_count += row.recent_filter_count; - recent_abnormal_count += row.recent_abnormal_count; - recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; - recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; - storage_mb += row.storage_mb; - storage_count += row.storage_count; - rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; - rdb_block_cache_total_count += row.rdb_block_cache_total_count; - rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; - rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; - - // get max_qps、min_qps and the id of this partition which has max_qps - double row_total_qps = row.get_total_qps(); - if (max_qps < row_total_qps) { - max_qps = row_total_qps; - max_qps_partition_id = row.row_name; - } else if (min_qps > row_total_qps) { - min_qps = row_total_qps; - } - - // get max_cu、min_cu and the id of this partition which has max_cu - double row_total_cu = row.get_total_cu(); - if (max_cu < row_total_cu) { - max_cu = row_total_cu; - max_cu_partition_id = row.row_name; - } else if (min_cu > row_total_cu) { - min_cu = row_total_cu; - } - } - - double get_read_qps() const { return get_qps + multi_get_qps + scan_qps; } - - double get_write_qps() const - { - return put_qps + multi_put_qps + remove_qps + multi_remove_qps + incr_qps + - check_and_set_qps + check_and_mutate_qps; + return get_qps + multi_get_qps + scan_qps + put_qps + multi_put_qps + remove_qps + + multi_remove_qps + incr_qps + check_and_set_qps + check_and_mutate_qps; } - double get_total_qps() const { return this->get_read_qps() + this->get_write_qps(); } - double get_total_cu() const { return recent_read_cu + recent_write_cu; } std::string row_name; @@ -595,14 +546,6 @@ struct row_data double rdb_block_cache_total_count = 0; double rdb_index_and_filter_blocks_mem_usage = 0; double rdb_memtable_mem_usage = 0; - - // used when merging - double max_qps = 0; - double min_qps = 1; // set min_qps to 1, in order to avoid the divisor to be zero - double max_cu = 0; - double min_cu = 1; - std::string max_qps_partition_id = 0; - std::string max_cu_partition_id = 0; }; inline bool From a3a575837f0c116b3f6773492dfe468bb1bee3dd Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 11:54:26 +0800 Subject: [PATCH 05/18] hotspot --- src/server/info_collector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 6040d9ca69..594cb41ef6 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -143,7 +143,7 @@ void info_collector::on_app_stat() for (auto partition_row : app_rows.second) { app_stat.calc(partition_row); } - get_app_counters(app_rows.first)->set(app_stat); + get_app_counters(app_stat.app_name)->set(app_stat); // get row data statistics for all of the apps all_stat.merge(app_stat); From cc1c49f627bcf2f5e539444a4b2e20a0576f5cb4 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 12:01:50 +0800 Subject: [PATCH 06/18] hot spot --- src/server/info_collector.cpp | 8 +- src/server/info_collector.h | 138 +++++++++++++++++----------------- 2 files changed, 74 insertions(+), 72 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 594cb41ef6..b0f13bdb0a 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -139,14 +139,14 @@ void info_collector::on_app_stat() row_statistics all_stat("_all_"); for (auto app_rows : all_rows) { // get statistics data for app - row_statistics app_stat(app_rows.first); + row_statistics app_stats(app_rows.first); for (auto partition_row : app_rows.second) { - app_stat.calc(partition_row); + app_stats.calc(partition_row); } - get_app_counters(app_stat.app_name)->set(app_stat); + get_app_counters(app_stats.app_name)->set(app_stats); // get row data statistics for all of the apps - all_stat.merge(app_stat); + all_stat.merge(app_stats); } get_app_counters(all_stat.app_name)->set(all_stat); diff --git a/src/server/info_collector.h b/src/server/info_collector.h index e1c1d7f9b9..5888fe3c98 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -86,49 +86,50 @@ class info_collector } } - void merge(const row_statistics &row_stat) + void merge(const row_statistics &row_stats) { - get_qps += row_stat.get_qps; - multi_get_qps += row_stat.multi_get_qps; - put_qps += row_stat.put_qps; - multi_put_qps += row_stat.multi_put_qps; - remove_qps += row_stat.remove_qps; - multi_remove_qps += row_stat.multi_remove_qps; - incr_qps += row_stat.incr_qps; - check_and_set_qps += row_stat.check_and_set_qps; - check_and_mutate_qps += row_stat.check_and_mutate_qps; - scan_qps += row_stat.scan_qps; - recent_read_cu += row_stat.recent_read_cu; - recent_write_cu += row_stat.recent_write_cu; - recent_expire_count += row_stat.recent_expire_count; - recent_filter_count += row_stat.recent_filter_count; - recent_abnormal_count += row_stat.recent_abnormal_count; - recent_write_throttling_delay_count += row_stat.recent_write_throttling_delay_count; - recent_write_throttling_reject_count += row_stat.recent_write_throttling_reject_count; - storage_mb += row_stat.storage_mb; - storage_count += row_stat.storage_count; - rdb_block_cache_hit_count += row_stat.rdb_block_cache_hit_count; - rdb_block_cache_total_count += row_stat.rdb_block_cache_total_count; - rdb_index_and_filter_blocks_mem_usage += row_stat.rdb_index_and_filter_blocks_mem_usage; - rdb_memtable_mem_usage += row_stat.rdb_memtable_mem_usage; + get_qps += row_stats.get_qps; + multi_get_qps += row_stats.multi_get_qps; + put_qps += row_stats.put_qps; + multi_put_qps += row_stats.multi_put_qps; + remove_qps += row_stats.remove_qps; + multi_remove_qps += row_stats.multi_remove_qps; + incr_qps += row_stats.incr_qps; + check_and_set_qps += row_stats.check_and_set_qps; + check_and_mutate_qps += row_stats.check_and_mutate_qps; + scan_qps += row_stats.scan_qps; + recent_read_cu += row_stats.recent_read_cu; + recent_write_cu += row_stats.recent_write_cu; + recent_expire_count += row_stats.recent_expire_count; + recent_filter_count += row_stats.recent_filter_count; + recent_abnormal_count += row_stats.recent_abnormal_count; + recent_write_throttling_delay_count += row_stats.recent_write_throttling_delay_count; + recent_write_throttling_reject_count += row_stats.recent_write_throttling_reject_count; + storage_mb += row_stats.storage_mb; + storage_count += row_stats.storage_count; + rdb_block_cache_hit_count += row_stats.rdb_block_cache_hit_count; + rdb_block_cache_total_count += row_stats.rdb_block_cache_total_count; + rdb_index_and_filter_blocks_mem_usage += + row_stats.rdb_index_and_filter_blocks_mem_usage; + rdb_memtable_mem_usage += row_stats.rdb_memtable_mem_usage; // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app - if (this->app_name == row_stat.app_name) { + if (this->app_name == row_stats.app_name) { // get max_total_qps、min_total_qps and the id of this partition which has // max_total_qps - if (max_total_qps < row_stat.max_total_qps) { - max_total_qps = row_stat.max_total_qps; - max_qps_partition_id = row_stat.max_qps_partition_id; + if (max_total_qps < row_stats.max_total_qps) { + max_total_qps = row_stats.max_total_qps; + max_qps_partition_id = row_stats.max_qps_partition_id; } - min_total_qps = std::min(min_total_qps, row_stat.min_total_qps); + min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); // get max_total_cu、min_total_cu and the id of this partition which has // max_total_cu - if (max_total_cu < row_stat.max_total_cu) { - max_total_cu = row_stat.max_total_cu; - max_cu_partition_id = row_stat.max_cu_partition_id; + if (max_total_cu < row_stats.max_total_cu) { + max_total_cu = row_stats.max_total_cu; + max_cu_partition_id = row_stats.max_cu_partition_id; } - min_total_cu = std::min(min_total_cu, row_stat.min_total_cu); + min_total_cu = std::min(min_total_cu, row_stats.min_total_cu); } } @@ -168,51 +169,52 @@ class info_collector struct AppStatCounters { - void set(const row_statistics &row_stat) + void set(const row_statistics &row_stats) { - get_qps->set(row_stat.get_qps); - multi_get_qps->set(row_stat.multi_get_qps); - put_qps->set(row_stat.put_qps); - multi_put_qps->set(row_stat.multi_put_qps); - remove_qps->set(row_stat.remove_qps); - multi_remove_qps->set(row_stat.multi_remove_qps); - incr_qps->set(row_stat.incr_qps); - check_and_set_qps->set(row_stat.check_and_set_qps); - check_and_mutate_qps->set(row_stat.check_and_mutate_qps); - scan_qps->set(row_stat.scan_qps); - recent_read_cu->set(row_stat.recent_read_cu); - recent_write_cu->set(row_stat.recent_write_cu); - recent_expire_count->set(row_stat.recent_expire_count); - recent_filter_count->set(row_stat.recent_filter_count); - recent_abnormal_count->set(row_stat.recent_abnormal_count); - recent_write_throttling_delay_count->set(row_stat.recent_write_throttling_delay_count); + get_qps->set(row_stats.get_qps); + multi_get_qps->set(row_stats.multi_get_qps); + put_qps->set(row_stats.put_qps); + multi_put_qps->set(row_stats.multi_put_qps); + remove_qps->set(row_stats.remove_qps); + multi_remove_qps->set(row_stats.multi_remove_qps); + incr_qps->set(row_stats.incr_qps); + check_and_set_qps->set(row_stats.check_and_set_qps); + check_and_mutate_qps->set(row_stats.check_and_mutate_qps); + scan_qps->set(row_stats.scan_qps); + recent_read_cu->set(row_stats.recent_read_cu); + recent_write_cu->set(row_stats.recent_write_cu); + recent_expire_count->set(row_stats.recent_expire_count); + recent_filter_count->set(row_stats.recent_filter_count); + recent_abnormal_count->set(row_stats.recent_abnormal_count); + recent_write_throttling_delay_count->set(row_stats.recent_write_throttling_delay_count); recent_write_throttling_reject_count->set( - row_stat.recent_write_throttling_reject_count); - storage_mb->set(row_stat.storage_mb); - storage_count->set(row_stat.storage_count); - rdb_block_cache_hit_rate->set(std::abs(row_stat.rdb_block_cache_total_count) < 1e-6 + row_stats.recent_write_throttling_reject_count); + storage_mb->set(row_stats.storage_mb); + storage_count->set(row_stats.storage_count); + rdb_block_cache_hit_rate->set(std::abs(row_stats.rdb_block_cache_total_count) < 1e-6 ? 0 - : row_stat.rdb_block_cache_hit_count / - row_stat.rdb_block_cache_total_count * 1000000); + : row_stats.rdb_block_cache_hit_count / + row_stats.rdb_block_cache_total_count * + 1000000); rdb_index_and_filter_blocks_mem_usage->set( - row_stat.rdb_index_and_filter_blocks_mem_usage); - rdb_memtable_mem_usage->set(row_stat.rdb_memtable_mem_usage); - read_qps->set(row_stat.get_read_qps()); - write_qps->set(row_stat.get_write_qps()); + row_stats.rdb_index_and_filter_blocks_mem_usage); + rdb_memtable_mem_usage->set(row_stats.rdb_memtable_mem_usage); + read_qps->set(row_stats.get_read_qps()); + write_qps->set(row_stats.get_write_qps()); - double qps_scale = row_stat.max_total_qps / std::max(row_stat.min_total_qps, 1.0); - double cu_scale = row_stat.max_total_cu / std::max(row_stat.min_total_cu, 1.0); + double qps_scale = row_stats.max_total_qps / std::max(row_stats.min_total_qps, 1.0); + double cu_scale = row_stats.max_total_cu / std::max(row_stats.min_total_cu, 1.0); qps_max_min_scale->set(qps_scale); cu_max_min_scale->set(cu_scale); if (qps_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { - ddebug("There is a hotspot about qps in %s(%s))", - row_stat.app_name.c_str(), - row_stat.max_qps_partition_id.c_str()); + ddebug("There is a hot spot about qps in %s(%s))", + row_stats.app_name.c_str(), + row_stats.max_qps_partition_id.c_str()); } if (cu_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { - ddebug("There is a hotspot about cu in %s(%s))", - row_stat.app_name.c_str(), - row_stat.max_cu_partition_id.c_str()); + ddebug("There is a hot spot about cu in %s(%s))", + row_stats.app_name.c_str(), + row_stats.max_cu_partition_id.c_str()); } } From ddc52bca0fd333aed287259278f6d2100db2e2a9 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 15:04:00 +0800 Subject: [PATCH 07/18] hot spot --- src/server/info_collector.cpp | 11 ++++++----- src/server/info_collector.h | 14 +++++--------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index b0f13bdb0a..42a7715ef7 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -135,8 +135,9 @@ void info_collector::on_app_stat() derror("call get_app_stat() failed"); return; } + ddebug("after stat app partitions"); - row_statistics all_stat("_all_"); + row_statistics all_stats("_all_"); for (auto app_rows : all_rows) { // get statistics data for app row_statistics app_stats(app_rows.first); @@ -146,14 +147,14 @@ void info_collector::on_app_stat() get_app_counters(app_stats.app_name)->set(app_stats); // get row data statistics for all of the apps - all_stat.merge(app_stats); + all_stats.merge(app_stats); } - get_app_counters(all_stat.app_name)->set(all_stat); + get_app_counters(all_stats.app_name)->set(all_stats); ddebug("stat apps succeed, app_count = %d, total_read_qps = %.2f, total_write_qps = %.2f", (int)(all_rows.size() - 1), - all_stat.get_read_qps(), - all_stat.get_write_qps()); + all_stats.get_read_qps(), + all_stats.get_write_qps()); } info_collector::AppStatCounters *info_collector::get_app_counters(const std::string &app_name) diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 5888fe3c98..1b023e83fa 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -31,7 +31,7 @@ class info_collector public: struct row_statistics { - row_statistics(const std::string &app_name) : app_name(app_name) {} + row_statistics(const std::string &app_name) { this->app_name = app_name; } double get_read_qps() const { return get_qps + multi_get_qps + scan_qps; } @@ -69,20 +69,18 @@ class info_collector // get max_total_qps、min_total_qps and the id of this partition which has max_total_qps double row_total_qps = row.get_total_qps(); + min_total_qps = std::min(min_total_qps, row_total_qps); if (max_total_qps < row_total_qps) { max_total_qps = row_total_qps; max_qps_partition_id = row.row_name; - } else if (min_total_qps > row_total_qps) { - min_total_qps = row_total_qps; } // get max_total_cu、min_total_cu and the id of this partition which has max_total_cu double row_total_cu = row.get_total_cu(); + min_total_cu = std::min(min_total_cu, row_total_cu); if (max_total_cu < row_total_cu) { max_total_cu = row_total_cu; max_cu_partition_id = row.row_name; - } else if (min_total_cu > row_total_cu) { - min_total_cu = row_total_cu; } } @@ -115,16 +113,14 @@ class info_collector // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app if (this->app_name == row_stats.app_name) { - // get max_total_qps、min_total_qps and the id of this partition which has - // max_total_qps + // get max_total_qps、min_total_qps and id of the partition which has max_total_qps if (max_total_qps < row_stats.max_total_qps) { max_total_qps = row_stats.max_total_qps; max_qps_partition_id = row_stats.max_qps_partition_id; } min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); - // get max_total_cu、min_total_cu and the id of this partition which has - // max_total_cu + // get max_total_cu、min_total_cu and id of the partition which has max_total_cu if (max_total_cu < row_stats.max_total_cu) { max_total_cu = row_stats.max_total_cu; max_cu_partition_id = row_stats.max_cu_partition_id; From 89e6f5f2a650121fd3b6b75a0ab6f562e78f4ae7 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 16:36:55 +0800 Subject: [PATCH 08/18] hot spot --- src/server/info_collector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 1b023e83fa..206ab35768 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -159,8 +159,8 @@ class info_collector double min_total_qps = INT_MAX; double max_total_cu = 0; double min_total_cu = INT_MAX; - std::string max_qps_partition_id = 0; - std::string max_cu_partition_id = 0; + std::string max_qps_partition_id; + std::string max_cu_partition_id; }; struct AppStatCounters From 7750438f85e9434e450546d3dd7ba2ec36e014c4 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 17:04:18 +0800 Subject: [PATCH 09/18] hot spot --- src/server/info_collector.cpp | 4 +- src/server/info_collector.h | 211 ++++++++++++++++++---------------- 2 files changed, 111 insertions(+), 104 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 42a7715ef7..2944d21562 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -153,8 +153,8 @@ void info_collector::on_app_stat() ddebug("stat apps succeed, app_count = %d, total_read_qps = %.2f, total_write_qps = %.2f", (int)(all_rows.size() - 1), - all_stats.get_read_qps(), - all_stats.get_write_qps()); + all_stats.get_total_read_qps(), + all_stats.get_total_write_qps()); } info_collector::AppStatCounters *info_collector::get_app_counters(const std::string &app_name) diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 206ab35768..c69138dc53 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -33,39 +33,43 @@ class info_collector { row_statistics(const std::string &app_name) { this->app_name = app_name; } - double get_read_qps() const { return get_qps + multi_get_qps + scan_qps; } + double get_total_read_qps() const + { + return total_get_qps + total_multi_get_qps + total_scan_qps; + } - double get_write_qps() const + double get_total_write_qps() const { - return put_qps + multi_put_qps + remove_qps + multi_remove_qps + incr_qps + - check_and_set_qps + check_and_mutate_qps; + return total_put_qps + total_multi_put_qps + total_remove_qps + total_multi_remove_qps + + total_incr_qps + total_check_and_set_qps + total_check_and_mutate_qps; } void calc(const row_data &row) { - get_qps += row.get_qps; - multi_get_qps += row.multi_get_qps; - put_qps += row.put_qps; - multi_put_qps += row.multi_put_qps; - remove_qps += row.remove_qps; - multi_remove_qps += row.multi_remove_qps; - incr_qps += row.incr_qps; - check_and_set_qps += row.check_and_set_qps; - check_and_mutate_qps += row.check_and_mutate_qps; - scan_qps += row.scan_qps; - recent_read_cu += row.recent_read_cu; - recent_write_cu += row.recent_write_cu; - recent_expire_count += row.recent_expire_count; - recent_filter_count += row.recent_filter_count; - recent_abnormal_count += row.recent_abnormal_count; - recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; - recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; - storage_mb += row.storage_mb; - storage_count += row.storage_count; - rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; - rdb_block_cache_total_count += row.rdb_block_cache_total_count; - rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; - rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; + total_get_qps += row.get_qps; + total_multi_get_qps += row.multi_get_qps; + total_put_qps += row.put_qps; + total_multi_put_qps += row.multi_put_qps; + total_remove_qps += row.remove_qps; + total_multi_remove_qps += row.multi_remove_qps; + total_incr_qps += row.incr_qps; + total_check_and_set_qps += row.check_and_set_qps; + total_check_and_mutate_qps += row.check_and_mutate_qps; + total_scan_qps += row.scan_qps; + total_recent_read_cu += row.recent_read_cu; + total_recent_write_cu += row.recent_write_cu; + total_recent_expire_count += row.recent_expire_count; + total_recent_filter_count += row.recent_filter_count; + total_recent_abnormal_count += row.recent_abnormal_count; + total_recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; + total_recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; + total_storage_mb += row.storage_mb; + total_storage_count += row.storage_count; + total_rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; + total_rdb_block_cache_total_count += row.rdb_block_cache_total_count; + total_rdb_index_and_filter_blocks_mem_usage += + row.rdb_index_and_filter_blocks_mem_usage; + total_rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; // get max_total_qps、min_total_qps and the id of this partition which has max_total_qps double row_total_qps = row.get_total_qps(); @@ -86,30 +90,32 @@ class info_collector void merge(const row_statistics &row_stats) { - get_qps += row_stats.get_qps; - multi_get_qps += row_stats.multi_get_qps; - put_qps += row_stats.put_qps; - multi_put_qps += row_stats.multi_put_qps; - remove_qps += row_stats.remove_qps; - multi_remove_qps += row_stats.multi_remove_qps; - incr_qps += row_stats.incr_qps; - check_and_set_qps += row_stats.check_and_set_qps; - check_and_mutate_qps += row_stats.check_and_mutate_qps; - scan_qps += row_stats.scan_qps; - recent_read_cu += row_stats.recent_read_cu; - recent_write_cu += row_stats.recent_write_cu; - recent_expire_count += row_stats.recent_expire_count; - recent_filter_count += row_stats.recent_filter_count; - recent_abnormal_count += row_stats.recent_abnormal_count; - recent_write_throttling_delay_count += row_stats.recent_write_throttling_delay_count; - recent_write_throttling_reject_count += row_stats.recent_write_throttling_reject_count; - storage_mb += row_stats.storage_mb; - storage_count += row_stats.storage_count; - rdb_block_cache_hit_count += row_stats.rdb_block_cache_hit_count; - rdb_block_cache_total_count += row_stats.rdb_block_cache_total_count; - rdb_index_and_filter_blocks_mem_usage += - row_stats.rdb_index_and_filter_blocks_mem_usage; - rdb_memtable_mem_usage += row_stats.rdb_memtable_mem_usage; + total_get_qps += row_stats.total_get_qps; + total_multi_get_qps += row_stats.total_multi_get_qps; + total_put_qps += row_stats.total_put_qps; + total_multi_put_qps += row_stats.total_multi_put_qps; + total_remove_qps += row_stats.total_remove_qps; + total_multi_remove_qps += row_stats.total_multi_remove_qps; + total_incr_qps += row_stats.total_incr_qps; + total_check_and_set_qps += row_stats.total_check_and_set_qps; + total_check_and_mutate_qps += row_stats.total_check_and_mutate_qps; + total_scan_qps += row_stats.total_scan_qps; + total_recent_read_cu += row_stats.total_recent_read_cu; + total_recent_write_cu += row_stats.total_recent_write_cu; + total_recent_expire_count += row_stats.total_recent_expire_count; + total_recent_filter_count += row_stats.total_recent_filter_count; + total_recent_abnormal_count += row_stats.total_recent_abnormal_count; + total_recent_write_throttling_delay_count += + row_stats.total_recent_write_throttling_delay_count; + total_recent_write_throttling_reject_count += + row_stats.total_recent_write_throttling_reject_count; + total_storage_mb += row_stats.total_storage_mb; + total_storage_count += row_stats.total_storage_count; + total_rdb_block_cache_hit_count += row_stats.total_rdb_block_cache_hit_count; + total_rdb_block_cache_total_count += row_stats.total_rdb_block_cache_total_count; + total_rdb_index_and_filter_blocks_mem_usage += + row_stats.total_rdb_index_and_filter_blocks_mem_usage; + total_rdb_memtable_mem_usage += row_stats.total_rdb_memtable_mem_usage; // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app if (this->app_name == row_stats.app_name) { @@ -130,29 +136,29 @@ class info_collector } std::string app_name; - double get_qps = 0; - double multi_get_qps = 0; - double put_qps = 0; - double multi_put_qps = 0; - double remove_qps = 0; - double multi_remove_qps = 0; - double incr_qps = 0; - double check_and_set_qps = 0; - double check_and_mutate_qps = 0; - double scan_qps = 0; - double recent_read_cu = 0; - double recent_write_cu = 0; - double recent_expire_count = 0; - double recent_filter_count = 0; - double recent_abnormal_count = 0; - double recent_write_throttling_delay_count = 0; - double recent_write_throttling_reject_count = 0; - double storage_mb = 0; - double storage_count = 0; - double rdb_block_cache_hit_count = 0; - double rdb_block_cache_total_count = 0; - double rdb_index_and_filter_blocks_mem_usage = 0; - double rdb_memtable_mem_usage = 0; + double total_get_qps = 0; + double total_multi_get_qps = 0; + double total_put_qps = 0; + double total_multi_put_qps = 0; + double total_remove_qps = 0; + double total_multi_remove_qps = 0; + double total_incr_qps = 0; + double total_check_and_set_qps = 0; + double total_check_and_mutate_qps = 0; + double total_scan_qps = 0; + double total_recent_read_cu = 0; + double total_recent_write_cu = 0; + double total_recent_expire_count = 0; + double total_recent_filter_count = 0; + double total_recent_abnormal_count = 0; + double total_recent_write_throttling_delay_count = 0; + double total_recent_write_throttling_reject_count = 0; + double total_storage_mb = 0; + double total_storage_count = 0; + double total_rdb_block_cache_hit_count = 0; + double total_rdb_block_cache_total_count = 0; + double total_rdb_index_and_filter_blocks_mem_usage = 0; + double total_rdb_memtable_mem_usage = 0; // used when merging double max_total_qps = 0; @@ -167,36 +173,37 @@ class info_collector { void set(const row_statistics &row_stats) { - get_qps->set(row_stats.get_qps); - multi_get_qps->set(row_stats.multi_get_qps); - put_qps->set(row_stats.put_qps); - multi_put_qps->set(row_stats.multi_put_qps); - remove_qps->set(row_stats.remove_qps); - multi_remove_qps->set(row_stats.multi_remove_qps); - incr_qps->set(row_stats.incr_qps); - check_and_set_qps->set(row_stats.check_and_set_qps); - check_and_mutate_qps->set(row_stats.check_and_mutate_qps); - scan_qps->set(row_stats.scan_qps); - recent_read_cu->set(row_stats.recent_read_cu); - recent_write_cu->set(row_stats.recent_write_cu); - recent_expire_count->set(row_stats.recent_expire_count); - recent_filter_count->set(row_stats.recent_filter_count); - recent_abnormal_count->set(row_stats.recent_abnormal_count); - recent_write_throttling_delay_count->set(row_stats.recent_write_throttling_delay_count); + get_qps->set(row_stats.total_get_qps); + multi_get_qps->set(row_stats.total_multi_get_qps); + put_qps->set(row_stats.total_put_qps); + multi_put_qps->set(row_stats.total_multi_put_qps); + remove_qps->set(row_stats.total_remove_qps); + multi_remove_qps->set(row_stats.total_multi_remove_qps); + incr_qps->set(row_stats.total_incr_qps); + check_and_set_qps->set(row_stats.total_check_and_set_qps); + check_and_mutate_qps->set(row_stats.total_check_and_mutate_qps); + scan_qps->set(row_stats.total_scan_qps); + recent_read_cu->set(row_stats.total_recent_read_cu); + recent_write_cu->set(row_stats.total_recent_write_cu); + recent_expire_count->set(row_stats.total_recent_expire_count); + recent_filter_count->set(row_stats.total_recent_filter_count); + recent_abnormal_count->set(row_stats.total_recent_abnormal_count); + recent_write_throttling_delay_count->set( + row_stats.total_recent_write_throttling_delay_count); recent_write_throttling_reject_count->set( - row_stats.recent_write_throttling_reject_count); - storage_mb->set(row_stats.storage_mb); - storage_count->set(row_stats.storage_count); - rdb_block_cache_hit_rate->set(std::abs(row_stats.rdb_block_cache_total_count) < 1e-6 - ? 0 - : row_stats.rdb_block_cache_hit_count / - row_stats.rdb_block_cache_total_count * - 1000000); + row_stats.total_recent_write_throttling_reject_count); + storage_mb->set(row_stats.total_storage_mb); + storage_count->set(row_stats.total_storage_count); + rdb_block_cache_hit_rate->set( + std::abs(row_stats.total_rdb_block_cache_total_count) < 1e-6 + ? 0 + : row_stats.total_rdb_block_cache_hit_count / + row_stats.total_rdb_block_cache_total_count * 1000000); rdb_index_and_filter_blocks_mem_usage->set( - row_stats.rdb_index_and_filter_blocks_mem_usage); - rdb_memtable_mem_usage->set(row_stats.rdb_memtable_mem_usage); - read_qps->set(row_stats.get_read_qps()); - write_qps->set(row_stats.get_write_qps()); + row_stats.total_rdb_index_and_filter_blocks_mem_usage); + rdb_memtable_mem_usage->set(row_stats.total_rdb_memtable_mem_usage); + read_qps->set(row_stats.get_total_read_qps()); + write_qps->set(row_stats.get_total_write_qps()); double qps_scale = row_stats.max_total_qps / std::max(row_stats.min_total_qps, 1.0); double cu_scale = row_stats.max_total_cu / std::max(row_stats.min_total_cu, 1.0); From 98ee1fae62185a7fb6194508eee8af48e9630f28 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 17:53:50 +0800 Subject: [PATCH 10/18] hotspot --- config-shell.ini.20518 | 69 +++++++++++++++++++++++++++++++++++ src/server/info_collector.cpp | 1 - src/server/info_collector.h | 2 - 3 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 config-shell.ini.20518 diff --git a/config-shell.ini.20518 b/config-shell.ini.20518 new file mode 100644 index 0000000000..35ee9c8f92 --- /dev/null +++ b/config-shell.ini.20518 @@ -0,0 +1,69 @@ +[apps..default] +run = true +count = 1 +;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 +;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536 +;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 + +[apps.mimic] +type = dsn.app.mimic +arguments = +pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER +run = true +count = 1 + +[core] +;tool = simulator +tool = nativerun +;toollets = tracer +;toollets = tracer, profiler, fault_injector +pause_on_start = false + +;aio_factory_name = dsn::tools::native_aio_provider + +logging_start_level = LOG_LEVEL_DEBUG +logging_factory_name = dsn::tools::simple_logger +;logging_factory_name = dsn::tools::screen_logger +logging_flush_on_exit = false + +enable_default_app_mimic = true + +data_dir = ./pegasus_shell.data + +[tools.simple_logger] +short_header = false +fast_flush = true +max_number_of_log_files_on_disk = 10 +stderr_start_level = LOG_LEVEL_FATAL + +[tools.simulator] +random_seed = 0 + +[network] +; how many network threads for network library(used by asio) +io_service_worker_count = 4 + +; specification for each thread pool +[threadpool..default] +worker_count = 4 +partitioned = false +worker_priority = THREAD_xPRIORITY_NORMAL + +[threadpool.THREAD_POOL_DEFAULT] +name = default +worker_count = 8 + +[threadpool.THREAD_POOL_META_SERVER] +name = meta_server + +[task..default] +is_trace = false +is_profile = false +allow_inline = false +rpc_call_header_format = NET_HDR_DSN +rpc_call_channel = RPC_CHANNEL_TCP +rpc_timeout_milliseconds = 10000 + +[pegasus.clusters] +onebox = 127.0.0.1:34601,127.0.0.1:34602,127.0.0.1:34603 + diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 2944d21562..b3fd3c2bda 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -135,7 +135,6 @@ void info_collector::on_app_stat() derror("call get_app_stat() failed"); return; } - ddebug("after stat app partitions"); row_statistics all_stats("_all_"); for (auto app_rows : all_rows) { diff --git a/src/server/info_collector.h b/src/server/info_collector.h index c69138dc53..16dcfa29e3 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -159,8 +159,6 @@ class info_collector double total_rdb_block_cache_total_count = 0; double total_rdb_index_and_filter_blocks_mem_usage = 0; double total_rdb_memtable_mem_usage = 0; - - // used when merging double max_total_qps = 0; double min_total_qps = INT_MAX; double max_total_cu = 0; From 30a1fd473450fd5960e07fb03491d5cf68fbbe00 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 18:39:53 +0800 Subject: [PATCH 11/18] hot spot --- config-shell.ini.20518 | 69 ------------------------------------- src/server/info_collector.h | 10 +++--- 2 files changed, 6 insertions(+), 73 deletions(-) delete mode 100644 config-shell.ini.20518 diff --git a/config-shell.ini.20518 b/config-shell.ini.20518 deleted file mode 100644 index 35ee9c8f92..0000000000 --- a/config-shell.ini.20518 +++ /dev/null @@ -1,69 +0,0 @@ -[apps..default] -run = true -count = 1 -;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 -;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536 -;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 - -[apps.mimic] -type = dsn.app.mimic -arguments = -pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER -run = true -count = 1 - -[core] -;tool = simulator -tool = nativerun -;toollets = tracer -;toollets = tracer, profiler, fault_injector -pause_on_start = false - -;aio_factory_name = dsn::tools::native_aio_provider - -logging_start_level = LOG_LEVEL_DEBUG -logging_factory_name = dsn::tools::simple_logger -;logging_factory_name = dsn::tools::screen_logger -logging_flush_on_exit = false - -enable_default_app_mimic = true - -data_dir = ./pegasus_shell.data - -[tools.simple_logger] -short_header = false -fast_flush = true -max_number_of_log_files_on_disk = 10 -stderr_start_level = LOG_LEVEL_FATAL - -[tools.simulator] -random_seed = 0 - -[network] -; how many network threads for network library(used by asio) -io_service_worker_count = 4 - -; specification for each thread pool -[threadpool..default] -worker_count = 4 -partitioned = false -worker_priority = THREAD_xPRIORITY_NORMAL - -[threadpool.THREAD_POOL_DEFAULT] -name = default -worker_count = 8 - -[threadpool.THREAD_POOL_META_SERVER] -name = meta_server - -[task..default] -is_trace = false -is_profile = false -allow_inline = false -rpc_call_header_format = NET_HDR_DSN -rpc_call_channel = RPC_CHANNEL_TCP -rpc_timeout_milliseconds = 10000 - -[pegasus.clusters] -onebox = 127.0.0.1:34601,127.0.0.1:34602,127.0.0.1:34603 - diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 16dcfa29e3..e821a56a0d 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -208,14 +208,16 @@ class info_collector qps_max_min_scale->set(qps_scale); cu_max_min_scale->set(cu_scale); if (qps_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { - ddebug("There is a hot spot about qps in %s(%s))", + ddebug("There is a hot spot about qps in app %s(partition id: %s), max/min scale=%d", row_stats.app_name.c_str(), - row_stats.max_qps_partition_id.c_str()); + row_stats.max_qps_partition_id.c_str(), + qps_scale); } if (cu_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { - ddebug("There is a hot spot about cu in %s(%s))", + ddebug("There is a hot spot about cu in app %s(partition id: %s), max/min scale=%d", row_stats.app_name.c_str(), - row_stats.max_cu_partition_id.c_str()); + row_stats.max_cu_partition_id.c_str(), + cu_scale); } } From db88ea35ec192348865b57829440457e70e20532 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Fri, 8 Nov 2019 19:45:08 +0800 Subject: [PATCH 12/18] hot spot --- src/server/info_collector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/info_collector.h b/src/server/info_collector.h index e821a56a0d..1302e3835f 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -120,18 +120,18 @@ class info_collector // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app if (this->app_name == row_stats.app_name) { // get max_total_qps、min_total_qps and id of the partition which has max_total_qps + min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); if (max_total_qps < row_stats.max_total_qps) { max_total_qps = row_stats.max_total_qps; max_qps_partition_id = row_stats.max_qps_partition_id; } - min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); // get max_total_cu、min_total_cu and id of the partition which has max_total_cu + min_total_cu = std::min(min_total_cu, row_stats.min_total_cu); if (max_total_cu < row_stats.max_total_cu) { max_total_cu = row_stats.max_total_cu; max_cu_partition_id = row_stats.max_cu_partition_id; } - min_total_cu = std::min(min_total_cu, row_stats.min_total_cu); } } From 7fa659e216be1c772e58c5329320734348f989b9 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Tue, 19 Nov 2019 17:05:58 +0800 Subject: [PATCH 13/18] merge master --- src/shell/command_helper.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 66662bc279..c1e5ecade3 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -787,14 +787,8 @@ get_app_stat(shell_context *sc, const std::string &app_name, std::vector>>>>>> upstream/master continue; } auto find = app_partitions.find(app_id_x); From f7e29ef148c6ee85d4a14347b6dd2c682f9d025c Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Tue, 19 Nov 2019 17:15:27 +0800 Subject: [PATCH 14/18] hotspot --- rdsn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdsn b/rdsn index a15faf64f2..7e3b0421a0 160000 --- a/rdsn +++ b/rdsn @@ -1 +1 @@ -Subproject commit a15faf64f2978fba94017fc09d6422d75df67246 +Subproject commit 7e3b0421a0a40b92102357ed46581c8fbc2b4db2 From 7f34a935ad7d0c26c936e1d3ece49af30da8a3e9 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Tue, 19 Nov 2019 17:49:25 +0800 Subject: [PATCH 15/18] format --- src/server/info_collector.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 1302e3835f..8c66cfd85e 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -208,10 +208,11 @@ class info_collector qps_max_min_scale->set(qps_scale); cu_max_min_scale->set(cu_scale); if (qps_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { - ddebug("There is a hot spot about qps in app %s(partition id: %s), max/min scale=%d", - row_stats.app_name.c_str(), - row_stats.max_qps_partition_id.c_str(), - qps_scale); + ddebug( + "There is a hot spot about qps in app %s(partition id: %s), max/min scale=%d", + row_stats.app_name.c_str(), + row_stats.max_qps_partition_id.c_str(), + qps_scale); } if (cu_scale >= HOTSPOT_MAX_MIN_SCALE_THRESHOLD) { ddebug("There is a hot spot about cu in app %s(partition id: %s), max/min scale=%d", From b18dede017ad3531d6d1b4536e0c983e142268f6 Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Tue, 19 Nov 2019 19:10:15 +0800 Subject: [PATCH 16/18] rdsn --- rdsn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdsn b/rdsn index 7e3b0421a0..a15faf64f2 160000 --- a/rdsn +++ b/rdsn @@ -1 +1 @@ -Subproject commit 7e3b0421a0a40b92102357ed46581c8fbc2b4db2 +Subproject commit a15faf64f2978fba94017fc09d6422d75df67246 From 2b018478c431d9ea441a5a3ddbbd6d72db72915b Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Wed, 20 Nov 2019 11:10:33 +0800 Subject: [PATCH 17/18] update rdsn --- rdsn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdsn b/rdsn index a15faf64f2..7e3b0421a0 160000 --- a/rdsn +++ b/rdsn @@ -1 +1 @@ -Subproject commit a15faf64f2978fba94017fc09d6422d75df67246 +Subproject commit 7e3b0421a0a40b92102357ed46581c8fbc2b4db2 From e2dfa845a07f3b4e6895966081da5fe05078b3ef Mon Sep 17 00:00:00 2001 From: zhaoliwei Date: Wed, 27 Nov 2019 15:24:44 +0800 Subject: [PATCH 18/18] hotspot --- src/server/info_collector.cpp | 6 +- src/server/info_collector.h | 141 +-------------------------------- src/server/table_stats.h | 142 ++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 142 deletions(-) create mode 100644 src/server/table_stats.h diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index b3fd3c2bda..1b3811a297 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -136,12 +136,12 @@ void info_collector::on_app_stat() return; } - row_statistics all_stats("_all_"); + table_stats all_stats("_all_"); for (auto app_rows : all_rows) { // get statistics data for app - row_statistics app_stats(app_rows.first); + table_stats app_stats(app_rows.first); for (auto partition_row : app_rows.second) { - app_stats.calc(partition_row); + app_stats.aggregate(partition_row); } get_app_counters(app_stats.app_name)->set(app_stats); diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 8c66cfd85e..6f2c228c27 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -19,6 +19,7 @@ #include #include "../shell/commands.h" +#include "table_stats.h" namespace pegasus { namespace server { @@ -29,147 +30,9 @@ static const int HOTSPOT_MAX_MIN_SCALE_THRESHOLD = 10; class info_collector { public: - struct row_statistics - { - row_statistics(const std::string &app_name) { this->app_name = app_name; } - - double get_total_read_qps() const - { - return total_get_qps + total_multi_get_qps + total_scan_qps; - } - - double get_total_write_qps() const - { - return total_put_qps + total_multi_put_qps + total_remove_qps + total_multi_remove_qps + - total_incr_qps + total_check_and_set_qps + total_check_and_mutate_qps; - } - - void calc(const row_data &row) - { - total_get_qps += row.get_qps; - total_multi_get_qps += row.multi_get_qps; - total_put_qps += row.put_qps; - total_multi_put_qps += row.multi_put_qps; - total_remove_qps += row.remove_qps; - total_multi_remove_qps += row.multi_remove_qps; - total_incr_qps += row.incr_qps; - total_check_and_set_qps += row.check_and_set_qps; - total_check_and_mutate_qps += row.check_and_mutate_qps; - total_scan_qps += row.scan_qps; - total_recent_read_cu += row.recent_read_cu; - total_recent_write_cu += row.recent_write_cu; - total_recent_expire_count += row.recent_expire_count; - total_recent_filter_count += row.recent_filter_count; - total_recent_abnormal_count += row.recent_abnormal_count; - total_recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; - total_recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; - total_storage_mb += row.storage_mb; - total_storage_count += row.storage_count; - total_rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; - total_rdb_block_cache_total_count += row.rdb_block_cache_total_count; - total_rdb_index_and_filter_blocks_mem_usage += - row.rdb_index_and_filter_blocks_mem_usage; - total_rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; - - // get max_total_qps、min_total_qps and the id of this partition which has max_total_qps - double row_total_qps = row.get_total_qps(); - min_total_qps = std::min(min_total_qps, row_total_qps); - if (max_total_qps < row_total_qps) { - max_total_qps = row_total_qps; - max_qps_partition_id = row.row_name; - } - - // get max_total_cu、min_total_cu and the id of this partition which has max_total_cu - double row_total_cu = row.get_total_cu(); - min_total_cu = std::min(min_total_cu, row_total_cu); - if (max_total_cu < row_total_cu) { - max_total_cu = row_total_cu; - max_cu_partition_id = row.row_name; - } - } - - void merge(const row_statistics &row_stats) - { - total_get_qps += row_stats.total_get_qps; - total_multi_get_qps += row_stats.total_multi_get_qps; - total_put_qps += row_stats.total_put_qps; - total_multi_put_qps += row_stats.total_multi_put_qps; - total_remove_qps += row_stats.total_remove_qps; - total_multi_remove_qps += row_stats.total_multi_remove_qps; - total_incr_qps += row_stats.total_incr_qps; - total_check_and_set_qps += row_stats.total_check_and_set_qps; - total_check_and_mutate_qps += row_stats.total_check_and_mutate_qps; - total_scan_qps += row_stats.total_scan_qps; - total_recent_read_cu += row_stats.total_recent_read_cu; - total_recent_write_cu += row_stats.total_recent_write_cu; - total_recent_expire_count += row_stats.total_recent_expire_count; - total_recent_filter_count += row_stats.total_recent_filter_count; - total_recent_abnormal_count += row_stats.total_recent_abnormal_count; - total_recent_write_throttling_delay_count += - row_stats.total_recent_write_throttling_delay_count; - total_recent_write_throttling_reject_count += - row_stats.total_recent_write_throttling_reject_count; - total_storage_mb += row_stats.total_storage_mb; - total_storage_count += row_stats.total_storage_count; - total_rdb_block_cache_hit_count += row_stats.total_rdb_block_cache_hit_count; - total_rdb_block_cache_total_count += row_stats.total_rdb_block_cache_total_count; - total_rdb_index_and_filter_blocks_mem_usage += - row_stats.total_rdb_index_and_filter_blocks_mem_usage; - total_rdb_memtable_mem_usage += row_stats.total_rdb_memtable_mem_usage; - - // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app - if (this->app_name == row_stats.app_name) { - // get max_total_qps、min_total_qps and id of the partition which has max_total_qps - min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); - if (max_total_qps < row_stats.max_total_qps) { - max_total_qps = row_stats.max_total_qps; - max_qps_partition_id = row_stats.max_qps_partition_id; - } - - // get max_total_cu、min_total_cu and id of the partition which has max_total_cu - min_total_cu = std::min(min_total_cu, row_stats.min_total_cu); - if (max_total_cu < row_stats.max_total_cu) { - max_total_cu = row_stats.max_total_cu; - max_cu_partition_id = row_stats.max_cu_partition_id; - } - } - } - - std::string app_name; - double total_get_qps = 0; - double total_multi_get_qps = 0; - double total_put_qps = 0; - double total_multi_put_qps = 0; - double total_remove_qps = 0; - double total_multi_remove_qps = 0; - double total_incr_qps = 0; - double total_check_and_set_qps = 0; - double total_check_and_mutate_qps = 0; - double total_scan_qps = 0; - double total_recent_read_cu = 0; - double total_recent_write_cu = 0; - double total_recent_expire_count = 0; - double total_recent_filter_count = 0; - double total_recent_abnormal_count = 0; - double total_recent_write_throttling_delay_count = 0; - double total_recent_write_throttling_reject_count = 0; - double total_storage_mb = 0; - double total_storage_count = 0; - double total_rdb_block_cache_hit_count = 0; - double total_rdb_block_cache_total_count = 0; - double total_rdb_index_and_filter_blocks_mem_usage = 0; - double total_rdb_memtable_mem_usage = 0; - double max_total_qps = 0; - double min_total_qps = INT_MAX; - double max_total_cu = 0; - double min_total_cu = INT_MAX; - std::string max_qps_partition_id; - std::string max_cu_partition_id; - }; - struct AppStatCounters { - void set(const row_statistics &row_stats) + void set(const table_stats &row_stats) { get_qps->set(row_stats.total_get_qps); multi_get_qps->set(row_stats.total_multi_get_qps); diff --git a/src/server/table_stats.h b/src/server/table_stats.h new file mode 100644 index 0000000000..25a795da0c --- /dev/null +++ b/src/server/table_stats.h @@ -0,0 +1,142 @@ +// Copyright (c) 2017, Xiaomi, Inc. All rights reserved. +// This source code is licensed under the Apache License Version 2.0, which +// can be found in the LICENSE file in the root directory of this source tree. + +#pragma once + +struct table_stats +{ + table_stats(const std::string &app_name) { this->app_name = app_name; } + + double get_total_read_qps() const + { + return total_get_qps + total_multi_get_qps + total_scan_qps; + } + + double get_total_write_qps() const + { + return total_put_qps + total_multi_put_qps + total_remove_qps + total_multi_remove_qps + + total_incr_qps + total_check_and_set_qps + total_check_and_mutate_qps; + } + + void aggregate(const row_data &row) + { + total_get_qps += row.get_qps; + total_multi_get_qps += row.multi_get_qps; + total_put_qps += row.put_qps; + total_multi_put_qps += row.multi_put_qps; + total_remove_qps += row.remove_qps; + total_multi_remove_qps += row.multi_remove_qps; + total_incr_qps += row.incr_qps; + total_check_and_set_qps += row.check_and_set_qps; + total_check_and_mutate_qps += row.check_and_mutate_qps; + total_scan_qps += row.scan_qps; + total_recent_read_cu += row.recent_read_cu; + total_recent_write_cu += row.recent_write_cu; + total_recent_expire_count += row.recent_expire_count; + total_recent_filter_count += row.recent_filter_count; + total_recent_abnormal_count += row.recent_abnormal_count; + total_recent_write_throttling_delay_count += row.recent_write_throttling_delay_count; + total_recent_write_throttling_reject_count += row.recent_write_throttling_reject_count; + total_storage_mb += row.storage_mb; + total_storage_count += row.storage_count; + total_rdb_block_cache_hit_count += row.rdb_block_cache_hit_count; + total_rdb_block_cache_total_count += row.rdb_block_cache_total_count; + total_rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage; + total_rdb_memtable_mem_usage += row.rdb_memtable_mem_usage; + + // get max_total_qps、min_total_qps and the id of this partition which has max_total_qps + double row_total_qps = row.get_total_qps(); + min_total_qps = std::min(min_total_qps, row_total_qps); + if (max_total_qps < row_total_qps) { + max_total_qps = row_total_qps; + max_qps_partition_id = row.row_name; + } + + // get max_total_cu、min_total_cu and the id of this partition which has max_total_cu + double row_total_cu = row.get_total_cu(); + min_total_cu = std::min(min_total_cu, row_total_cu); + if (max_total_cu < row_total_cu) { + max_total_cu = row_total_cu; + max_cu_partition_id = row.row_name; + } + } + + void merge(const table_stats &row_stats) + { + total_get_qps += row_stats.total_get_qps; + total_multi_get_qps += row_stats.total_multi_get_qps; + total_put_qps += row_stats.total_put_qps; + total_multi_put_qps += row_stats.total_multi_put_qps; + total_remove_qps += row_stats.total_remove_qps; + total_multi_remove_qps += row_stats.total_multi_remove_qps; + total_incr_qps += row_stats.total_incr_qps; + total_check_and_set_qps += row_stats.total_check_and_set_qps; + total_check_and_mutate_qps += row_stats.total_check_and_mutate_qps; + total_scan_qps += row_stats.total_scan_qps; + total_recent_read_cu += row_stats.total_recent_read_cu; + total_recent_write_cu += row_stats.total_recent_write_cu; + total_recent_expire_count += row_stats.total_recent_expire_count; + total_recent_filter_count += row_stats.total_recent_filter_count; + total_recent_abnormal_count += row_stats.total_recent_abnormal_count; + total_recent_write_throttling_delay_count += + row_stats.total_recent_write_throttling_delay_count; + total_recent_write_throttling_reject_count += + row_stats.total_recent_write_throttling_reject_count; + total_storage_mb += row_stats.total_storage_mb; + total_storage_count += row_stats.total_storage_count; + total_rdb_block_cache_hit_count += row_stats.total_rdb_block_cache_hit_count; + total_rdb_block_cache_total_count += row_stats.total_rdb_block_cache_total_count; + total_rdb_index_and_filter_blocks_mem_usage += + row_stats.total_rdb_index_and_filter_blocks_mem_usage; + total_rdb_memtable_mem_usage += row_stats.total_rdb_memtable_mem_usage; + + // We only need max_total_qps/min_total_qps/max_total_cu/min_total_cu in the same app + if (this->app_name == row_stats.app_name) { + // get max_total_qps、min_total_qps and id of the partition which has max_total_qps + min_total_qps = std::min(min_total_qps, row_stats.min_total_qps); + if (max_total_qps < row_stats.max_total_qps) { + max_total_qps = row_stats.max_total_qps; + max_qps_partition_id = row_stats.max_qps_partition_id; + } + + // get max_total_cu、min_total_cu and id of the partition which has max_total_cu + min_total_cu = std::min(min_total_cu, row_stats.min_total_cu); + if (max_total_cu < row_stats.max_total_cu) { + max_total_cu = row_stats.max_total_cu; + max_cu_partition_id = row_stats.max_cu_partition_id; + } + } + } + + std::string app_name; + double total_get_qps = 0; + double total_multi_get_qps = 0; + double total_put_qps = 0; + double total_multi_put_qps = 0; + double total_remove_qps = 0; + double total_multi_remove_qps = 0; + double total_incr_qps = 0; + double total_check_and_set_qps = 0; + double total_check_and_mutate_qps = 0; + double total_scan_qps = 0; + double total_recent_read_cu = 0; + double total_recent_write_cu = 0; + double total_recent_expire_count = 0; + double total_recent_filter_count = 0; + double total_recent_abnormal_count = 0; + double total_recent_write_throttling_delay_count = 0; + double total_recent_write_throttling_reject_count = 0; + double total_storage_mb = 0; + double total_storage_count = 0; + double total_rdb_block_cache_hit_count = 0; + double total_rdb_block_cache_total_count = 0; + double total_rdb_index_and_filter_blocks_mem_usage = 0; + double total_rdb_memtable_mem_usage = 0; + double max_total_qps = 0; + double min_total_qps = INT_MAX; + double max_total_cu = 0; + double min_total_cu = INT_MAX; + std::string max_qps_partition_id; + std::string max_cu_partition_id; +};