diff --git a/resources/gflags.json b/resources/gflags.json index 59d164ea12d..52fd60b2786 100644 --- a/resources/gflags.json +++ b/resources/gflags.json @@ -4,7 +4,6 @@ "v", "heartbeat_interval_secs", "meta_client_retry_times", - "slow_op_threshold_ms", "clean_wal_interval_secs", "wal_ttl", "clean_wal_interval_secs", diff --git a/src/common/base/CMakeLists.txt b/src/common/base/CMakeLists.txt index 22db6d19c43..d32bebf795c 100644 --- a/src/common/base/CMakeLists.txt +++ b/src/common/base/CMakeLists.txt @@ -13,7 +13,6 @@ nebula_add_library( Status.cpp SanitizerOptions.cpp SignalHandler.cpp - SlowOpTracker.cpp ${gdb_debug_script} ) diff --git a/src/common/base/SlowOpTracker.cpp b/src/common/base/SlowOpTracker.cpp deleted file mode 100644 index 0986643d9fc..00000000000 --- a/src/common/base/SlowOpTracker.cpp +++ /dev/null @@ -1,11 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "common/base/SlowOpTracker.h" - -#include "common/base/Base.h" -#include "common/time/WallClock.h" - -DEFINE_int64(slow_op_threshold_ms, 100, "default threshold for slow operation"); diff --git a/src/common/base/SlowOpTracker.h b/src/common/base/SlowOpTracker.h deleted file mode 100644 index 63c86597298..00000000000 --- a/src/common/base/SlowOpTracker.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#ifndef COMMON_BASE_SLOWOPTRACKER_H_ -#define COMMON_BASE_SLOWOPTRACKER_H_ - -#include "common/base/Base.h" -#include "common/time/WallClock.h" - -DECLARE_int64(slow_op_threshold_ms); - -namespace nebula { - -class SlowOpTracker { - public: - SlowOpTracker() : startMs_(time::WallClock::fastNowInMilliSec()) {} - - ~SlowOpTracker() = default; - - bool slow(int64_t threshold = 0) { - dur_ = time::WallClock::fastNowInMilliSec() - startMs_; - if (dur_ < 0) { - dur_ = 0; - } - return threshold > 0 ? dur_ > threshold : dur_ > FLAGS_slow_op_threshold_ms; - } - - void output(const std::string& prefix, const std::string& msg) { - LOG(INFO) << prefix << "total time:" << dur_ << "ms, " << msg; - } - - private: - int64_t startMs_ = 0; - int64_t dur_ = 0; -}; - -} // namespace nebula -#endif // COMMON_BASE_SLOWOPTRACKER_H_ diff --git a/src/common/base/test/CMakeLists.txt b/src/common/base/test/CMakeLists.txt index 15f89608d67..bfe1e4d5dd0 100644 --- a/src/common/base/test/CMakeLists.txt +++ b/src/common/base/test/CMakeLists.txt @@ -79,13 +79,6 @@ nebula_add_test( LIBRARIES gtest gtest_main ) -nebula_add_test( - NAME slow_op_tracker_test - SOURCES SlowOpTrackerTest.cpp - OBJECTS $ $ - LIBRARIES gtest gtest_main -) - nebula_add_test( NAME lru_test SOURCES ConcurrentLRUCacheTest.cpp diff --git a/src/common/base/test/SlowOpTrackerTest.cpp b/src/common/base/test/SlowOpTrackerTest.cpp deleted file mode 100644 index 5d12762be1e..00000000000 --- a/src/common/base/test/SlowOpTrackerTest.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2018 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include - -#include "common/base/Base.h" -#include "common/base/SlowOpTracker.h" - -namespace nebula { - -TEST(SlowOpTrackerTest, SimpleTest) { - SlowOpTracker tracker; - usleep(500000); - CHECK(tracker.slow()); - tracker.output("PREFIX", "This is a prefix msg"); -} - -} // namespace nebula - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - folly::init(&argc, &argv, true); - google::SetStderrLogging(google::INFO); - - return RUN_ALL_TESTS(); -} diff --git a/src/common/meta/GflagsManager.cpp b/src/common/meta/GflagsManager.cpp index 4661dabef95..9c2ba419c96 100644 --- a/src/common/meta/GflagsManager.cpp +++ b/src/common/meta/GflagsManager.cpp @@ -55,7 +55,6 @@ std::unordered_map> GflagsManager {"heartbeat_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, {"agent_heartbeat_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, {"meta_client_retry_times", {cpp2::ConfigMode::MUTABLE, false}}, - {"slow_op_threshold_ms", {cpp2::ConfigMode::MUTABLE, false}}, {"wal_ttl", {cpp2::ConfigMode::MUTABLE, false}}, {"clean_wal_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, {"custom_filter_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, diff --git a/src/kvstore/raftex/Host.cpp b/src/kvstore/raftex/Host.cpp index 685c70ebd28..499752f846e 100644 --- a/src/kvstore/raftex/Host.cpp +++ b/src/kvstore/raftex/Host.cpp @@ -10,7 +10,10 @@ #include #include "common/network/NetworkUtils.h" +#include "common/stats/StatsManager.h" +#include "common/time/WallClock.h" #include "kvstore/raftex/RaftPart.h" +#include "kvstore/stats/KVStats.h" #include "kvstore/wal/FileBasedWal.h" DEFINE_uint32(max_appendlog_batch_size, @@ -158,9 +161,12 @@ void Host::setResponse(const cpp2::AppendLogResponse& r) { void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr req) { using TransportException = apache::thrift::transport::TTransportException; + auto beforeRpcUs = time::WallClock::fastNowInMicroSec(); sendAppendLogRequest(eb, req) .via(eb) - .thenValue([eb, self = shared_from_this()](cpp2::AppendLogResponse&& resp) { + .thenValue([eb, beforeRpcUs, self = shared_from_this()](cpp2::AppendLogResponse&& resp) { + stats::StatsManager::addValue(kAppendLogLatencyUs, + time::WallClock::fastNowInMicroSec() - beforeRpcUs); VLOG_IF(1, FLAGS_trace_raft) << self->idStr_ << "AppendLogResponse " << "code " << apache::thrift::util::enumNameSafe(resp.get_error_code()) << ", currTerm " @@ -333,6 +339,7 @@ nebula::cpp2::ErrorCode Host::startSendSnapshot() { << ", firstLogId in wal = " << part_->wal()->firstLogId() << ", lastLogId in wal = " << part_->wal()->lastLogId(); sendingSnapshot_ = true; + stats::StatsManager::addValue(kNumSendSnapshot); part_->snapshot_->sendSnapshot(part_, addr_) .thenValue([self = shared_from_this()](auto&& status) { std::lock_guard g(self->lock_); diff --git a/src/kvstore/raftex/RaftPart.cpp b/src/kvstore/raftex/RaftPart.cpp index 34d3c7e7f42..d2eb5ac2e51 100644 --- a/src/kvstore/raftex/RaftPart.cpp +++ b/src/kvstore/raftex/RaftPart.cpp @@ -12,11 +12,11 @@ #include "common/base/Base.h" #include "common/base/CollectNSucceeded.h" -#include "common/base/SlowOpTracker.h" #include "common/network/NetworkUtils.h" #include "common/stats/StatsManager.h" #include "common/thread/NamedThread.h" #include "common/thrift/ThriftClientManager.h" +#include "common/time/ScopedTimer.h" #include "common/time/WallClock.h" #include "common/utils/LogStrListIterator.h" #include "interface/gen-cpp2/RaftexServiceAsyncClient.h" @@ -741,18 +741,18 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) { prevLogTerm = lastLogTerm_; committed = committedLogId_; // Step 1: Write WAL - SlowOpTracker tracker; - if (!wal_->appendLogs(iter)) { - VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to write into WAL"; - res = nebula::cpp2::ErrorCode::E_RAFT_WAL_FAIL; - lastLogId_ = wal_->lastLogId(); - lastLogTerm_ = wal_->lastLogTerm(); - break; + { + SCOPED_TIMER(&execTime_); + if (!wal_->appendLogs(iter)) { + VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to write into WAL"; + res = nebula::cpp2::ErrorCode::E_RAFT_WAL_FAIL; + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); + break; + } } + stats::StatsManager::addValue(kAppendWalLatencyUs, execTime_); lastId = wal_->lastLogId(); - if (tracker.slow()) { - tracker.output(idStr_, folly::stringPrintf("Write WAL, total %ld", lastId - prevLogId + 1)); - } VLOG(4) << idStr_ << "Succeeded writing logs [" << iter.firstLogId() << ", " << lastId << "] to WAL"; } while (false); @@ -797,7 +797,7 @@ void RaftPart::replicateLogs(folly::EventBase* eb, << iter.firstLogId() << ", " << lastLogId << "] to all peer hosts"; lastMsgSentDur_.reset(); - SlowOpTracker tracker; + auto beforeAppendLogUs = time::WallClock::fastNowInMicroSec(); collectNSucceeded(gen::from(hosts) | gen::map([self = shared_from_this(), eb, @@ -830,13 +830,11 @@ void RaftPart::replicateLogs(folly::EventBase* eb, prevLogId, prevLogTerm, pHosts = std::move(hosts), - tracker](folly::Try&& result) mutable { + beforeAppendLogUs](folly::Try&& result) mutable { VLOG(4) << self->idStr_ << "Received enough response"; CHECK(!result.hasException()); - if (tracker.slow()) { - tracker.output(self->idStr_, - folly::stringPrintf("Total send logs: %ld", lastLogId - prevLogId + 1)); - } + stats::StatsManager::addValue(kReplicateLogLatencyUs, + time::WallClock::fastNowInMicroSec() - beforeAppendLogUs); self->processAppendLogResponses(*result, eb, std::move(it), @@ -918,7 +916,6 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, { auto walIt = wal_->iterator(committedId + 1, lastLogId); - SlowOpTracker tracker; // Step 3: Commit the batch auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), true); if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { @@ -938,10 +935,6 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, } else { LOG(FATAL) << idStr_ << "Failed to commit logs"; } - if (tracker.slow()) { - tracker.output(idStr_, - folly::stringPrintf("Total commit: %ld", committedLogId_ - committedId)); - } VLOG(4) << idStr_ << "Leader succeeded in committing the logs " << committedId + 1 << " to " << lastLogId; } @@ -1190,6 +1183,7 @@ folly::Future RaftPart::leaderElection(bool isPreVote) { auto proposedTerm = voteReq.get_term(); auto resps = ElectionResponses(); + stats::StatsManager::addValue(kNumStartElect); if (hosts.empty()) { auto ret = handleElectionResponses(resps, hosts, proposedTerm, isPreVote); inElection_ = false; @@ -1467,7 +1461,7 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, // Reset the last message time lastMsgRecvDur_.reset(); isBlindFollower_ = false; - stats::StatsManager::addValue(kNumRaftVotes); + stats::StatsManager::addValue(kNumGrantVotes); return; } @@ -1590,7 +1584,13 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, std::make_move_iterator(req.get_log_str_list().begin() + diffIndex), std::make_move_iterator(req.get_log_str_list().end())); RaftLogIterator logIter(firstId, std::move(logEntries)); - if (wal_->appendLogs(logIter)) { + bool result = false; + { + SCOPED_TIMER(&execTime_); + result = wal_->appendLogs(logIter); + } + stats::StatsManager::addValue(kAppendWalLatencyUs, execTime_); + if (result) { CHECK_EQ(lastId, wal_->lastLogId()); lastLogId_ = wal_->lastLogId(); lastLogTerm_ = wal_->lastLogTerm(); diff --git a/src/kvstore/stats/KVStats.cpp b/src/kvstore/stats/KVStats.cpp index 6b8996f9b58..a6a152e404e 100644 --- a/src/kvstore/stats/KVStats.cpp +++ b/src/kvstore/stats/KVStats.cpp @@ -12,17 +12,30 @@ namespace nebula { stats::CounterId kCommitLogLatencyUs; stats::CounterId kCommitSnapshotLatencyUs; +stats::CounterId kAppendWalLatencyUs; +stats::CounterId kReplicateLogLatencyUs; +stats::CounterId kAppendLogLatencyUs; stats::CounterId kTransferLeaderLatencyUs; -stats::CounterId kNumRaftVotes; +stats::CounterId kNumStartElect; +stats::CounterId kNumGrantVotes; +stats::CounterId kNumSendSnapshot; void initKVStats() { kCommitLogLatencyUs = stats::StatsManager::registerHisto( "commit_log_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); kCommitSnapshotLatencyUs = stats::StatsManager::registerHisto( "commit_snapshot_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); + kAppendWalLatencyUs = stats::StatsManager::registerHisto( + "append_wal_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); + kReplicateLogLatencyUs = stats::StatsManager::registerHisto( + "replicate_log_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); + kAppendLogLatencyUs = stats::StatsManager::registerHisto( + "append_log_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); kTransferLeaderLatencyUs = stats::StatsManager::registerHisto( "transfer_leader_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); - kNumRaftVotes = stats::StatsManager::registerStats("num_raft_votes", "rate, sum"); + kNumStartElect = stats::StatsManager::registerStats("num_start_elect", "rate, sum"); + kNumGrantVotes = stats::StatsManager::registerStats("num_grant_votes", "rate, sum"); + kNumSendSnapshot = stats::StatsManager::registerStats("num_send_snapshot", "rate, sum"); } } // namespace nebula diff --git a/src/kvstore/stats/KVStats.h b/src/kvstore/stats/KVStats.h index 0cda92a301f..b1bcd283f94 100644 --- a/src/kvstore/stats/KVStats.h +++ b/src/kvstore/stats/KVStats.h @@ -14,8 +14,13 @@ namespace nebula { // Raft related stats extern stats::CounterId kCommitLogLatencyUs; extern stats::CounterId kCommitSnapshotLatencyUs; +extern stats::CounterId kAppendWalLatencyUs; +extern stats::CounterId kReplicateLogLatencyUs; +extern stats::CounterId kAppendLogLatencyUs; extern stats::CounterId kTransferLeaderLatencyUs; -extern stats::CounterId kNumRaftVotes; +extern stats::CounterId kNumStartElect; +extern stats::CounterId kNumGrantVotes; +extern stats::CounterId kNumSendSnapshot; void initKVStats(); diff --git a/tests/admin/test_configs.py b/tests/admin/test_configs.py index 600d4f0eee2..b9c1e4129f6 100644 --- a/tests/admin/test_configs.py +++ b/tests/admin/test_configs.py @@ -66,7 +66,6 @@ def test_configs(self): expected_result = [ ['GRAPH', 'v', 'int', 'MUTABLE', v], ['GRAPH', 'minloglevel', 'int', 'MUTABLE', 0], - ['GRAPH', 'slow_op_threshold_ms', 'int', 'MUTABLE', 100], ['GRAPH', 'heartbeat_interval_secs', 'int', 'MUTABLE', 1], ['GRAPH', 'meta_client_retry_times', 'int', 'MUTABLE', 3], ['GRAPH', 'accept_partial_success', 'bool', 'MUTABLE', False], @@ -86,7 +85,6 @@ def test_configs(self): ['STORAGE', 'wal_ttl', 'int', 'MUTABLE', 14400], ['STORAGE', 'minloglevel', 'int', 'MUTABLE', 0], ['STORAGE', 'custom_filter_interval_secs', 'int', 'MUTABLE', 86400], - ['STORAGE', 'slow_op_threshold_ms', 'int', 'MUTABLE', 100], ['STORAGE', 'heartbeat_interval_secs', 'int', 'MUTABLE', 1], ['STORAGE', 'meta_client_retry_times', 'int', 'MUTABLE', 3], ['STORAGE', 'rocksdb_db_options', 'map', 'MUTABLE', {}],