From 03b8b67c40f6139bc14244655c7670cac48b9785 Mon Sep 17 00:00:00 2001 From: Ted Yu Date: Thu, 28 Jan 2021 09:39:34 -0800 Subject: [PATCH] [#6996] ybase: List the host for pending remote bootstrap Summary: When the pending remote bootstrap assertion is raised, there is currently not much information about which host(s) the pending remote bootstraps are coming from: ``` 21238 | | F | 50.310855 | 5315 | ts_tablet_manager.cc:1599 | P PEER_B: Waited for 30.000sms. Still had 1 pending remote bootstraps -- | -- | -- | -- | -- | -- | -- 21239 | Fatal failure details written to /nfusr/centos-gcp-cloud/jenkins-worker-4b0/jenkins/jenkins-github-yugabyte-db-phabricator-73645/build/asan-clang-dynamic-ninja/yb-test-logs/tests-client__snapshot-txn-test/SnapshotTxnTest_RemoteBootstrapOnStart.fatal_failure_details.2021-01-23T01_39_12.pid5315.txt 21240 | F20210123 01:39:12 ../../src/yb/tserver/ts_tablet_manager.cc:1599] P PEER_B: Waited for 30.000sms. Still had 1 pending remote bootstraps 21241 | @ 0x7f47b08f9c34 yb::LogFatalHandlerSink::send(int, char const*, char const*, int, tm const*, char const*, unsigned long) (src/yb/util/logging.cc:474) 21242 | @ 0x7f47af1fa9c2 21243 | @ 0x7f47af1caf31 21244 | @ 0x7f47af1ceb7c 21245 | @ 0x7f47af1ce15d 21246 | @ 0x7f47c762c2ac yb::tserver::TSTabletManager::StartShutdown() (src/yb/tserver/ts_tablet_manager.cc:1599) 21247 | @ 0x7f47c75494cc yb::tserver::TabletServer::Shutdown() (src/yb/tserver/tablet_server.cc:366) ``` This revision adds host information (the source address) to the log message so that troubleshooting is easier. 
Test Plan: Run test suite via Jenkins Reviewers: bogdan Reviewed By: bogdan Subscribers: ybase Differential Revision: https://phabricator.dev.yugabyte.com/D10454 --- src/yb/tserver/ts_tablet_manager.cc | 23 +++++++++++++++++++++-- src/yb/tserver/ts_tablet_manager.h | 1 + 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index df8fd9c4926a..52129ff83944 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -1122,7 +1122,15 @@ Status TSTabletManager::StartRemoteBootstrap(const StartRemoteBootstrapRequestPB // - first mark as closing // - then wait for num_tablets_being_remote_bootstrapped_ == 0 ++num_tablets_being_remote_bootstrapped_; - auto decrement_num_rbs_se = ScopeExit([this](){ + auto private_addr = req.source_private_addr()[0].host(); + auto decrement_num_rbs_se = ScopeExit([this, &private_addr](){ + { + std::lock_guard lock(mutex_); + auto iter = bootstrap_source_addresses_.find(private_addr); + if (iter != bootstrap_source_addresses_.end()) { + bootstrap_source_addresses_.erase(iter); + } + } --num_tablets_being_remote_bootstrapped_; }); @@ -1143,6 +1151,7 @@ Status TSTabletManager::StartRemoteBootstrap(const StartRemoteBootstrapRequestPB scoped_refptr deleter; { std::lock_guard lock(mutex_); + bootstrap_source_addresses_.emplace(private_addr); if (ClosingUnlocked()) { auto result = STATUS_FORMAT( IllegalState, "StartRemoteBootstrap in wrong state: $0", @@ -1596,9 +1605,19 @@ void TSTabletManager::StartShutdown() { while (int remaining_rbs = num_tablets_being_remote_bootstrapped_ > 0) { if (waited >= next_report_time) { if (waited >= kMaxWait) { + std::string addr = ""; + for (auto iter = bootstrap_source_addresses_.begin(); + iter != bootstrap_source_addresses_.end(); + iter++) { + if (iter == bootstrap_source_addresses_.begin()) { + addr += *iter; + } else { + addr += "," + *iter; + } + } LOG_WITH_PREFIX(DFATAL) << "Waited for " 
<< waited << "ms. Still had " - << remaining_rbs << " pending remote bootstraps"; + << remaining_rbs << " pending remote bootstraps: " + addr; } else { LOG_WITH_PREFIX(WARNING) << "Still waiting for " << remaining_rbs diff --git a/src/yb/tserver/ts_tablet_manager.h b/src/yb/tserver/ts_tablet_manager.h index 462f3396f182..818182674c97 100644 --- a/src/yb/tserver/ts_tablet_manager.h +++ b/src/yb/tserver/ts_tablet_manager.h @@ -582,6 +582,7 @@ class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::Table std::shared_ptr log_cache_gc_; std::shared_ptr block_based_table_mem_tracker_; + std::unordered_set bootstrap_source_addresses_; std::atomic num_tablets_being_remote_bootstrapped_{0};