From f3cdba015b6b4830813783cc036b58c1c29e2d4f Mon Sep 17 00:00:00 2001 From: Myth Date: Fri, 9 Dec 2022 08:39:51 +0800 Subject: [PATCH 1/7] Add timeout when replica connect master --- src/cluster/replication.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index b757819f5e4..39c1c3ac9ae 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -236,18 +236,21 @@ void ReplicationThread::CallbacksStateMachine::Start() { handlers_.emplace_front(CallbacksStateMachine::WRITE, "auth write", authWriteCB); } + uint64_t connect_timestamp = 0, connect_timeout_ms = 3100; + while (!repl_->stop_flag_ && bev == nullptr) { - Status s = Util::SockConnect(repl_->host_, repl_->port_, &cfd); + if (Util::GetTimeStampMS() - connect_timestamp < 1000) { + sleep(1); + } + Status s = Util::SockConnect(repl_->host_, repl_->port_, &cfd, connect_timeout_ms); if (!s.IsOK()) { LOG(ERROR) << "[replication] Failed to connect the master, err: " << s.Msg(); - sleep(1); continue; } bev = bufferevent_socket_new(repl_->base_, cfd, BEV_OPT_CLOSE_ON_FREE); if (bev == nullptr) { close(cfd); LOG(ERROR) << "[replication] Failed to create the event socket"; - sleep(1); continue; } } From e10f6f539eb9b3e6a9de158a73cbe3c77347ea36 Mon Sep 17 00:00:00 2001 From: Myth Date: Fri, 9 Dec 2022 10:59:05 +0800 Subject: [PATCH 2/7] Add time_util.h --- src/cluster/replication.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index 39c1c3ac9ae..76401ef70e6 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -42,6 +42,7 @@ #include "status.h" #include "storage/batch_debugger.h" #include "thread_util.h" +#include "time_util.h" Status FeedSlaveThread::Start() { try { From cd3d799041302b12f89f375d3baa06a21a673a07 Mon Sep 17 00:00:00 2001 From: Myth Date: Fri, 9 Dec 2022 11:12:06 +0800 Subject: [PATCH 3/7] update connect_timestamp --- src/cluster/replication.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index 76401ef70e6..dd6d950aa73 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -243,6 +243,7 @@ void ReplicationThread::CallbacksStateMachine::Start() { if (Util::GetTimeStampMS() - connect_timestamp < 1000) { sleep(1); } + connect_timestamp = Util::GetTimeStampMS(); Status s = Util::SockConnect(repl_->host_, repl_->port_, &cfd, connect_timeout_ms); if (!s.IsOK()) { LOG(ERROR) << "[replication] Failed to connect the master, err: " << s.Msg(); From c930571726eee44b98d01aa26fa3a609e5c07484 Mon Sep 17 00:00:00 2001 From: Myth Date: Fri, 9 Dec 2022 12:50:04 +0800 Subject: [PATCH 4/7] add some comments --- src/cluster/replication.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index dd6d950aa73..66fa823e288 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -237,13 +237,14 @@ void ReplicationThread::CallbacksStateMachine::Start() { handlers_.emplace_front(CallbacksStateMachine::WRITE, "auth write", authWriteCB); } - uint64_t connect_timestamp = 0, connect_timeout_ms = 3100; + uint64_t last_connect_timestamp = 0, connect_timeout_ms = 3100; while (!repl_->stop_flag_ && bev == nullptr) { - if (Util::GetTimeStampMS() - connect_timestamp < 1000) { + if (Util::GetTimeStampMS() - last_connect_timestamp < 1000) { + // prevent frequent re-connect when the master is down with the connection refused error sleep(1); } - connect_timestamp = Util::GetTimeStampMS(); + last_connect_timestamp = Util::GetTimeStampMS(); Status s = Util::SockConnect(repl_->host_, repl_->port_, &cfd, connect_timeout_ms); if (!s.IsOK()) { LOG(ERROR) << "[replication] Failed to connect the master, err: " << s.Msg(); From 06edc97d1534a6e03996832c20047564031b2664 Mon Sep 17 00:00:00 2001 From: Myth Date: Mon, 12 Dec 2022 16:45:40 +0800 Subject: [PATCH 5/7] fix ScopeExit and SockConnect bug --- src/common/io_util.cc | 4 +--- src/common/scope_exit.h | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/common/io_util.cc b/src/common/io_util.cc index 99e16f32e15..8f15339fa9c 100644 --- a/src/common/io_util.cc +++ b/src/common/io_util.cc @@ -151,9 +151,7 @@ Status SockConnect(const std::string &host, uint32_t port, int *fd, uint64_t con sin.sin_port = htons(port); fcntl(*fd, F_SETFL, O_NONBLOCK); - if (connect(*fd, reinterpret_cast(&sin), sizeof(sin))) { - return Status::FromErrno(); - } + connect(*fd, reinterpret_cast(&sin), sizeof(sin)); auto retmask = Util::aeWait(*fd, AE_WRITABLE, conn_timeout); if ((retmask & AE_WRITABLE) == 0 || (retmask & AE_ERROR) != 0 || (retmask & AE_HUP) != 0) { diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h index 93e012595a2..2668091f54a 100644 --- a/src/common/scope_exit.h +++ b/src/common/scope_exit.h @@ -35,9 +35,9 @@ struct ScopeExit { if (enabled_) f_(); } - void Enable() { enabled_ = false; } + void Enable() { enabled_ = true; } - void Disable() { enabled_ = true; } + void Disable() { enabled_ = false; } bool enabled_; F f_; From a4b02376842597b6a3c4bcd37ef1a95b6ff96008 Mon Sep 17 00:00:00 2001 From: Myth Date: Mon, 12 Dec 2022 17:02:02 +0800 Subject: [PATCH 6/7] fix clang-tidy --- src/cluster/replication.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index 66fa823e288..becc936aaef 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -237,7 +237,7 @@ void ReplicationThread::CallbacksStateMachine::Start() { handlers_.emplace_front(CallbacksStateMachine::WRITE, "auth write", authWriteCB); } - uint64_t last_connect_timestamp = 0, connect_timeout_ms = 3100; + int last_connect_timestamp = 0, connect_timeout_ms = 3100; while (!repl_->stop_flag_ && bev == nullptr) { if (Util::GetTimeStampMS() - last_connect_timestamp < 1000) { From c566b5b987e914bff215dc986d4db94666c120ef Mon Sep 17 00:00:00 2001 From: Myth Date: Mon, 12 Dec 2022 17:06:40 +0800 Subject: [PATCH 7/7] fix clang-tidy --- src/cluster/replication.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc index becc936aaef..bd24d997bdf 100644 --- a/src/cluster/replication.cc +++ b/src/cluster/replication.cc @@ -237,7 +237,8 @@ void ReplicationThread::CallbacksStateMachine::Start() { handlers_.emplace_front(CallbacksStateMachine::WRITE, "auth write", authWriteCB); } - int last_connect_timestamp = 0, connect_timeout_ms = 3100; + uint64_t last_connect_timestamp = 0; + int connect_timeout_ms = 3100; while (!repl_->stop_flag_ && bev == nullptr) { if (Util::GetTimeStampMS() - last_connect_timestamp < 1000) {