From 1da880979412c70f88f34e4c89be3055966edde8 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Fri, 9 Aug 2024 11:26:15 +0100 Subject: [PATCH] Skip leadership check if the etcd instance is actively processing heartbeats Signed-off-by: Benjamin Wang --- server/etcdserver/raft.go | 14 ++++++++++++-- server/etcdserver/server.go | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/server/etcdserver/raft.go b/server/etcdserver/raft.go index d397612af9c4..fd4b5dac3371 100644 --- a/server/etcdserver/raft.go +++ b/server/etcdserver/raft.go @@ -80,7 +80,9 @@ type toApply struct { type raftNode struct { lg *zap.Logger - tickMu *sync.Mutex + tickMu *sync.RWMutex + // timestamp of the latest tick + latestTickTs time.Time raftNodeConfig // a chan to send/receive snapshot @@ -132,8 +134,9 @@ func newRaftNode(cfg raftNodeConfig) *raftNode { raft.SetLogger(lg) r := &raftNode{ lg: cfg.lg, - tickMu: new(sync.Mutex), + tickMu: new(sync.RWMutex), raftNodeConfig: cfg, + latestTickTs: time.Now(), // set up contention detectors for raft heartbeat message. // expect to send a heartbeat within 2 heartbeat intervals. td: contention.NewTimeoutDetector(2 * cfg.heartbeat), @@ -155,9 +158,16 @@ func newRaftNode(cfg raftNodeConfig) *raftNode { func (r *raftNode) tick() { r.tickMu.Lock() r.Tick() + r.latestTickTs = time.Now() r.tickMu.Unlock() } +func (r *raftNode) getLatestTickTs() time.Time { + r.tickMu.RLock() + defer r.tickMu.RUnlock() + return r.latestTickTs +} + // start prepares and starts raftNode in a new goroutine. It is no longer safe // to modify the fields after it has been started. 
func (r *raftNode) start(rh *raftReadyHandler) { diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 6708f71bf9af..0600a31b8960 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -904,10 +904,26 @@ func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) { }) } +// isActive checks if the etcd instance is still actively processing the +// heartbeat messages (ticks). It returns false if no tick has been +// processed within the last 3 * TickMs milliseconds. +func (s *EtcdServer) isActive() bool { + latestTickTs := s.r.getLatestTickTs() + threshold := 3 * time.Duration(s.Cfg.TickMs) * time.Millisecond + return latestTickTs.Add(threshold).After(time.Now()) +} + // ensureLeadership checks whether current member is still the leader. func (s *EtcdServer) ensureLeadership() bool { lg := s.Logger() + if s.isActive() { + lg.Debug("The member is active, skip checking leadership", + zap.Time("latestTickTs", s.r.getLatestTickTs()), + zap.Time("now", time.Now())) + return true + } + ctx, cancel := context.WithTimeout(s.ctx, s.Cfg.ReqTimeout()) defer cancel() if err := s.linearizableReadNotify(ctx); err != nil {