From 8f8957260616abd05cad2d6de74ceab59bd2b7f6 Mon Sep 17 00:00:00 2001 From: Mate Szalay-Beko Date: Tue, 9 Mar 2021 13:32:16 +0100 Subject: [PATCH] ZOOKEEPER-4220: Potential redundant connection attempts during leader election The server code contains logic that tries to connect to another quorum member based on its server ID. We first look up the address assigned to this ID in the last committed quorum configuration. If the connection attempt fails (or the server is not known in the committed configuration), we then try to find the address in the last proposed quorum configuration. However, we should make the second connection attempt only if the address in the last proposed configuration differs from the address in the last committed configuration. Otherwise we would just retry connecting to the same address that failed a moment ago. The current code has a bug: it compares the address object references (using "!=") instead of comparing the objects themselves (using "not equals"). In certain edge cases (e.g. when the last proposed and last committed addresses are the same, but the address is unreachable) this bug can lead to unnecessary retries of the connection attempt. The correct behaviour is to mark this connection attempt as failed and wait for, e.g., the next election round, or for the other server to come online and initiate a connection to us. 
Author: Mate Szalay-Beko Reviewers: Andor Molnar , Damien Diederen Closes #1615 from symat/ZOOKEEPER-4220 --- .../apache/zookeeper/server/quorum/QuorumCnxManager.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 066ea9fcd79..64673f5b2c8 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -722,9 +722,10 @@ synchronized void connectOne(long sid){ if (connectOne(sid, lastCommittedView.get(sid).electionAddr)) return; } - if (lastSeenQV != null && lastProposedView.containsKey(sid) - && (!knownId || (lastProposedView.get(sid).electionAddr != - lastCommittedView.get(sid).electionAddr))) { + if (lastSeenQV != null + && lastProposedView.containsKey(sid) + && (!knownId + || !lastProposedView.get(sid).electionAddr.equals(lastCommittedView.get(sid).electionAddr))) { knownId = true; LOG.debug("Server {} knows {} already, it is in the lastProposedView", self.getId(), sid); if (connectOne(sid, lastProposedView.get(sid).electionAddr))