From 6167101081845286502ec64249825d7ce8ebce05 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 22 Jan 2020 12:05:39 +0000 Subject: [PATCH 1/5] Log when probe succeeds but full connection fails It is permitted for nodes to accept transport connections at addresses other than their publish address, which allows a good deal of flexibility when configuring discovery. However, it is not unusual for users to misconfigure nodes to pick a publish address which is inaccessible to other nodes. We see this happen a lot if the nodes are on different networks separated by a proxy, or if the nodes are running in Docker with the wrong kind of network config. In this case we offer no useful feedback to the user unless they enable TRACE-level logs. It's particularly tricky to diagnose because if we test connectivity between the nodes (using their discovery addresses) then all will appear well. This commit adds a WARN-level log if this kind of misconfiguration is detected: the probe connection has succeeded (to indicate that we are really talking to a healthy Elasticsearch node) but the followup connection attempt fails. It also tidies up some loose ends in `HandshakingTransportAddressConnector`, removing some TODOs that need not be completed, and registering its accidentally-unregistered timeout settings. --- .../org/elasticsearch/ExceptionsHelper.java | 33 ++++++++ .../common/settings/ClusterSettings.java | 5 +- .../HandshakingTransportAddressConnector.java | 29 +++++-- .../elasticsearch/ExceptionsHelperTests.java | 23 ++++++ ...shakingTransportAddressConnectorTests.java | 80 +++++++++++++++++-- 5 files changed, 153 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java index 71b47d646417e..4a17450874c6e 100644 --- a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java +++ b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java @@ -274,6 +274,39 @@ public static void maybeDieOnAnotherThread(final Throwable throwable) { }); } + /** + * Get the message of this exception together with its causes, up to a maximum depth of 10. + */ + public static String getMessageIncludingCauses(final Throwable throwable) { + final StringBuilder stringBuilder = new StringBuilder(); + final StringBuilder closingStringBuilder = new StringBuilder(); + final Set seen = Collections.newSetFromMap(new IdentityHashMap<>()); + Throwable current = throwable; + while (closingStringBuilder.length() < 10) { + stringBuilder.append(current.getMessage()); + + if (seen.add(current) == false) { + stringBuilder.append(" [...loop...]"); + current = null; + break; + } + + current = current.getCause(); + if (current == null) { + break; + } + + stringBuilder.append(" ["); + closingStringBuilder.append(']'); + } + + if (current != null) { + stringBuilder.append("..."); + } + + return stringBuilder.append(closingStringBuilder).toString(); + } + /** * Deduplicate the failures by exception message and index. */ diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index f8ea133c0ed63..fcf0c0fe4e61b 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -65,6 +65,7 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.discovery.DiscoveryModule; +import org.elasticsearch.discovery.HandshakingTransportAddressConnector; import org.elasticsearch.discovery.PeerFinder; import org.elasticsearch.discovery.SeedHostsResolver; import org.elasticsearch.discovery.SettingsBasedSeedHostsProvider; @@ -466,7 +467,9 @@ public void apply(Settings value, Settings current, Settings previous) { TransportAddVotingConfigExclusionsAction.MAXIMUM_VOTING_CONFIG_EXCLUSIONS_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING, ClusterBootstrapService.UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING, - LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING); + LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING, + HandshakingTransportAddressConnector.PROBE_CONNECT_TIMEOUT_SETTING, + HandshakingTransportAddressConnector.PROBE_HANDSHAKE_TIMEOUT_SETTING); static List> BUILT_IN_SETTING_UPGRADERS = Collections.emptyList(); diff --git a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java index 2f111f003ed84..b495d5088057b 100644 --- a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java +++ b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java @@ -22,6 +22,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRunnable; @@ -74,7 +75,8 @@ public void connectToRemoteMasterNode(TransportAddress transportAddress, ActionL @Override protected void doRun() { - // TODO if transportService is already connected to this address then skip the handshaking + // We could skip this if the transportService were already connected to the given address, but the savings would be minimal + // so we open a new connection anyway. final DiscoveryNode targetNode = new DiscoveryNode("", transportAddress.toString(), UUIDs.randomBase64UUID(Randomness.get()), // generated deterministically for reproducible tests @@ -99,17 +101,28 @@ protected void innerOnResponse(DiscoveryNode remoteNode) { IOUtils.closeWhileHandlingException(connection); if (remoteNode.equals(transportService.getLocalNode())) { - // TODO cache this result for some time? forever? listener.onFailure(new ConnectTransportException(remoteNode, "local node found")); } else if (remoteNode.isMasterNode() == false) { - // TODO cache this result for some time? listener.onFailure(new ConnectTransportException(remoteNode, "non-master-eligible node found")); } else { - transportService.connectToNode(remoteNode, ActionListener.delegateFailure(listener, - (l, ignored) -> { - logger.trace("[{}] full connection successful: {}", thisConnectionAttempt, remoteNode); - listener.onResponse(remoteNode); - })); + transportService.connectToNode(remoteNode, ActionListener.wrap(ignored -> { + logger.trace("[{}] completed full connection with [{}]", thisConnectionAttempt, remoteNode); + listener.onResponse(remoteNode); + }, e -> { + // we opened a connection and successfully performed a handshake, so we're definitely talking to + // a master-eligible node with a matching cluster name and a good version, but the attempt to + // open a full connection to its publish address failed; a common reason is that the remote + // node is listening on 0.0.0.0 but has made an inappropriate choice for its publish address. + if (logger.isDebugEnabled()) { + logger.warn(new ParameterizedMessage( + "[{}] completed handshake with [{}] but followup connection failed", + thisConnectionAttempt, remoteNode), e); + } else { + logger.warn("[{}] completed handshake with [{}] but followup connection failed: {}", + thisConnectionAttempt, remoteNode, ExceptionsHelper.getMessageIncludingCauses(e)); + } + listener.onFailure(e); + })); } } catch (Exception e) { listener.onFailure(e); diff --git a/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java b/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java index 5e8cc4f602706..ec1e09eb98562 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java @@ -232,4 +232,27 @@ public void testCauseCycle() { ExceptionsHelper.unwrap(e1, IOException.class); ExceptionsHelper.unwrapCorruption(e1); } + + public void testGetMessageIncludingCauses() { + assertThat(ExceptionsHelper.getMessageIncludingCauses(new Exception("root cause")), equalTo("root cause")); + assertThat(ExceptionsHelper.getMessageIncludingCauses(new Exception("wrapper", new Exception("root cause"))), + equalTo("wrapper [root cause]")); + + Exception exception = new Exception("root cause"); + for (int i = 0; i < 9; i++) { + exception = new Exception("wrapper " + i, exception); + } + assertThat(ExceptionsHelper.getMessageIncludingCauses(exception), equalTo( + "wrapper 8 [wrapper 7 [wrapper 6 [wrapper 5 [wrapper 4 [wrapper 3 [wrapper 2 [wrapper 1 [wrapper 0 [root cause]]]]]]]]]")); + + exception = new Exception("too many wrappers", exception); + assertThat(ExceptionsHelper.getMessageIncludingCauses(exception), equalTo( + "too many wrappers [wrapper 8 [wrapper 7 [wrapper 6 [wrapper 5 [wrapper 4 [wrapper 3 [wrapper 2 [wrapper 1 [wrapper 0 " + + "[...]]]]]]]]]]")); + + Exception e1 = new Exception("A"); + Exception e2 = new Exception("B", e1); + e1.initCause(e2); + assertThat(ExceptionsHelper.getMessageIncludingCauses(e1), equalTo("A [B [A [...loop...]]]")); + } } diff --git a/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java b/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java index d697cd5468620..7608fcc4f03af 100644 --- a/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java +++ b/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java @@ -19,16 +19,27 @@ package org.elasticsearch.discovery; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.MockLogAppender; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.MockTransport; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.TransportService.HandshakeResponse; @@ -44,10 +55,13 @@ import static org.elasticsearch.discovery.HandshakingTransportAddressConnector.PROBE_HANDSHAKE_TIMEOUT_SETTING; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.oneOf; public class HandshakingTransportAddressConnectorTests extends ESTestCase { private DiscoveryNode remoteNode; + private TransportAddress discoveryAddress; private TransportService transportService; private ThreadPool threadPool; private String remoteClusterName; @@ -55,6 +69,8 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase { private DiscoveryNode localNode; private boolean dropHandshake; + @Nullable // unless we want the full connection to fail + private TransportException fullConnectionFailure; @Before public void startServices() { @@ -66,17 +82,24 @@ public void startServices() { threadPool = new TestThreadPool("node", settings); remoteNode = null; + discoveryAddress = null; remoteClusterName = null; dropHandshake = false; + fullConnectionFailure = null; final MockTransport mockTransport = new MockTransport() { @Override protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) { super.onSendRequest(requestId, action, request, node); assertThat(action, equalTo(TransportService.HANDSHAKE_ACTION_NAME)); - assertEquals(remoteNode.getAddress(), node.getAddress()); + assertThat(discoveryAddress, notNullValue()); + assertThat(node.getAddress(), oneOf(discoveryAddress, remoteNode.getAddress())); if (dropHandshake == false) { - handleResponse(requestId, new HandshakeResponse(remoteNode, new ClusterName(remoteClusterName), Version.CURRENT)); + if (fullConnectionFailure != null && node.getAddress().equals(remoteNode.getAddress())) { + handleError(requestId, fullConnectionFailure); + } else { + handleResponse(requestId, new HandshakeResponse(remoteNode, new ClusterName(remoteClusterName), Version.CURRENT)); + } } } }; @@ -91,7 +114,7 @@ protected void onSendRequest(long requestId, String action, TransportRequest req } @After - public void stopServices() throws InterruptedException { + public void stopServices() { transportService.stop(); terminate(threadPool); } @@ -102,8 +125,9 @@ public void testConnectsToMasterNode() throws InterruptedException { remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT); remoteClusterName = "local-cluster"; + discoveryAddress = getDiscoveryAddress(); - handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), new ActionListener() { + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, new ActionListener() { @Override public void onResponse(DiscoveryNode discoveryNode) { receivedNode.set(discoveryNode); @@ -120,44 +144,84 @@ public void onFailure(Exception e) { assertEquals(remoteNode, receivedNode.get()); } + @TestLogging(reason="ensure logging happens", value="org.elasticsearch.discovery.HandshakingTransportAddressConnector:INFO") + public void testLogsFullConnectionFailureAfterSuccessfulHandshake() throws Exception { + + remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT); + remoteClusterName = "local-cluster"; + discoveryAddress = buildNewFakeTransportAddress(); + + fullConnectionFailure = new ConnectTransportException(remoteNode, "simulated", new ElasticsearchException("root cause")); + + FailureListener failureListener = new FailureListener(); + + MockLogAppender mockAppender = new MockLogAppender(); + mockAppender.start(); + mockAppender.addExpectation( + new MockLogAppender.SeenEventExpectation( + "message", + HandshakingTransportAddressConnector.class.getCanonicalName(), + Level.INFO, + "*completed handshake with [*] but followup connection failed: * simulated [root cause]*")); + Logger targetLogger = LogManager.getLogger(HandshakingTransportAddressConnector.class); + Loggers.addAppender(targetLogger, mockAppender); + + try { + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener); + failureListener.assertFailure(); + mockAppender.assertAllExpectationsMatched(); + } finally { + Loggers.removeAppender(targetLogger, mockAppender); + mockAppender.stop(); + } + } + public void testDoesNotConnectToNonMasterNode() throws InterruptedException { remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + discoveryAddress = getDiscoveryAddress(); remoteClusterName = "local-cluster"; FailureListener failureListener = new FailureListener(); - handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener); + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener); failureListener.assertFailure(); } public void testDoesNotConnectToLocalNode() throws Exception { remoteNode = localNode; + discoveryAddress = getDiscoveryAddress(); remoteClusterName = "local-cluster"; FailureListener failureListener = new FailureListener(); - handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener); + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener); failureListener.assertFailure(); } public void testDoesNotConnectToDifferentCluster() throws InterruptedException { remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT); + discoveryAddress = getDiscoveryAddress(); remoteClusterName = "another-cluster"; FailureListener failureListener = new FailureListener(); - handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener); + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener); failureListener.assertFailure(); } public void testHandshakeTimesOut() throws InterruptedException { remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT); + discoveryAddress = getDiscoveryAddress(); remoteClusterName = "local-cluster"; dropHandshake = true; FailureListener failureListener = new FailureListener(); - handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener); + handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener); Thread.sleep(PROBE_HANDSHAKE_TIMEOUT_SETTING.get(Settings.EMPTY).millis()); failureListener.assertFailure(); } + private TransportAddress getDiscoveryAddress() { + return randomBoolean() ? remoteNode.getAddress() : buildNewFakeTransportAddress(); + } + private class FailureListener implements ActionListener { final CountDownLatch completionLatch = new CountDownLatch(1); From b10b474fced35881d31057204aa062aeb22a3acb Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 22 Jan 2020 13:15:32 +0000 Subject: [PATCH 2/5] Meh let's always log the stack trace --- .../org/elasticsearch/ExceptionsHelper.java | 33 ------------------- .../HandshakingTransportAddressConnector.java | 11 ++----- .../elasticsearch/ExceptionsHelperTests.java | 23 ------------- 3 files changed, 3 insertions(+), 64 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java index 4a17450874c6e..71b47d646417e 100644 --- a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java +++ b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java @@ -274,39 +274,6 @@ public static void maybeDieOnAnotherThread(final Throwable throwable) { }); } - /** - * Get the message of this exception together with its causes, up to a maximum depth of 10. - */ - public static String getMessageIncludingCauses(final Throwable throwable) { - final StringBuilder stringBuilder = new StringBuilder(); - final StringBuilder closingStringBuilder = new StringBuilder(); - final Set seen = Collections.newSetFromMap(new IdentityHashMap<>()); - Throwable current = throwable; - while (closingStringBuilder.length() < 10) { - stringBuilder.append(current.getMessage()); - - if (seen.add(current) == false) { - stringBuilder.append(" [...loop...]"); - current = null; - break; - } - - current = current.getCause(); - if (current == null) { - break; - } - - stringBuilder.append(" ["); - closingStringBuilder.append(']'); - } - - if (current != null) { - stringBuilder.append("..."); - } - - return stringBuilder.append(closingStringBuilder).toString(); - } - /** * Deduplicate the failures by exception message and index. */ diff --git a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java index b495d5088057b..cce2e40b90086 100644 --- a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java +++ b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java @@ -113,14 +113,9 @@ protected void innerOnResponse(DiscoveryNode remoteNode) { // a master-eligible node with a matching cluster name and a good version, but the attempt to // open a full connection to its publish address failed; a common reason is that the remote // node is listening on 0.0.0.0 but has made an inappropriate choice for its publish address. - if (logger.isDebugEnabled()) { - logger.warn(new ParameterizedMessage( - "[{}] completed handshake with [{}] but followup connection failed", - thisConnectionAttempt, remoteNode), e); - } else { - logger.warn("[{}] completed handshake with [{}] but followup connection failed: {}", - thisConnectionAttempt, remoteNode, ExceptionsHelper.getMessageIncludingCauses(e)); - } + logger.warn(new ParameterizedMessage( + "[{}] completed handshake with [{}] but followup connection failed", + thisConnectionAttempt, remoteNode), e); listener.onFailure(e); })); } diff --git a/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java b/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java index ec1e09eb98562..5e8cc4f602706 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionsHelperTests.java @@ -232,27 +232,4 @@ public void testCauseCycle() { ExceptionsHelper.unwrap(e1, IOException.class); ExceptionsHelper.unwrapCorruption(e1); } - - public void testGetMessageIncludingCauses() { - assertThat(ExceptionsHelper.getMessageIncludingCauses(new Exception("root cause")), equalTo("root cause")); - assertThat(ExceptionsHelper.getMessageIncludingCauses(new Exception("wrapper", new Exception("root cause"))), - equalTo("wrapper [root cause]")); - - Exception exception = new Exception("root cause"); - for (int i = 0; i < 9; i++) { - exception = new Exception("wrapper " + i, exception); - } - assertThat(ExceptionsHelper.getMessageIncludingCauses(exception), equalTo( - "wrapper 8 [wrapper 7 [wrapper 6 [wrapper 5 [wrapper 4 [wrapper 3 [wrapper 2 [wrapper 1 [wrapper 0 [root cause]]]]]]]]]")); - - exception = new Exception("too many wrappers", exception); - assertThat(ExceptionsHelper.getMessageIncludingCauses(exception), equalTo( - "too many wrappers [wrapper 8 [wrapper 7 [wrapper 6 [wrapper 5 [wrapper 4 [wrapper 3 [wrapper 2 [wrapper 1 [wrapper 0 " + - "[...]]]]]]]]]]")); - - Exception e1 = new Exception("A"); - Exception e2 = new Exception("B", e1); - e1.initCause(e2); - assertThat(ExceptionsHelper.getMessageIncludingCauses(e1), equalTo("A [B [A [...loop...]]]")); - } } From 1bd1c8ede0a6cc1936c4613fca968b88b728982e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 22 Jan 2020 13:15:52 +0000 Subject: [PATCH 3/5] Fix level and message --- .../discovery/HandshakingTransportAddressConnectorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java b/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java index 7608fcc4f03af..5a83c5555949d 100644 --- a/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java +++ b/server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java @@ -161,8 +161,8 @@ public void testLogsFullConnectionFailureAfterSuccessfulHandshake() throws Excep new MockLogAppender.SeenEventExpectation( "message", HandshakingTransportAddressConnector.class.getCanonicalName(), - Level.INFO, - "*completed handshake with [*] but followup connection failed: * simulated [root cause]*")); + Level.WARN, + "*completed handshake with [*] but followup connection failed*")); Logger targetLogger = LogManager.getLogger(HandshakingTransportAddressConnector.class); Loggers.addAppender(targetLogger, mockAppender); From e787138641585b3e090d89ef49de78cd7e0d0165 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 22 Jan 2020 13:33:58 +0000 Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=98=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../discovery/HandshakingTransportAddressConnector.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java index cce2e40b90086..40cb43ade780d 100644 --- a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java +++ b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java @@ -22,7 +22,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRunnable; From bd87bcba8108c9b9041b8cf459b47e9f517a599b Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 22 Jan 2020 16:16:05 +0000 Subject: [PATCH 5/5] Raw ActionListener, no trappy exception handling --- .../HandshakingTransportAddressConnector.java | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java index 40cb43ade780d..727729b021b4b 100644 --- a/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java +++ b/server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java @@ -104,19 +104,26 @@ protected void innerOnResponse(DiscoveryNode remoteNode) { } else if (remoteNode.isMasterNode() == false) { listener.onFailure(new ConnectTransportException(remoteNode, "non-master-eligible node found")); } else { - transportService.connectToNode(remoteNode, ActionListener.wrap(ignored -> { - logger.trace("[{}] completed full connection with [{}]", thisConnectionAttempt, remoteNode); - listener.onResponse(remoteNode); - }, e -> { - // we opened a connection and successfully performed a handshake, so we're definitely talking to - // a master-eligible node with a matching cluster name and a good version, but the attempt to - // open a full connection to its publish address failed; a common reason is that the remote - // node is listening on 0.0.0.0 but has made an inappropriate choice for its publish address. - logger.warn(new ParameterizedMessage( - "[{}] completed handshake with [{}] but followup connection failed", - thisConnectionAttempt, remoteNode), e); - listener.onFailure(e); - })); + transportService.connectToNode(remoteNode, new ActionListener<>() { + @Override + public void onResponse(Void ignored) { + logger.trace("[{}] completed full connection with [{}]", thisConnectionAttempt, remoteNode); + listener.onResponse(remoteNode); + } + + @Override + public void onFailure(Exception e) { + // we opened a connection and successfully performed a handshake, so we're definitely + // talking to a master-eligible node with a matching cluster name and a good version, but + // the attempt to open a full connection to its publish address failed; a common reason is + // that the remote node is listening on 0.0.0.0 but has made an inappropriate choice for its + // publish address. + logger.warn(new ParameterizedMessage( + "[{}] completed handshake with [{}] but followup connection failed", + thisConnectionAttempt, remoteNode), e); + listener.onFailure(e); + } + }); } } catch (Exception e) { listener.onFailure(e);