From 781c31ea65933d5bc5765edef389c5ec5d933715 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Sat, 6 Apr 2019 10:17:29 -0400 Subject: [PATCH] Suppress lease background sync failures if stopping (#40902) If the transport service is stopped, likely because we are shutting down, and a retention lease background sync fires the logs will display a warn message and stacktrace. Yet, this situaton is harmless and can happen as a normal course of business when shutting down. This commit suppresses the log messages in this case. --- .../main/java/org/elasticsearch/ExceptionsHelper.java | 9 +++++++++ .../support/replication/ReplicationOperation.java | 8 +++----- .../seqno/RetentionLeaseBackgroundSyncAction.java | 10 ++++++++-- .../org/elasticsearch/transport/TransportService.java | 10 ++++++++-- .../seqno/RetentionLeaseBackgroundSyncActionTests.java | 7 ++++++- 5 files changed, 34 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java index e0525127ee7e7..e4269a375dd6c 100644 --- a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java +++ b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.index.Index; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.transport.TransportException; import java.io.IOException; import java.io.PrintWriter; @@ -193,6 +194,14 @@ public static Throwable unwrap(Throwable t, Class... clazzes) { return null; } + public static boolean isTransportStoppedForAction(final Throwable t, final String action) { + final TransportException maybeTransport = + (TransportException) ExceptionsHelper.unwrap(t, TransportException.class); + return maybeTransport != null + && (maybeTransport.getMessage().equals("TransportService is closed stopped can't send request") + || maybeTransport.getMessage().equals("transport stopped, action: " + action)); + } + /** * Throws the specified exception. If null if specified then true is returned. */ diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java index 94de57b5f2d94..6f238fbaa771f 100644 --- a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java +++ b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java @@ -38,7 +38,6 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.node.NodeClosedException; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.transport.TransportException; import java.io.IOException; import java.util.ArrayList; @@ -200,10 +199,9 @@ public void onFailure(Exception replicaException) { private void onNoLongerPrimary(Exception failure) { final Throwable cause = ExceptionsHelper.unwrapCause(failure); - final boolean nodeIsClosing = cause instanceof NodeClosedException - || (cause instanceof TransportException && - ("TransportService is closed stopped can't send request".equals(cause.getMessage()) - || "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage()))); + final boolean nodeIsClosing = + cause instanceof NodeClosedException + || ExceptionsHelper.isTransportStoppedForAction(cause, "internal:cluster/shard/failure"); final String message; if (nodeIsClosing) { message = String.format(Locale.ROOT, diff --git a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java index 48c68c9f66a91..e56c509f452b0 100644 --- a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java +++ b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java @@ -115,9 +115,15 @@ public void backgroundSync( ActionListener.wrap( r -> {}, e -> { - if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) { - getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e); + if (ExceptionsHelper.isTransportStoppedForAction(e, ACTION_NAME + "[p]")) { + // we are likely shutting down + return; } + if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) != null) { + // the shard is closed + return; + } + getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e); })); } } diff --git a/server/src/main/java/org/elasticsearch/transport/TransportService.java b/server/src/main/java/org/elasticsearch/transport/TransportService.java index a7beacdea7be3..0fe7f4ce933ab 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportService.java +++ b/server/src/main/java/org/elasticsearch/transport/TransportService.java @@ -271,6 +271,7 @@ public void onFailure(Exception e) { } @Override public void doRun() { + // cf. ExceptionsHelper#isTransportStoppedForAction TransportException ex = new TransportException("transport stopped, action: " + holderToNotify.action()); holderToNotify.handler().handleException(ex); } @@ -615,8 +616,13 @@ private void sendRequestInternal(final Transport.C } try { if (lifecycle.stoppedOrClosed()) { - // if we are not started the exception handling will remove the RequestHolder again and calls the handler to notify - // the caller. It will only notify if the toStop code hasn't done the work yet. + /* + * If we are not started the exception handling will remove the request holder again and calls the handler to notify the + * caller. It will only notify if toStop hasn't done the work yet. + * + * Do not edit this exception message, it is currently relied upon in production code! + */ + // TODO: make a dedicated exception for a stopped transport service? cf. ExceptionsHelper#isTransportStoppedForAction throw new TransportException("TransportService is closed stopped can't send request"); } if (timeoutHandler != null) { diff --git a/server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncActionTests.java b/server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncActionTests.java index 3e357cefc74ed..8364724560924 100644 --- a/server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncActionTests.java +++ b/server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncActionTests.java @@ -41,6 +41,7 @@ import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportService; import org.mockito.ArgumentCaptor; @@ -204,9 +205,13 @@ protected void doExecute(Task task, Request request, ActionListener captor = ArgumentCaptor.forClass(ParameterizedMessage.class); verify(retentionLeaseSyncActionLogger).warn(captor.capture(), same(e)); final ParameterizedMessage message = captor.getValue();