From 51a300670e75d7b65a1fdd8eec54f3862e2c6a0f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 27 Sep 2022 20:41:17 +0200 Subject: [PATCH] [Transform] Don't fail a transform due to ILM closing an index (#90396) (#90402) A transform can fail due to a ClusterBlockException that is reported as non-retryable. This is a special kind of race condition where the initial checks pass, but something changes between the check and the action. In this particular case, a wildcard index pattern was resolved to concrete index names, and one of the indices was closed (by ILM) before the transform ran the search operation. Pragmatically, we should handle a cluster block exception as a retryable error. fixes #89802 Co-authored-by: Elastic Machine --- docs/changelog/90396.yaml | 6 ++++ .../transforms/TransformFailureHandler.java | 4 +++ .../TransformFailureHandlerTests.java | 33 +++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 docs/changelog/90396.yaml diff --git a/docs/changelog/90396.yaml b/docs/changelog/90396.yaml new file mode 100644 index 0000000000000..fbe2b636aa72e --- /dev/null +++ b/docs/changelog/90396.yaml @@ -0,0 +1,6 @@ +pr: 90396 +summary: Don't fail a transform on a ClusterBlockException, this may be due to ILM closing an index +area: Transform +type: bug +issues: + - 89802 diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandler.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandler.java index f087d2b789464..30122eb9f25b1 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandler.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandler.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; +import 
org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.common.breaker.CircuitBreakingException; import org.elasticsearch.script.ScriptException; import org.elasticsearch.xpack.core.transform.TransformMessages; @@ -65,6 +66,9 @@ void handleIndexerFailure(Exception e, SettingsConfig settingsConfig) { handleScriptException(scriptException, unattended); } else if (unwrappedException instanceof BulkIndexingException bulkIndexingException) { handleBulkIndexingException(bulkIndexingException, unattended, getNumFailureRetries(settingsConfig)); + } else if (unwrappedException instanceof ClusterBlockException clusterBlockException) { + // gh#89802 always retry for a cluster block exception, because a cluster block should be temporary. + retry(clusterBlockException, clusterBlockException.getDetailedMessage(), unattended, getNumFailureRetries(settingsConfig)); } else if (unwrappedException instanceof ElasticsearchException elasticsearchException) { handleElasticsearchException(elasticsearchException, unattended, getNumFailureRetries(settingsConfig)); } else if (unwrappedException instanceof IllegalArgumentException illegalArgumentException) { diff --git a/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandlerTests.java b/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandlerTests.java index 6a66d0b53fdf9..0218f5ae86226 100644 --- a/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandlerTests.java +++ b/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandlerTests.java @@ -11,6 +11,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.cluster.block.ClusterBlockException; +import 
org.elasticsearch.cluster.metadata.MetadataIndexStateService; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.breaker.CircuitBreakingException; import org.elasticsearch.rest.RestStatus; @@ -20,6 +22,9 @@ import org.elasticsearch.xpack.core.transform.transforms.TransformTaskState; import org.elasticsearch.xpack.transform.notifications.MockTransformAuditor; +import java.util.Map; +import java.util.Set; + import static java.util.Collections.singletonList; public class TransformFailureHandlerTests extends ESTestCase { @@ -113,6 +118,34 @@ public void testUnattended() { assertNoFailure(handler, new NullPointerException("NPE"), contextListener, settings); } + public void testClusterBlock() { + String transformId = randomAlphaOfLength(10); + SettingsConfig settings = new SettingsConfig.Builder().setNumFailureRetries(2).build(); + + MockTransformAuditor auditor = MockTransformAuditor.createMockAuditor(); + MockTransformContextListener contextListener = new MockTransformContextListener(); + TransformContext context = new TransformContext(TransformTaskState.STARTED, "", 0, contextListener); + context.setPageSize(500); + + TransformFailureHandler handler = new TransformFailureHandler(auditor, context, transformId); + + final ClusterBlockException clusterBlock = new ClusterBlockException( + Map.of("test-index", Set.of(MetadataIndexStateService.INDEX_CLOSED_BLOCK)) + ); + + handler.handleIndexerFailure(clusterBlock, settings); + assertFalse(contextListener.getFailed()); + assertEquals(1, contextListener.getFailureCountChangedCounter()); + + handler.handleIndexerFailure(clusterBlock, settings); + assertFalse(contextListener.getFailed()); + assertEquals(2, contextListener.getFailureCountChangedCounter()); + + handler.handleIndexerFailure(clusterBlock, settings); + assertTrue(contextListener.getFailed()); + assertEquals(3, contextListener.getFailureCountChangedCounter()); + } + private void assertNoFailure( TransformFailureHandler handler, 
Exception e,