From 5f72407ce7e48f87abf5ece6652ef2496b487a87 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Mon, 28 Oct 2019 08:19:07 -0700 Subject: [PATCH] Retry on RepositoryException in SLM tests (#48548) Due to a bug, GETing a snapshot can cause a RespositoryException to be thrown. This error is transient and should be retried, rather than causing the test to fail. This commit converts those RepositoryExceptions into AssertionErrors so that they will be retried in code wrapped in assertBusy. --- .../documentation/ILMDocumentationIT.java | 3 ++- .../slm/SLMSnapshotBlockingIntegTests.java | 24 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java index f250b0a939319..333c8c0b087ab 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java @@ -765,7 +765,6 @@ public void onFailure(Exception e) { assertTrue(latch.await(30L, TimeUnit.SECONDS)); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/46021") public void testAddSnapshotLifecyclePolicy() throws Exception { RestHighLevelClient client = highLevelClient(); @@ -1057,6 +1056,8 @@ private void assertSnapshotExists(final RestHighLevelClient client, final String } catch (Exception e) { if (e.getMessage().contains("snapshot_missing_exception")) { fail("snapshot does not exist: " + snapshotName); + } else if (e.getMessage().contains("repository_exception")) { + fail("got a respository_exception, retrying. original message: " + e.getMessage()); } throw e; } diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java index de25a1c6af31b..a2ad9e32fd5d7 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java @@ -6,7 +6,6 @@ package org.elasticsearch.xpack.slm; -import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; @@ -124,7 +123,6 @@ public void testSnapshotInProgress() throws Exception { } } - @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/48441") public void testRetentionWhileSnapshotInProgress() throws Exception { final String indexName = "test"; final String policyId = "slm-policy"; @@ -144,8 +142,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception { logger.info("--> kicked off snapshot {}", completedSnapshotName); assertBusy(() -> { try { - SnapshotsStatusResponse s = - client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get(); + SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName); assertThat("expected a snapshot but none were returned", s.getSnapshots().size(), equalTo(1)); SnapshotStatus status = s.getSnapshots().get(0); logger.info("--> waiting for snapshot {} to be completed, got: {}", completedSnapshotName, status.getState()); @@ -213,13 +210,8 @@ public void testRetentionWhileSnapshotInProgress() throws Exception { client().admin().cluster().prepareReroute().get(); logger.info("--> waiting for snapshot to be deleted"); try { - SnapshotsStatusResponse s = - client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get(); + SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName); assertNull("expected no snapshot but one was returned", s.getSnapshots().get(0)); - } catch (RepositoryException e) { - // Concurrent status calls and write operations may lead to failures in determining the current repository generation - // TODO: Remove this hack once tracking the current repository generation has been made consistent - throw new AssertionError(e); } catch (SnapshotMissingException e) { // Great, we wanted it to be deleted! } @@ -383,6 +375,18 @@ private void testUnsuccessfulSnapshotRetention(boolean partialSuccess) throws Ex } } + private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) { + try { + return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get(); + } catch (RepositoryException e) { + // Convert this to an AssertionError so that it can be retried in an assertBusy - this is often a transient error because + // concurrent status calls and write operations may lead to failures in determining the current repository generation + // TODO: Remove this hack once tracking the current repository generation has been made consistent + logger.warn(e); + throw new AssertionError(e); + } + } + private void createAndPopulateIndex(String indexName) throws InterruptedException { logger.info("--> creating and populating index [{}]", indexName); assertAcked(prepareCreate(indexName, 0, Settings.builder()