diff --git a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchCluster.java b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchCluster.java index 00ef6ea57e074..8d59d52ad21ed 100644 --- a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchCluster.java +++ b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchCluster.java @@ -348,6 +348,11 @@ public void goToNextVersion() { writeUnicastHostsFiles(); } + public void fullRestart() { + stop(false); + start(); + } + public void nextNodeToNextVersion() { if (nodeIndex + 1 > nodes.size()) { throw new TestClustersException("Ran out of nodes to take to the next version"); diff --git a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java index cd2daea3378f7..4069207640d8c 100644 --- a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java @@ -653,24 +653,6 @@ static String toStr(Response response) throws IOException { return EntityUtils.toString(response.getEntity()); } - static void assertNoFailures(Map response) { - int failed = (int) XContentMapValues.extractValue("_shards.failed", response); - assertEquals(0, failed); - } - - void assertTotalHits(int expectedTotalHits, Map response) { - int actualTotalHits = extractTotalHits(response); - assertEquals(response.toString(), expectedTotalHits, actualTotalHits); - } - - int extractTotalHits(Map response) { - if (isRunningAgainstOldCluster() && getOldClusterVersion().before(Version.V_7_0_0)) { - return (Integer) XContentMapValues.extractValue("hits.total", response); - } else { - return (Integer) XContentMapValues.extractValue("hits.total.value", response); - } - } - /** * Tests that a single document survives. Super basic smoke test. */ @@ -708,6 +690,12 @@ public void testEmptyShard() throws IOException { // before timing out .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms") .put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster + if (getOldClusterVersion().onOrAfter(Version.V_6_5_0)) { + settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), "-1"); + } createIndex(index, settings.build()); } ensureGreen(index); @@ -1429,4 +1417,43 @@ public void testTurnOffTranslogRetentionAfterUpgraded() throws Exception { ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index); } } + + public void testRecoveryWithTranslogRetentionDisabled() throws Exception { + if (isRunningAgainstOldCluster()) { + final Settings.Builder settings = Settings.builder() + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1); + if (getOldClusterVersion().onOrAfter(Version.V_6_5_0)) { + settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), "-1"); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING.getKey(), "1kb"); + } + createIndex(index, settings.build()); + ensureGreen(index); + int numDocs = randomIntBetween(0, 100); + for (int i = 0; i < numDocs; i++) { + indexDocument(Integer.toString(i)); + if (rarely()) { + flush(index, randomBoolean()); + } + } + client().performRequest(new Request("POST", "/" + index + "/_refresh")); + if (randomBoolean()) { + ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index); + } + if (randomBoolean()) { + flush(index, randomBoolean()); + } else if (randomBoolean()) { + performSyncedFlush(index); + } + saveInfoDocument("doc_count", Integer.toString(numDocs)); + } + ensureGreen(index); + final int numDocs = Integer.parseInt(loadInfoDocument("doc_count")); + assertTotalHits(numDocs, entityAsMap(client().performRequest(new Request("GET", "/" + index + "/_search")))); + } } diff --git a/qa/translog-policy/build.gradle b/qa/translog-policy/build.gradle new file mode 100644 index 0000000000000..1eaa52a4d98a7 --- /dev/null +++ b/qa/translog-policy/build.gradle @@ -0,0 +1,101 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import org.elasticsearch.gradle.Version +import org.elasticsearch.gradle.info.BuildParams +import org.elasticsearch.gradle.testclusters.RestTestRunnerTask +import org.elasticsearch.gradle.testclusters.TestDistribution + +apply plugin: 'elasticsearch.testclusters' +apply plugin: 'elasticsearch.standalone-test' +apply from : "$rootDir/gradle/bwc-test.gradle" + +for (Version bwcVersion : BuildParams.bwcVersions.indexCompatible) { + String baseName = "v${bwcVersion}" + + testClusters { + "${baseName}" { + versions = [bwcVersion.toString(), project.version] + numberOfNodes = 2 + setting 'http.content_type.required', 'true' + } + } + + tasks.register("${baseName}#Step1OldClusterTest", RestTestRunnerTask) { + useCluster testClusters."${baseName}" + mustRunAfter(precommit) + systemProperty 'tests.test_step', 'step1' + systemProperty 'tests.is_old_cluster', 'true' + } + + tasks.register("${baseName}#Step2OldClusterTest", RestTestRunnerTask) { + useCluster testClusters."${baseName}" + dependsOn "${baseName}#Step1OldClusterTest" + doFirst { + testClusters."${baseName}".fullRestart() + } + systemProperty 'tests.test_step', 'step2' + systemProperty 'tests.is_old_cluster', 'true' + } + + tasks.register("${baseName}#Step3NewClusterTest", RestTestRunnerTask) { + useCluster testClusters."${baseName}" + dependsOn "${baseName}#Step2OldClusterTest" + doFirst { + testClusters."${baseName}".goToNextVersion() + } + systemProperty 'tests.test_step', 'step3' + systemProperty 'tests.is_old_cluster', 'false' + } + + tasks.register("${baseName}#Step4NewClusterTest", RestTestRunnerTask) { + useCluster testClusters."${baseName}" + dependsOn "${baseName}#Step3NewClusterTest" + doFirst { + testClusters."${baseName}".fullRestart() + } + systemProperty 'tests.test_step', 'step4' + systemProperty 'tests.is_old_cluster', 'false' + } + + String oldVersion = bwcVersion.toString().minus("-SNAPSHOT") + tasks.matching { it.name.startsWith(baseName) && it.name.endsWith("ClusterTest") }.configureEach { + it.systemProperty 'tests.old_cluster_version', oldVersion + it.nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") + it.nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") + } + + tasks.register(bwcTaskName(bwcVersion)) { + dependsOn tasks.named("${baseName}#Step4NewClusterTest") + } +} + +configurations { + testArtifacts.extendsFrom testRuntime +} + +task testJar(type: Jar) { + appendix 'test' + from sourceSets.test.output +} + +artifacts { + testArtifacts testJar +} diff --git a/qa/translog-policy/src/test/java/org/elasticsearch/upgrades/TranslogPolicyIT.java b/qa/translog-policy/src/test/java/org/elasticsearch/upgrades/TranslogPolicyIT.java new file mode 100644 index 0000000000000..85a03fecc2320 --- /dev/null +++ b/qa/translog-policy/src/test/java/org/elasticsearch/upgrades/TranslogPolicyIT.java @@ -0,0 +1,147 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.upgrades; + +import org.elasticsearch.Version; +import org.elasticsearch.client.Request; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.IndexSettings; +import org.junit.Before; + +import java.io.IOException; +import java.util.Locale; + +/** + * Ensures that we correctly trim unsafe commits when migrating from a translog generation to the sequence number based policy. + * See https://github.com/elastic/elasticsearch/issues/57091 + */ +public class TranslogPolicyIT extends AbstractFullClusterRestartTestCase { + + private enum TestStep { + STEP1_OLD_CLUSTER("step1"), + STEP2_OLD_CLUSTER("step2"), + STEP3_NEW_CLUSTER("step3"), + STEP4_NEW_CLUSTER("step4"); + + private final String name; + + TestStep(String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } + + public static TestStep parse(String value) { + switch (value) { + case "step1": + return STEP1_OLD_CLUSTER; + case "step2": + return STEP2_OLD_CLUSTER; + case "step3": + return STEP3_NEW_CLUSTER; + case "step4": + return STEP4_NEW_CLUSTER; + default: + throw new AssertionError("unknown test step: " + value); + } + } + } + + protected static final TestStep TEST_STEP = TestStep.parse(System.getProperty("tests.test_step")); + + private String index; + private String type; + + @Before + public void setIndex() { + index = getTestName().toLowerCase(Locale.ROOT); + } + + @Before + public void setType() { + type = getOldClusterVersion().before(Version.V_6_7_0) ? "doc" : "_doc"; + } + + public void testEmptyIndex() throws Exception { + if (TEST_STEP == TestStep.STEP1_OLD_CLUSTER) { + final Settings.Builder settings = Settings.builder() + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, between(0, 1)); + if (getOldClusterVersion().onOrAfter(Version.V_6_5_0)) { + settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), "-1"); + } + createIndex(index, settings.build()); + } + ensureGreen(index); + assertTotalHits(0, entityAsMap(client().performRequest(new Request("GET", "/" + index + "/_search")))); + } + + public void testRecoverReplica() throws Exception { + int numDocs = 100; + if (TEST_STEP == TestStep.STEP1_OLD_CLUSTER) { + final Settings.Builder settings = Settings.builder() + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1); + if (getOldClusterVersion().onOrAfter(Version.V_6_5_0)) { + settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), "-1"); + } + if (randomBoolean()) { + settings.put(IndexSettings.INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING.getKey(), "1kb"); + } + createIndex(index, settings.build()); + ensureGreen(index); + for (int i = 0; i < numDocs; i++) { + indexDocument(Integer.toString(i)); + if (rarely()) { + flush(index, randomBoolean()); + } + } + client().performRequest(new Request("POST", "/" + index + "/_refresh")); + if (randomBoolean()) { + ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index); + } + if (randomBoolean()) { + flush(index, randomBoolean()); + } else if (randomBoolean()) { + performSyncedFlush(index); + } + } + ensureGreen(index); + assertTotalHits(100, entityAsMap(client().performRequest(new Request("GET", "/" + index + "/_search")))); + } + + private void indexDocument(String id) throws IOException { + final Request indexRequest = new Request("POST", "/" + index + "/" + type + "/" + id); + indexRequest.setJsonEntity(Strings.toString(JsonXContent.contentBuilder().startObject().field("f", "v").endObject())); + assertOK(client().performRequest(indexRequest)); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java b/server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java index 77bc5f60968d0..31876463443f6 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java +++ b/server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java @@ -179,7 +179,7 @@ synchronized boolean releaseCommit(final IndexCommit snapshotCommit) { */ public static IndexCommit findSafeCommitPoint(List commits, long globalCheckpoint) throws IOException { if (commits.isEmpty()) { - throw new IllegalArgumentException("Commit list must not empty"); + throw new IllegalArgumentException("Commit list must not be empty"); } final int keptPosition = indexOfKeptCommits(commits, globalCheckpoint); return commits.get(keptPosition); diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 007de8f096e06..b60a4853e5892 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -1414,14 +1414,16 @@ public long recoverLocallyUpToGlobalCheckpoint() { logger.debug("skip local recovery as failed to find the safe commit", e); return UNASSIGNED_SEQ_NO; } - if (safeCommit.isPresent() == false) { - logger.trace("skip local recovery as no safe commit found"); - return UNASSIGNED_SEQ_NO; - } - assert safeCommit.get().localCheckpoint <= globalCheckpoint : safeCommit.get().localCheckpoint + " > " + globalCheckpoint; try { maybeCheckIndex(); // check index here and won't do it again if ops-based recovery occurs recoveryState.setStage(RecoveryState.Stage.TRANSLOG); + if (safeCommit.isPresent() == false) { + assert globalCheckpoint == UNASSIGNED_SEQ_NO || indexSettings.getIndexVersionCreated().before(Version.V_6_2_0) : + "global checkpoint [" + globalCheckpoint + "] [ created version [" + indexSettings.getIndexVersionCreated() + "]"; + logger.trace("skip local recovery as no safe commit found"); + return UNASSIGNED_SEQ_NO; + } + assert safeCommit.get().localCheckpoint <= globalCheckpoint : safeCommit.get().localCheckpoint + " > " + globalCheckpoint; if (safeCommit.get().localCheckpoint == globalCheckpoint) { logger.trace("skip local recovery as the safe commit is up to date; safe commit {} global checkpoint {}", safeCommit.get(), globalCheckpoint); diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index a43adaa3c49b5..676f3a73e14d0 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -1519,8 +1519,12 @@ public void trimUnsafeCommits(final long lastSyncedGlobalCheckpoint, final long recoverableCommits.add(commit); } } - assert recoverableCommits.isEmpty() == false : "No commit point with translog found; " + - "commits [" + existingCommits + "], minRetainedTranslogGen [" + minRetainedTranslogGen + "]"; + // We could reach here if the node is restarted multiple times after upgraded without flushing a new index commit. + // In this case, we can safely consider all commits as the starting commit because we have trimmed the unsafe + // commits in the first restart. + if (recoverableCommits.isEmpty()) { + recoverableCommits.addAll(existingCommits); + } startingIndexCommit = CombinedDeletionPolicy.findSafeCommitPoint(recoverableCommits, lastSyncedGlobalCheckpoint); } else { // TODO: Asserts the starting commit is a safe commit once peer-recovery sets global checkpoint. diff --git a/server/src/test/java/org/elasticsearch/indices/recovery/PeerRecoveryTargetServiceTests.java b/server/src/test/java/org/elasticsearch/indices/recovery/PeerRecoveryTargetServiceTests.java index 18bf8c8e225dc..5fe9dd2c6fc36 100644 --- a/server/src/test/java/org/elasticsearch/indices/recovery/PeerRecoveryTargetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/indices/recovery/PeerRecoveryTargetServiceTests.java @@ -215,7 +215,8 @@ public void testPrepareIndexForPeerRecovery() throws Exception { // copy with truncated translog shard = newStartedShard(false); - globalCheckpoint = populateRandomData(shard).getGlobalCheckpoint(); + SeqNoStats seqNoStats = populateRandomData(shard); + globalCheckpoint = randomFrom(UNASSIGNED_SEQ_NO, seqNoStats.getMaxSeqNo()); replica = reinitShard(shard, ShardRoutingHelper.initWithSameId(shard.routingEntry(), RecoverySource.PeerRecoverySource.INSTANCE)); String translogUUID = Translog.createEmptyTranslog(replica.shardPath().resolveTranslog(), globalCheckpoint, @@ -233,6 +234,7 @@ public void testPrepareIndexForPeerRecovery() throws Exception { } assertThat(replica.recoveryState().getTranslog().recoveredOperations(), equalTo(0)); assertThat(replica.getLastKnownGlobalCheckpoint(), equalTo(UNASSIGNED_SEQ_NO)); + assertThat(replica.recoveryState().getStage(), equalTo(RecoveryState.Stage.TRANSLOG)); closeShards(replica); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java index 689ecd8fad295..c89d83924fa9b 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java @@ -1032,6 +1032,9 @@ protected static void createIndex(String name, Settings settings, String mapping entity += "}"; if (settings.getAsBoolean(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true) == false) { expectSoftDeletesWarning(request, name); + } else if (settings.hasValue(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey()) || + settings.hasValue(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey())) { + expectTranslogRetentionWarning(request); } request.setJsonEntity(entity); client().performRequest(request); @@ -1066,6 +1069,20 @@ protected static void expectSoftDeletesWarning(Request request, String indexName } } + protected static void expectTranslogRetentionWarning(Request request) { + final List expectedWarnings = Collections.singletonList( + "Translog retention settings [index.translog.retention.age] " + + "and [index.translog.retention.size] are deprecated and effectively ignored. They will be removed in a future version."); + final Builder requestOptions = RequestOptions.DEFAULT.toBuilder(); + if (nodeVersions.stream().allMatch(version -> version.onOrAfter(Version.V_7_7_0))) { + requestOptions.setWarningsHandler(warnings -> warnings.equals(expectedWarnings) == false); + request.setOptions(requestOptions); + } else if (nodeVersions.stream().anyMatch(version -> version.onOrAfter(Version.V_7_7_0))) { + requestOptions.setWarningsHandler(warnings -> warnings.isEmpty() == false && warnings.equals(expectedWarnings) == false); + request.setOptions(requestOptions); + } + } + protected static Map getIndexSettings(String index) throws IOException { Request request = new Request("GET", "/" + index + "/_settings"); request.addParameter("flat_settings", "true"); diff --git a/test/framework/src/main/java/org/elasticsearch/upgrades/AbstractFullClusterRestartTestCase.java b/test/framework/src/main/java/org/elasticsearch/upgrades/AbstractFullClusterRestartTestCase.java index 19bd1c12d2846..7b003131ff1d0 100644 --- a/test/framework/src/main/java/org/elasticsearch/upgrades/AbstractFullClusterRestartTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/upgrades/AbstractFullClusterRestartTestCase.java @@ -24,10 +24,12 @@ import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.test.rest.ESRestTestCase; import org.junit.Before; import java.io.IOException; +import java.util.Map; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; @@ -114,4 +116,22 @@ protected boolean preserveILMPoliciesUponCompletion() { protected boolean preserveSLMPoliciesUponCompletion() { return true; } + + protected void assertNoFailures(Map response) { + int failed = (int) XContentMapValues.extractValue("_shards.failed", response); + assertEquals(0, failed); + } + + protected void assertTotalHits(int expectedTotalHits, Map response) { + int actualTotalHits = extractTotalHits(response); + assertEquals(response.toString(), expectedTotalHits, actualTotalHits); + } + + protected int extractTotalHits(Map response) { + if (isRunningAgainstOldCluster() && getOldClusterVersion().before(Version.V_7_0_0)) { + return (Integer) XContentMapValues.extractValue("hits.total", response); + } else { + return (Integer) XContentMapValues.extractValue("hits.total.value", response); + } + } }