diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStep.java index bc821b05549d5..563e0f614ab8d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStep.java @@ -6,6 +6,8 @@ */ package org.elasticsearch.xpack.core.ilm; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.xcontent.ToXContentObject; @@ -19,18 +21,19 @@ /*** * A step that waits for snapshot to be taken by SLM to ensure we have backup before we delete the index. - * It will signal error if it can't get data needed to do the check (phase time from ILM and SLM metadata) - * and will only return success if execution of SLM policy took place after index entered deleted phase. + * It will signal error if it can't get data needed to do the check (action time from ILM and SLM metadata) + * and will only return success if execution of SLM policy took place after index entered the wait for snapshot action. */ public class WaitForSnapshotStep extends ClusterStateWaitStep { static final String NAME = "wait-for-snapshot"; + private static final Logger logger = LogManager.getLogger(WaitForSnapshotStep.class); private static final String MESSAGE_FIELD = "message"; private static final String POLICY_NOT_EXECUTED_MESSAGE = "waiting for policy '%s' to be executed since %s"; private static final String POLICY_NOT_FOUND_MESSAGE = "configured policy '%s' not found"; private static final String NO_INDEX_METADATA_MESSAGE = "no index metadata found for index '%s'"; - private static final String NO_PHASE_TIME_MESSAGE = "no information about ILM phase start in index metadata for index '%s'"; + private static final String NO_ACTION_TIME_MESSAGE = "no information about ILM action start in index metadata for index '%s'"; private final String policy; @@ -46,10 +49,10 @@ public Result isConditionMet(Index index, ClusterState clusterState) { throw error(NO_INDEX_METADATA_MESSAGE, index.getName()); } - Long phaseTime = LifecycleExecutionState.fromIndexMetadata(indexMetadata).getPhaseTime(); + Long actionTime = LifecycleExecutionState.fromIndexMetadata(indexMetadata).getActionTime(); - if (phaseTime == null) { - throw error(NO_PHASE_TIME_MESSAGE, index.getName()); + if (actionTime == null) { + throw error(NO_ACTION_TIME_MESSAGE, index.getName()); } SnapshotLifecycleMetadata snapMeta = clusterState.metadata().custom(SnapshotLifecycleMetadata.TYPE); @@ -57,10 +60,30 @@ public Result isConditionMet(Index index, ClusterState clusterState) { throw error(POLICY_NOT_FOUND_MESSAGE, policy); } SnapshotLifecyclePolicyMetadata snapPolicyMeta = snapMeta.getSnapshotConfigurations().get(policy); - if (snapPolicyMeta.getLastSuccess() == null || snapPolicyMeta.getLastSuccess().getTimestamp() < phaseTime) { - return new Result(false, notExecutedMessage(phaseTime)); + if (snapPolicyMeta.getLastSuccess() == null || snapPolicyMeta.getLastSuccess().getSnapshotStartTimestamp() == null || + snapPolicyMeta.getLastSuccess().getSnapshotStartTimestamp() < actionTime) { + if (snapPolicyMeta.getLastSuccess() == null) { + logger.debug("skipping ILM policy execution because there is no last snapshot success, action time: {}", actionTime); + } else if (snapPolicyMeta.getLastSuccess().getSnapshotStartTimestamp() == null) { + /* + * This is because we are running in mixed cluster mode, and the snapshot was taken on an older master, which then went + * down before this check could happen. We'll wait until a snapshot is taken on this newer master before passing this check. + */ + logger.debug("skipping ILM policy execution because no last snapshot start date, action time: {}", actionTime); + } + else { + logger.debug("skipping ILM policy execution because snapshot start time {} is before action time {}, snapshot timestamp " + + "is {}", + snapPolicyMeta.getLastSuccess().getSnapshotStartTimestamp(), + actionTime, + snapPolicyMeta.getLastSuccess().getSnapshotFinishTimestamp()); + } + return new Result(false, notExecutedMessage(actionTime)); } - + logger.debug("executing policy because snapshot start time {} is after action time {}, snapshot timestamp is {}", + snapPolicyMeta.getLastSuccess().getSnapshotStartTimestamp(), + actionTime, + snapPolicyMeta.getLastSuccess().getSnapshotFinishTimestamp()); return new Result(true, null); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecord.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecord.java index 8a3a6b91f359b..55ef99e155781 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecord.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecord.java @@ -7,16 +7,18 @@ package org.elasticsearch.xpack.core.slm; +import org.elasticsearch.Version; import org.elasticsearch.cluster.AbstractDiffable; import org.elasticsearch.cluster.Diffable; -import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.core.Nullable; import java.io.IOException; import java.util.Objects; @@ -29,19 +31,22 @@ public class SnapshotInvocationRecord extends AbstractDiffable { static final ParseField SNAPSHOT_NAME = new ParseField("snapshot_name"); + static final ParseField START_TIMESTAMP = new ParseField("start_time"); static final ParseField TIMESTAMP = new ParseField("time"); static final ParseField DETAILS = new ParseField("details"); private String snapshotName; - private long timestamp; + private Long snapshotStartTimestamp; + private long snapshotFinishTimestamp; private String details; public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("snapshot_policy_invocation_record", true, - a -> new SnapshotInvocationRecord((String) a[0], (long) a[1], (String) a[2])); + a -> new SnapshotInvocationRecord((String) a[0], (Long) a[1], (long) a[2], (String) a[3])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), SNAPSHOT_NAME); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), START_TIMESTAMP); PARSER.declareLong(ConstructingObjectParser.constructorArg(), TIMESTAMP); PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), DETAILS); } @@ -50,15 +55,21 @@ public static SnapshotInvocationRecord parse(XContentParser parser, String name) return PARSER.apply(parser, name); } - public SnapshotInvocationRecord(String snapshotName, long timestamp, String details) { + public SnapshotInvocationRecord(String snapshotName, Long snapshotStartTimestamp, long snapshotFinishTimestamp, String details) { this.snapshotName = Objects.requireNonNull(snapshotName, "snapshot name must be provided"); - this.timestamp = timestamp; + this.snapshotStartTimestamp = snapshotStartTimestamp; + this.snapshotFinishTimestamp = snapshotFinishTimestamp; this.details = details; } public SnapshotInvocationRecord(StreamInput in) throws IOException { this.snapshotName = in.readString(); - this.timestamp = in.readVLong(); + if (in.getVersion().onOrAfter(Version.V_7_15_0)) { + this.snapshotStartTimestamp = in.readOptionalVLong(); + } else { + this.snapshotStartTimestamp = null; + } + this.snapshotFinishTimestamp = in.readVLong(); this.details = in.readOptionalString(); } @@ -66,8 +77,13 @@ public String getSnapshotName() { return snapshotName; } - public long getTimestamp() { - return timestamp; + @Nullable + public Long getSnapshotStartTimestamp() { + return snapshotStartTimestamp; + } + + public long getSnapshotFinishTimestamp() { + return snapshotFinishTimestamp; } public String getDetails() { @@ -77,7 +93,10 @@ public String getDetails() { @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(snapshotName); - out.writeVLong(timestamp); + if (out.getVersion().onOrAfter(Version.V_7_15_0)) { + out.writeOptionalVLong(snapshotStartTimestamp); + } + out.writeVLong(snapshotFinishTimestamp); out.writeOptionalString(details); } @@ -86,7 +105,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); { builder.field(SNAPSHOT_NAME.getPreferredName(), snapshotName); - builder.timeField(TIMESTAMP.getPreferredName(), "time_string", timestamp); + if (snapshotStartTimestamp != null) { + builder.timeField(START_TIMESTAMP.getPreferredName(), "start_time_string", snapshotStartTimestamp); + } + builder.timeField(TIMESTAMP.getPreferredName(), "time_string", snapshotFinishTimestamp); if (Objects.nonNull(details)) { builder.field(DETAILS.getPreferredName(), details); } @@ -100,13 +122,14 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SnapshotInvocationRecord that = (SnapshotInvocationRecord) o; - return getTimestamp() == that.getTimestamp() && + return getSnapshotFinishTimestamp() == that.getSnapshotFinishTimestamp() && + Objects.equals(getSnapshotStartTimestamp(), that.getSnapshotStartTimestamp()) && Objects.equals(getSnapshotName(), that.getSnapshotName()) && Objects.equals(getDetails(), that.getDetails()); } @Override public int hashCode() { - return Objects.hash(getSnapshotName(), getTimestamp(), getDetails()); + return Objects.hash(getSnapshotName(), getSnapshotStartTimestamp(), getSnapshotFinishTimestamp(), getDetails()); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStepTests.java index c4a1182a84ab7..eaae55d5ed41a 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/WaitForSnapshotStepTests.java @@ -58,7 +58,8 @@ protected WaitForSnapshotStep copyInstance(WaitForSnapshotStep instance) { public void testNoSlmPolicies() { IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLength(10)) - .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, Collections.singletonMap("phase_time", Long.toString(randomLong()))) + .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, + Collections.singletonMap("action_time", Long.toString(randomLong()))) .settings(settings(Version.CURRENT)) .numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build(); ImmutableOpenMap.Builder indices = @@ -82,7 +83,8 @@ public void testSlmPolicyNotExecuted() throws IOException { IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLength(10)) - .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, Collections.singletonMap("phase_time", Long.toString(randomLong()))) + .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, + Collections.singletonMap("action_time", Long.toString(randomLong()))) .settings(settings(Version.CURRENT)) .numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build(); ImmutableOpenMap.Builder indices = @@ -95,19 +97,36 @@ public void testSlmPolicyNotExecuted() throws IOException { } public void testSlmPolicyExecutedBeforeStep() throws IOException { + // The snapshot was started and finished before the phase time, so we do not expect the step to finish: + assertSlmPolicyExecuted(false, false); + } + + public void testSlmPolicyExecutedAfterStep() throws IOException { + // The snapshot was started and finished after the phase time, so we do expect the step to finish: + assertSlmPolicyExecuted(true, true); + } + + public void testSlmPolicyNotExecutedWhenStartIsBeforePhaseTime() throws IOException { + // The snapshot was started before the phase time and finished after, so we do expect the step to finish: + assertSlmPolicyExecuted(false, true); + } + + private void assertSlmPolicyExecuted(boolean startTimeAfterPhaseTime, boolean finishTimeAfterPhaseTime) throws IOException { long phaseTime = randomLong(); WaitForSnapshotStep instance = createRandomInstance(); SnapshotLifecyclePolicyMetadata slmPolicy = SnapshotLifecyclePolicyMetadata.builder() .setModifiedDate(randomLong()) .setPolicy(new SnapshotLifecyclePolicy("", "", "", "", null, null)) - .setLastSuccess(new SnapshotInvocationRecord("", phaseTime - 10, "")) + .setLastSuccess(new SnapshotInvocationRecord("", + phaseTime + (startTimeAfterPhaseTime ? 10 : -100), + phaseTime + (finishTimeAfterPhaseTime ? 100 : -10), "")) .build(); SnapshotLifecycleMetadata smlMetadata = new SnapshotLifecycleMetadata(Collections.singletonMap(instance.getPolicy(), slmPolicy), OperationMode.RUNNING, null); IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLength(10)) - .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, Collections.singletonMap("phase_time", Long.toString(phaseTime))) + .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, Collections.singletonMap("action_time", Long.toString(phaseTime))) .settings(settings(Version.CURRENT)) .numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build(); ImmutableOpenMap.Builder indices = @@ -115,18 +134,25 @@ public void testSlmPolicyExecutedBeforeStep() throws IOException { Metadata.Builder meta = Metadata.builder().indices(indices.build()).putCustom(SnapshotLifecycleMetadata.TYPE, smlMetadata); ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(meta).build(); ClusterStateWaitStep.Result result = instance.isConditionMet(indexMetadata.getIndex(), clusterState); - assertFalse(result.isComplete()); - assertTrue(getMessage(result).contains("to be executed")); + if (startTimeAfterPhaseTime) { + assertTrue(result.isComplete()); + assertNull(result.getInfomationContext()); + } else { + assertFalse(result.isComplete()); + assertTrue(getMessage(result).contains("to be executed")); + } } - public void testSlmPolicyExecutedAfterStep() throws IOException { + public void testNullStartTime() throws IOException { long phaseTime = randomLong(); WaitForSnapshotStep instance = createRandomInstance(); SnapshotLifecyclePolicyMetadata slmPolicy = SnapshotLifecyclePolicyMetadata.builder() .setModifiedDate(randomLong()) .setPolicy(new SnapshotLifecyclePolicy("", "", "", "", null, null)) - .setLastSuccess(new SnapshotInvocationRecord("", phaseTime + 10, "")) + .setLastSuccess(new SnapshotInvocationRecord("", + null, + phaseTime + 100, "")) .build(); SnapshotLifecycleMetadata smlMetadata = new SnapshotLifecycleMetadata(Collections.singletonMap(instance.getPolicy(), slmPolicy), OperationMode.RUNNING, null); @@ -139,9 +165,9 @@ public void testSlmPolicyExecutedAfterStep() throws IOException { ImmutableOpenMap.builder().fPut(indexMetadata.getIndex().getName(), indexMetadata); Metadata.Builder meta = Metadata.builder().indices(indices.build()).putCustom(SnapshotLifecycleMetadata.TYPE, smlMetadata); ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(meta).build(); - ClusterStateWaitStep.Result result = instance.isConditionMet(indexMetadata.getIndex(), clusterState); - assertTrue(result.isComplete()); - assertNull(result.getInfomationContext()); + IllegalStateException e = expectThrows(IllegalStateException.class, () -> instance.isConditionMet(indexMetadata.getIndex(), + clusterState)); + assertTrue(e.getMessage().contains("no information about ILM action start")); } private String getMessage(ClusterStateWaitStep.Result result) throws IOException { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecordTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecordTests.java index 130fcc66e41a3..d8a8c4bc23498 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecordTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/slm/SnapshotInvocationRecordTests.java @@ -37,15 +37,17 @@ protected SnapshotInvocationRecord mutateInstance(SnapshotInvocationRecord insta case 0: return new SnapshotInvocationRecord( randomValueOtherThan(instance.getSnapshotName(), () -> randomAlphaOfLengthBetween(2,10)), - instance.getTimestamp(), + instance.getSnapshotFinishTimestamp() - 100, + instance.getSnapshotFinishTimestamp(), instance.getDetails()); case 1: + long timestamp = randomValueOtherThan(instance.getSnapshotFinishTimestamp(), ESTestCase::randomNonNegativeLong); return new SnapshotInvocationRecord(instance.getSnapshotName(), - randomValueOtherThan(instance.getTimestamp(), ESTestCase::randomNonNegativeLong), + timestamp - 100, timestamp, instance.getDetails()); case 2: return new SnapshotInvocationRecord(instance.getSnapshotName(), - instance.getTimestamp(), + instance.getSnapshotFinishTimestamp() - 100, instance.getSnapshotFinishTimestamp(), randomValueOtherThan(instance.getDetails(), () -> randomAlphaOfLengthBetween(2,10))); default: throw new AssertionError("failure, got illegal switch case"); @@ -55,8 +57,18 @@ protected SnapshotInvocationRecord mutateInstance(SnapshotInvocationRecord insta public static SnapshotInvocationRecord randomSnapshotInvocationRecord() { return new SnapshotInvocationRecord( randomAlphaOfLengthBetween(5,10), + randomNonNegativeNullableLong(), randomNonNegativeLong(), randomBoolean() ? null : randomAlphaOfLengthBetween(5, 10)); } + private static Long randomNonNegativeNullableLong() { + long value = randomLong(); + if (value < 0) { + return null; + } else { + return value; + } + } + } diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java index 69d06a2fcf613..9d38b0fa689d7 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java @@ -16,15 +16,15 @@ import org.elasticsearch.client.ResponseException; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.core.CheckedRunnable; -import org.elasticsearch.core.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.core.CheckedRunnable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.engine.EngineConfig; import org.elasticsearch.rest.action.admin.indices.RestPutIndexTemplateAction; @@ -199,18 +199,18 @@ public void testWaitForSnapshot() throws Exception { createIndexWithSettings(client(), index, alias, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)); String slmPolicy = randomAlphaOfLengthBetween(4, 10); - createNewSingletonPolicy(client(), policy, "delete", new WaitForSnapshotAction(slmPolicy)); + final String phaseName = "delete"; + createNewSingletonPolicy(client(), policy, phaseName, new WaitForSnapshotAction(slmPolicy)); updatePolicy(client(), index, policy); + waitForPhaseTime(phaseName); assertBusy(() -> { Map indexILMState = explainIndex(client(), index); assertThat(indexILMState.get("action"), is("wait_for_snapshot")); assertThat(indexILMState.get("failed_step"), is("wait-for-snapshot")); }, slmPolicy); - String snapshotRepo = randomAlphaOfLengthBetween(4, 10); createSnapshotRepo(client(), snapshotRepo, randomBoolean()); createSlmPolicy(slmPolicy, snapshotRepo); - assertBusy(() -> { Map indexILMState = explainIndex(client(), index); //wait for step to notice that the slm policy is created and to get out of error @@ -218,18 +218,48 @@ public void testWaitForSnapshot() throws Exception { assertThat(indexILMState.get("action"), is("wait_for_snapshot")); assertThat(indexILMState.get("step"), is("wait-for-snapshot")); }, slmPolicy); + waitForPhaseTime(phaseName); // The phase time was reset because the action went into the ERROR step and back out Request request = new Request("PUT", "/_slm/policy/" + slmPolicy + "/_execute"); assertOK(client().performRequest(request)); + assertBusy(() -> { + Step.StepKey stepKey = getStepKeyForIndex(client(), index); + logger.info("step key for index {} is {}", index, stepKey); + assertThat(stepKey.getAction(), equalTo("complete")); + }, slmPolicy); + } - assertBusy(() -> assertThat(getStepKeyForIndex(client(), index).getAction(), equalTo("complete")), slmPolicy); + /* + * This test more rapidly creates a policy and then executes a snapshot, in an attempt to reproduce a timing bug where the snapshot + * time gets set to a time earlier than the policy's action's time. + */ + public void testWaitForSnapshotFast() throws Exception { + createIndexWithSettings(client(), index, alias, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)); + String slmPolicy = randomAlphaOfLengthBetween(4, 10); + final String phaseName = "delete"; + String snapshotRepo = randomAlphaOfLengthBetween(4, 10); + createSnapshotRepo(client(), snapshotRepo, randomBoolean()); + createSlmPolicy(slmPolicy, snapshotRepo); + createNewSingletonPolicy(client(), policy, phaseName, new WaitForSnapshotAction(slmPolicy)); + updatePolicy(client(), index, policy); + waitForPhaseTime(phaseName); + + Request request = new Request("PUT", "/_slm/policy/" + slmPolicy + "/_execute"); + assertOK(client().performRequest(request)); + assertBusy(() -> { + Step.StepKey stepKey = getStepKeyForIndex(client(), index); + logger.info("step key for index {} is {}", index, stepKey); + assertThat(stepKey.getAction(), equalTo("complete")); + }, slmPolicy); } public void testWaitForSnapshotSlmExecutedBefore() throws Exception { createIndexWithSettings(client(), index, alias, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)); String slmPolicy = randomAlphaOfLengthBetween(4, 10); - createNewSingletonPolicy(client(), policy, "delete", new WaitForSnapshotAction(slmPolicy)); + final String phaseName = "delete"; + createNewSingletonPolicy(client(), policy, phaseName, new WaitForSnapshotAction(slmPolicy)); String snapshotRepo = randomAlphaOfLengthBetween(4, 10); createSnapshotRepo(client(), snapshotRepo, randomBoolean()); @@ -248,6 +278,7 @@ public void testWaitForSnapshotSlmExecutedBefore() throws Exception { }, slmPolicy); updatePolicy(client(), index, policy); + waitForPhaseTime(phaseName); assertBusy(() -> { Map indexILMState = explainIndex(client(), index); @@ -268,9 +299,36 @@ public void testWaitForSnapshotSlmExecutedBefore() throws Exception { } }, slmPolicy); + assertBusy(() -> { + Step.StepKey stepKey = getStepKeyForIndex(client(), index); + logger.info("stepKey for index {} is {}", index, stepKey); + assertThat(stepKey.getAction(), equalTo("complete")); + }, slmPolicy); assertBusy(() -> assertThat(getStepKeyForIndex(client(), index).getAction(), equalTo("complete")), slmPolicy); } + /* + * This method waits until phase_time gets set in the state store for the given phase name. Otherwise we can wind up starting a snapshot + * before the ILM policy is ready. + */ + @SuppressWarnings("unchecked") + private void waitForPhaseTime(String phaseName) throws Exception { + assertBusy(() -> { + Request request = new Request("GET", "/_cluster/state/metadata/" + index); + Map response = entityAsMap(client().performRequest(request)); + Map metadata = (Map) response.get("metadata"); + Map indices = (Map) metadata.get("indices"); + Map indexMap = (Map) indices.get(index); + Map ilm = (Map) indexMap.get("ilm"); + assertNotNull(ilm); + Object phase = ilm.get("phase"); + assertEquals(phaseName, phase); + Object phase_time = ilm.get("phase_time"); + assertNotNull(phase_time); + logger.info("found phase time for {} phase: {}", phaseName, phase_time); + }); + } + public void testDelete() throws Exception { createIndexWithSettings(client(), index, alias, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycle.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycle.java index dffb171e18277..1a8322906b05f 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycle.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycle.java @@ -137,6 +137,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.function.LongSupplier; import java.util.function.Supplier; import static org.elasticsearch.xpack.core.ClientHelper.INDEX_LIFECYCLE_ORIGIN; @@ -211,8 +212,15 @@ public Collection createComponents(Client client, ClusterService cluster ilmTemplateRegistry.initialize(); ilmHistoryStore.set(new ILMHistoryStore(settings, new OriginSettingClient(client, INDEX_LIFECYCLE_ORIGIN), clusterService, threadPool)); + /* + * Here we use threadPool::absoluteTimeInMillis rather than System::currentTimeInMillis because snapshot start time is set using + * ThreadPool.absoluteTimeInMillis(). ThreadPool.absoluteTimeInMillis() returns a cached time that can be several hundred + * milliseconds behind System.currentTimeMillis(). The result is that a snapshot taken after a policy is created can have a start + * time that is before the policy's (or action's) start time if System::currentTimeInMillis is used here. + */ + LongSupplier nowSupplier = threadPool::absoluteTimeInMillis; indexLifecycleInitialisationService.set(new IndexLifecycleService(settings, client, clusterService, threadPool, - getClock(), System::currentTimeMillis, xContentRegistry, ilmHistoryStore.get(), getLicenseState())); + getClock(), nowSupplier, xContentRegistry, ilmHistoryStore.get(), getLicenseState())); components.add(indexLifecycleInitialisationService.get()); SnapshotLifecycleTemplateRegistry templateRegistry = new SnapshotLifecycleTemplateRegistry(settings, clusterService, threadPool, diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleTask.java index 310584f49a52c..0bf437a21d29d 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleTask.java @@ -98,13 +98,13 @@ public void onResponse(CreateSnapshotResponse createSnapshotResponse) { logger.debug("snapshot response for [{}]: {}", policyMetadata.getPolicy().getId(), Strings.toString(createSnapshotResponse)); final SnapshotInfo snapInfo = createSnapshotResponse.getSnapshotInfo(); - // Check that there are no failed shards, since the request may not entirely // fail, but may still have failures (such as in the case of an aborted snapshot) if (snapInfo.failedShards() == 0) { + long snapshotStartTime = snapInfo.startTime(); final long timestamp = Instant.now().toEpochMilli(); clusterService.submitStateUpdateTask("slm-record-success-" + policyMetadata.getPolicy().getId(), - WriteJobStatus.success(policyMetadata.getPolicy().getId(), request.snapshot(), timestamp)); + WriteJobStatus.success(policyMetadata.getPolicy().getId(), request.snapshot(), snapshotStartTime, timestamp)); historyStore.putAsync(SnapshotHistoryItem.creationSuccessRecord(timestamp, policyMetadata.getPolicy(), request.snapshot())); } else { @@ -166,22 +166,25 @@ private static class WriteJobStatus extends ClusterStateUpdateTask { private final String policyName; private final String snapshotName; - private final long timestamp; + private final long snapshotStartTime; + private final long snapshotFinishTime; private final Optional exception; - private WriteJobStatus(String policyName, String snapshotName, long timestamp, Optional exception) { + private WriteJobStatus(String policyName, String snapshotName, long snapshotStartTime, long snapshotFinishTime, + Optional exception) { this.policyName = policyName; this.snapshotName = snapshotName; this.exception = exception; - this.timestamp = timestamp; + this.snapshotStartTime = snapshotStartTime; + this.snapshotFinishTime = snapshotFinishTime; } - static WriteJobStatus success(String policyId, String snapshotName, long timestamp) { - return new WriteJobStatus(policyId, snapshotName, timestamp, Optional.empty()); + static WriteJobStatus success(String policyId, String snapshotName, long snapshotStartTime, long snapshotFinishTime) { + return new WriteJobStatus(policyId, snapshotName, snapshotStartTime, snapshotFinishTime, Optional.empty()); } static WriteJobStatus failure(String policyId, String snapshotName, long timestamp, Exception exception) { - return new WriteJobStatus(policyId, snapshotName, timestamp, Optional.of(exception)); + return new WriteJobStatus(policyId, snapshotName, timestamp, timestamp, Optional.of(exception)); } private String exceptionToString() throws IOException { @@ -220,10 +223,11 @@ public ClusterState execute(ClusterState currentState) throws Exception { if (exception.isPresent()) { stats.snapshotFailed(policyName); - newPolicyMetadata.setLastFailure(new SnapshotInvocationRecord(snapshotName, timestamp, exceptionToString())); + newPolicyMetadata.setLastFailure(new SnapshotInvocationRecord(snapshotName, null, snapshotFinishTime, + exceptionToString())); } else { stats.snapshotTaken(policyName); - newPolicyMetadata.setLastSuccess(new SnapshotInvocationRecord(snapshotName, timestamp, null)); + newPolicyMetadata.setLastSuccess(new SnapshotInvocationRecord(snapshotName, snapshotStartTime, snapshotFinishTime, null)); } snapLifecycles.put(policyName, newPolicyMetadata.build());