
Commit

ILM: Add validation of the number_of_shards parameter in Shrink Action of ILM (elastic#74219)

Add validation of the number_of_shards parameter in Shrink Action of ILM
gaobinlong authored Aug 16, 2021
1 parent 5c53a66 commit 58feb4e
Showing 7 changed files with 255 additions and 99 deletions.
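
In short, the commit adds a new wait step to the shrink action that blocks progress whenever the configured number_of_shards does not evenly divide the source index's shard count. The following standalone sketch illustrates that rule only; isValidTargetShardCount is a hypothetical helper, not code from this commit.

// Illustration of the rule enforced by the new check-target-shards-count step:
// the shrink target's shard count must be a factor of the source index's shard count.
public class TargetShardCountRuleSketch {

    // Hypothetical helper (not part of the commit) mirroring the modulo check
    // in CheckTargetShardsCountStep#isConditionMet.
    static boolean isValidTargetShardCount(int sourceShards, int targetShards) {
        return sourceShards % targetShards == 0;
    }

    public static void main(String[] args) {
        int sourceShards = 10;
        for (int target : new int[] { 1, 2, 3, 5, 10 }) {
            System.out.printf("source=%d target=%d valid=%b%n",
                sourceShards, target, isValidTargetShardCount(sourceShards, target));
        }
    }
}

For a 10-shard source index this prints valid=true for 1, 2, 5 and 10 and valid=false for 3; in the invalid case ILM waits at check-target-shards-count and surfaces the reason via the step information instead of moving the index to an error state, as the comment change in the first file below reflects.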
@@ -621,9 +621,6 @@ public void testRetryPolicy() throws Exception {
.put("index.lifecycle.name", "my_policy")
.build());
client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
assertBusy(() -> assertNotNull(client.indexLifecycle()
.explainLifecycle(new ExplainLifecycleRequest("my_index"), RequestOptions.DEFAULT)
.getIndexResponses().get("my_index").getFailedStep()), 30, TimeUnit.SECONDS);
}

// tag::ilm-retry-lifecycle-policy-request
@@ -644,8 +641,8 @@ public void testRetryPolicy() throws Exception {

assertTrue(acknowledged);
} catch (ElasticsearchException e) {
// the retry API might fail as the shrink action steps are retryable (so if the retry API reaches ES when ILM is retrying the
// failed `shrink` step, the retry API will fail)
// the retry API might fail as the shrink action steps are retryable (ILM will be stuck in the `check-target-shards-count` step
// with no failure, so the retry API will fail)
// assert that's the exception we encountered (we want to test to fail if there is an actual error with the retry api)
assertThat(e.getMessage(), containsStringIgnoringCase("reason=cannot retry an action for an index [my_index] that has not " +
"encountered an error when running a Lifecycle Policy"));
@@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.core.ilm;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.index.Index;
import org.elasticsearch.xpack.core.ilm.step.info.SingleMessageFieldInfo;

import java.util.Locale;

/**
* This step checks whether the new shrunken index's shards count is a factor of the source index's shards count.
*/
public class CheckTargetShardsCountStep extends ClusterStateWaitStep {

public static final String NAME = "check-target-shards-count";

private final Integer numberOfShards;

private static final Logger logger = LogManager.getLogger(CheckTargetShardsCountStep.class);

CheckTargetShardsCountStep(StepKey key, StepKey nextStepKey, Integer numberOfShards) {
super(key, nextStepKey);
this.numberOfShards = numberOfShards;
}

@Override
public boolean isRetryable() {
return true;
}

public Integer getNumberOfShards() {
return numberOfShards;
}

@Override
public Result isConditionMet(Index index, ClusterState clusterState) {
IndexMetadata indexMetadata = clusterState.metadata().index(index);
if (indexMetadata == null) {
// Index must have since been deleted, ignore it
logger.debug("[{}] lifecycle action for index [{}] executed but index no longer exists",
getKey().getAction(), index.getName());
return new Result(false, null);
}
String indexName = indexMetadata.getIndex().getName();
if (numberOfShards != null) {
int sourceNumberOfShards = indexMetadata.getNumberOfShards();
if (sourceNumberOfShards % numberOfShards != 0) {
String policyName = indexMetadata.getSettings().get(LifecycleSettings.LIFECYCLE_NAME);
String errorMessage = String.format(Locale.ROOT, "lifecycle action of policy [%s] for index [%s] cannot make progress " +
"because the target shards count [%d] must be a factor of the source index's shards count [%d]",
policyName, indexName, numberOfShards, sourceNumberOfShards);
logger.debug(errorMessage);
return new Result(false, new SingleMessageFieldInfo(errorMessage));
}
}

return new Result(true, null);
}
}
@@ -137,6 +137,7 @@ public List<Step> toSteps(Client client, String phase, Step.StepKey nextStepKey)
StepKey checkNotWriteIndex = new StepKey(phase, NAME, CheckNotDataStreamWriteIndexStep.NAME);
StepKey waitForNoFollowerStepKey = new StepKey(phase, NAME, WaitForNoFollowersStep.NAME);
StepKey readOnlyKey = new StepKey(phase, NAME, ReadOnlyAction.NAME);
StepKey checkTargetShardsCountKey = new StepKey(phase, NAME, CheckTargetShardsCountStep.NAME);
StepKey cleanupShrinkIndexKey = new StepKey(phase, NAME, CleanupShrinkIndexStep.NAME);
StepKey generateShrinkIndexNameKey = new StepKey(phase, NAME, GenerateUniqueIndexNameStep.NAME);
StepKey setSingleNodeKey = new StepKey(phase, NAME, SetSingleNodeAllocateStep.NAME);
@@ -167,7 +168,9 @@ public List<Step> toSteps(Client client, String phase, Step.StepKey nextStepKey)
CheckNotDataStreamWriteIndexStep checkNotWriteIndexStep = new CheckNotDataStreamWriteIndexStep(checkNotWriteIndex,
waitForNoFollowerStepKey);
WaitForNoFollowersStep waitForNoFollowersStep = new WaitForNoFollowersStep(waitForNoFollowerStepKey, readOnlyKey, client);
ReadOnlyStep readOnlyStep = new ReadOnlyStep(readOnlyKey, cleanupShrinkIndexKey, client);
ReadOnlyStep readOnlyStep = new ReadOnlyStep(readOnlyKey, checkTargetShardsCountKey, client);
CheckTargetShardsCountStep checkTargetShardsCountStep = new CheckTargetShardsCountStep(checkTargetShardsCountKey,
cleanupShrinkIndexKey, numberOfShards);
// we generate a unique shrink index name but we also retry if the allocation of the shrunk index is not possible, so we want to
// delete the "previously generated" shrink index (this is a no-op if it's the first run of the action and we haven't generated a
// shrink index name)
Expand Down Expand Up @@ -211,9 +214,9 @@ public List<Step> toSteps(Client client, String phase, Step.StepKey nextStepKey)
DeleteStep deleteSourceIndexStep = new DeleteStep(deleteIndexKey, isShrunkIndexKey, client);
ShrunkenIndexCheckStep waitOnShrinkTakeover = new ShrunkenIndexCheckStep(isShrunkIndexKey, nextStepKey);
return Arrays.asList(conditionalSkipShrinkStep, checkNotWriteIndexStep, waitForNoFollowersStep, readOnlyStep,
cleanupShrinkIndexStep, generateUniqueIndexNameStep, setSingleNodeStep, checkShrinkReadyStep, shrink, allocated,
copyMetadata, isDataStreamBranchingStep, aliasSwapAndDelete, waitOnShrinkTakeover, replaceDataStreamBackingIndex,
deleteSourceIndexStep);
checkTargetShardsCountStep, cleanupShrinkIndexStep, generateUniqueIndexNameStep, setSingleNodeStep, checkShrinkReadyStep,
shrink, allocated, copyMetadata, isDataStreamBranchingStep, aliasSwapAndDelete, waitOnShrinkTakeover,
replaceDataStreamBackingIndex, deleteSourceIndexStep);
}

@Override
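
For orientation, the new step is wired between the read-only and cleanup steps, so an invalid number_of_shards is caught before any shrink index name is generated or any shrink is issued. A sketch of the slice of the step order touched by this change, using the NAME constants referenced in ShrinkAction#toSteps above (illustration only, not code from the commit):

package org.elasticsearch.xpack.core.ilm;

import java.util.List;

public class ShrinkStepOrderSketch {
    public static void main(String[] args) {
        List<String> affectedSlice = List.of(
            ReadOnlyAction.NAME,              // source index is made read-only
            CheckTargetShardsCountStep.NAME,  // new: wait until number_of_shards divides the source shard count
            CleanupShrinkIndexStep.NAME,      // delete any previously generated shrink index
            GenerateUniqueIndexNameStep.NAME  // pick the unique target index name
        );
        affectedSlice.forEach(System.out::println);
    }
}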
@@ -0,0 +1,83 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.core.ilm;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.xpack.core.ilm.Step.StepKey;
import org.elasticsearch.xpack.core.ilm.step.info.SingleMessageFieldInfo;

import static org.hamcrest.Matchers.is;

public class CheckTargetShardsCountStepTests extends AbstractStepTestCase<CheckTargetShardsCountStep> {

@Override
protected CheckTargetShardsCountStep createRandomInstance() {
return new CheckTargetShardsCountStep(randomStepKey(), randomStepKey(), null);
}

@Override
protected CheckTargetShardsCountStep mutateInstance(CheckTargetShardsCountStep instance) {
StepKey key = instance.getKey();
StepKey nextKey = instance.getNextStepKey();

switch (between(0, 1)) {
case 0:
key = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
break;
case 1:
nextKey = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
break;
default:
throw new AssertionError("Illegal randomisation branch");
}

return new CheckTargetShardsCountStep(key, nextKey, null);
}

@Override
protected CheckTargetShardsCountStep copyInstance(CheckTargetShardsCountStep instance) {
return new CheckTargetShardsCountStep(instance.getKey(), instance.getNextStepKey(), instance.getNumberOfShards());
}

public void testStepCompleteIfTargetShardsCountIsValid() {
String policyName = "test-ilm-policy";
IndexMetadata indexMetadata =
IndexMetadata.builder(randomAlphaOfLength(10)).settings(settings(Version.CURRENT)
.put(LifecycleSettings.LIFECYCLE_NAME, policyName))
.numberOfShards(10).numberOfReplicas(randomIntBetween(0, 5)).build();

ClusterState clusterState = ClusterState.builder(emptyClusterState()).metadata(
Metadata.builder().put(indexMetadata, true).build()).build();

CheckTargetShardsCountStep checkTargetShardsCountStep = new CheckTargetShardsCountStep(randomStepKey(), randomStepKey(), 2);

ClusterStateWaitStep.Result result = checkTargetShardsCountStep.isConditionMet(indexMetadata.getIndex(), clusterState);
assertThat(result.isComplete(), is(true));
}

public void testStepIncompleteIfTargetShardsCountNotValid() {
String indexName = randomAlphaOfLength(10);
String policyName = "test-ilm-policy";
IndexMetadata indexMetadata =
IndexMetadata.builder(indexName).settings(settings(Version.CURRENT).put(LifecycleSettings.LIFECYCLE_NAME, policyName))
.numberOfShards(10).numberOfReplicas(randomIntBetween(0, 5)).build();

ClusterState clusterState = ClusterState.builder(emptyClusterState()).metadata(
Metadata.builder().put(indexMetadata, true).build()).build();

CheckTargetShardsCountStep checkTargetShardsCountStep = new CheckTargetShardsCountStep(randomStepKey(), randomStepKey(), 3);

ClusterStateWaitStep.Result result = checkTargetShardsCountStep.isConditionMet(indexMetadata.getIndex(), clusterState);
assertThat(result.isComplete(), is(false));
SingleMessageFieldInfo info = (SingleMessageFieldInfo) result.getInfomationContext();
assertThat(info.getMessage(), is("lifecycle action of policy [" + policyName + "] for index [" + indexName +
"] cannot make progress because the target shards count [3] must be a factor of the source index's shards count [10]"));
}
}
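
The tests above cover a valid and an invalid target shard count, but not the default case where number_of_shards is not set on the shrink action and the step receives null; per the null check in CheckTargetShardsCountStep#isConditionMet, the step then skips validation and reports the condition as met. A minimal sketch of such a test, assuming the same scaffolding used above (settings(...), emptyClusterState(), randomStepKey()); it is not part of this commit.

public void testStepCompleteIfNumberOfShardsIsNotSet() {
    // Hypothetical additional test: with a null number_of_shards the step performs
    // no validation, so the condition is reported as met for any source shard count.
    IndexMetadata indexMetadata =
        IndexMetadata.builder(randomAlphaOfLength(10)).settings(settings(Version.CURRENT)
            .put(LifecycleSettings.LIFECYCLE_NAME, "test-ilm-policy"))
            .numberOfShards(randomIntBetween(1, 10)).numberOfReplicas(randomIntBetween(0, 5)).build();

    ClusterState clusterState = ClusterState.builder(emptyClusterState()).metadata(
        Metadata.builder().put(indexMetadata, true).build()).build();

    CheckTargetShardsCountStep step = new CheckTargetShardsCountStep(randomStepKey(), randomStepKey(), null);

    ClusterStateWaitStep.Result result = step.isConditionMet(indexMetadata.getIndex(), clusterState);
    assertThat(result.isComplete(), is(true));
}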