Skip to content

Commit

Permalink
Downsampling test fix: reorder policy and index creation. (#109787)
Browse files Browse the repository at this point in the history
This PR fixes two test failures
#103981 &
#105437 and refactors the
code a bit to make things more explicit.

**What was the issue** These tests were creating an index with a policy
before that policy was created. This could cause an issue if ILM ran
after the index was created but before the policy was created.

When ILM runs before the policy is added, the following happens:

- The index encounters an error and the ILM state records the current step as `null`, which makes sense since there is no policy to retrieve a step from.
- A `null` step does not qualify to be executed periodically, which also makes sense because probably nothing changed, so chances are the index will remain in this state.
- The test keeps waiting for something to happen, but nothing does, because no cluster state updates arrive as they would if this were a "real" cluster.
- Only when the test teardown starts does the index get updated with the ILM policy, but by then it is too late.

The previous scenario is confirmed by the logging too.

```
----> The index gets created referring to a policy that does not exist yet, ILM runs at least twice before the policy is there
[2024-06-12T20:14:28,857][....] [index-sanohmhwxl] creating index, ......
[2024-06-12T20:14:28,870][....] [index-sanohmhwxl] retrieved current step key: null
[2024-06-12T20:14:28,871][....] unable to retrieve policy [policy-tohpA] for index [index-sanohmhwxl], recording this in step_info for this index java.lang.IllegalArgumentException: policy [policy-tohpA] does not exist

-----> Only now the policy is added
[2024-06-12T20:14:29,024][....] adding index lifecycle policy [policy-tohpA]

-----> ILM is running periodically but because the current step is null it ignores it
[2024-06-12T20:15:23,791][....] job triggered: ilm, 1718223323790, 1718223323790
[2024-06-12T20:15:23,791][....] retrieved current step key: null
[2024-06-12T20:15:23,791][....] maybe running periodic step (InitializePolicyContextStep) with current step {"phase":"new","action":"init","name":"init"}
```

This can also be locally reproduced by adding a 5s thread sleep before
adding the policy. 

**The fix** Adding a non-existent policy to an index is not a
supported path. For this reason, we refactored the test to reflect a
more realistic scenario.

- We add the policy as an argument in `private void createIndex(String index, String alias, String policy, boolean isTimeSeries)`. This way it's clear that a policy could be added.
- We now create the policy before creating the index. It does not appear that adding the policy later is crucial for the test, so simplifying it sounded like a good idea.
- We simplified `testRollupIndexInTheHotPhaseWithoutRollover`, which ensures that a downsampling action cannot be added in the hot phase without rollover. An index is not necessary for this test, so again simplifying it makes the purpose of the test clearer.

Fixes: #103981
Fixes: #105437
  • Loading branch information
gmarouli authored Jun 17, 2024
1 parent 820fc5d commit 05f80ef
Showing 1 changed file with 18 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.FormatNames;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.rest.action.admin.indices.RestPutIndexTemplateAction;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
Expand Down Expand Up @@ -54,7 +54,6 @@
import static org.elasticsearch.xpack.TimeSeriesRestDriver.getStepKeyForIndex;
import static org.elasticsearch.xpack.TimeSeriesRestDriver.index;
import static org.elasticsearch.xpack.TimeSeriesRestDriver.rolloverMaxOneDocCondition;
import static org.elasticsearch.xpack.TimeSeriesRestDriver.updatePolicy;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
Expand Down Expand Up @@ -158,11 +157,13 @@ public void updatePollInterval() throws IOException {
updateClusterSettings(client(), Settings.builder().put("indices.lifecycle.poll_interval", "5s").build());
}

private void createIndex(String index, String alias, boolean isTimeSeries) throws IOException {
private void createIndex(String index, String alias, @Nullable String policy, boolean isTimeSeries) throws IOException {
Settings.Builder settings = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(LifecycleSettings.LIFECYCLE_NAME, policy);
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0);
if (policy != null) {
settings.put(LifecycleSettings.LIFECYCLE_NAME, policy);
}

if (isTimeSeries) {
settings.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES)
Expand Down Expand Up @@ -191,15 +192,15 @@ private void createIndex(String index, String alias, boolean isTimeSeries) throw
createIndexWithSettings(client(), index, alias, settings, mapping);
}

@TestLogging(value = "org.elasticsearch.xpack.ilm:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/105437")
public void testRollupIndex() throws Exception {
createIndex(index, alias, true);
index(client(), index, true, null, "@timestamp", "2020-01-01T05:10:00Z", "volume", 11.0, "metricset", randomAlphaOfLength(5));

// Create the ILM policy
String phaseName = randomFrom("warm", "cold");
DateHistogramInterval fixedInterval = ConfigTestHelpers.randomInterval();
createNewSingletonPolicy(client(), policy, phaseName, new DownsampleAction(fixedInterval, DownsampleAction.DEFAULT_WAIT_TIMEOUT));
updatePolicy(client(), index, policy);

// Create a time series index managed by the policy
createIndex(index, alias, policy, true);
index(client(), index, true, null, "@timestamp", "2020-01-01T05:10:00Z", "volume", 11.0, "metricset", randomAlphaOfLength(5));

String rollupIndex = waitAndGetRollupIndexName(client(), index, fixedInterval);
assertNotNull("Cannot retrieve rollup index name", rollupIndex);
Expand All @@ -222,10 +223,7 @@ public void testRollupIndex() throws Exception {
);
}

public void testRollupIndexInTheHotPhase() throws Exception {
createIndex(index, alias, true);
index(client(), index, true, null, "@timestamp", "2020-01-01T05:10:00Z", "volume", 11.0, "metricset", randomAlphaOfLength(5));

public void testRollupIndexInTheHotPhaseWithoutRollover() {
ResponseException e = expectThrows(
ResponseException.class,
() -> createNewSingletonPolicy(
Expand Down Expand Up @@ -274,7 +272,7 @@ public void testRollupIndexInTheHotPhaseAfterRollover() throws Exception {
client().performRequest(createTemplateRequest);

// then create the index and index a document to trigger rollover
createIndex(originalIndex, alias, true);
createIndex(originalIndex, alias, policy, true);
index(
client(),
originalIndex,
Expand Down Expand Up @@ -396,15 +394,15 @@ public void testILMWaitsForTimeSeriesEndTimeToLapse() throws Exception {
}, 30, TimeUnit.SECONDS);
}

@TestLogging(value = "org.elasticsearch.xpack.ilm:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/103981")
public void testRollupNonTSIndex() throws Exception {
createIndex(index, alias, false);
index(client(), index, true, null, "@timestamp", "2020-01-01T05:10:00Z", "volume", 11.0, "metricset", randomAlphaOfLength(5));

// Create the ILM policy
String phaseName = randomFrom("warm", "cold");
DateHistogramInterval fixedInterval = ConfigTestHelpers.randomInterval();
createNewSingletonPolicy(client(), policy, phaseName, new DownsampleAction(fixedInterval, DownsampleAction.DEFAULT_WAIT_TIMEOUT));
updatePolicy(client(), index, policy);

// Create a non TSDB managed index
createIndex(index, alias, policy, false);
index(client(), index, true, null, "@timestamp", "2020-01-01T05:10:00Z", "volume", 11.0, "metricset", randomAlphaOfLength(5));

try {
assertBusy(
Expand Down

0 comments on commit 05f80ef

Please sign in to comment.