Skip to content

Commit

Permalink
[7.x][ML] More advanced model snapshot retention options (#56194)
Browse files Browse the repository at this point in the history
This PR implements the following changes to make ML model snapshot
retention more flexible in advance of adding a UI for the feature in
an upcoming release.

- The default for `model_snapshot_retention_days` for new jobs is now
  10 instead of 1
- There is a new job setting, `daily_model_snapshot_retention_after_days`,
  that defaults to 1 for new jobs and `model_snapshot_retention_days`
  for pre-7.8 jobs
- For days that are older than `model_snapshot_retention_days`, all
  model snapshots are deleted as before
- For days that are in between `daily_model_snapshot_retention_after_days`
  and `model_snapshot_retention_days` all but the first model snapshot
  for that day are deleted
- The `retain` setting of model snapshots is still respected to allow
  selected model snapshots to be retained indefinitely

Backport of #56125
  • Loading branch information
droberts195 authored May 5, 2020
1 parent 4dfdd46 commit 7aa0daa
Show file tree
Hide file tree
Showing 28 changed files with 593 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2549,10 +2549,12 @@ private static Job buildJobForExpiredDataTests(String jobId) {
.setFunction("count")
.setDetectorDescription(randomAlphaOfLength(10))
.build();
AnalysisConfig.Builder configBuilder = new AnalysisConfig.Builder(Arrays.asList(detector));
AnalysisConfig.Builder configBuilder = new AnalysisConfig.Builder(Collections.singletonList(detector));
//should not be random, see:https://github.com/elastic/ml-cpp/issues/208
configBuilder.setBucketSpan(new TimeValue(1, TimeUnit.HOURS));
builder.setAnalysisConfig(configBuilder);
builder.setModelSnapshotRetentionDays(1L);
builder.setDailyModelSnapshotRetentionAfterDays(1L);

DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setTimeFormat(DataDescription.EPOCH_MS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,17 @@ public static Job.Builder createRandomizedJobBuilder() {
if (randomBoolean()) {
builder.setBackgroundPersistInterval(TimeValue.timeValueHours(randomIntBetween(1, 24)));
}
Long modelSnapshotRetentionDays = null;
if (randomBoolean()) {
builder.setModelSnapshotRetentionDays(randomNonNegativeLong());
modelSnapshotRetentionDays = randomNonNegativeLong();
builder.setModelSnapshotRetentionDays(modelSnapshotRetentionDays);
}
if (randomBoolean()) {
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
if (modelSnapshotRetentionDays != null) {
builder.setDailyModelSnapshotRetentionAfterDays(randomLongBetween(0, modelSnapshotRetentionDays));
} else {
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
}
}
if (randomBoolean()) {
builder.setResultsRetentionDays(randomNonNegativeLong());
Expand Down
3 changes: 2 additions & 1 deletion docs/reference/ml/anomaly-detection/apis/get-job.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ The API returns the following results:
"model_plot_config" : {
"enabled" : true
},
"model_snapshot_retention_days" : 1,
"model_snapshot_retention_days" : 10,
"daily_model_snapshot_retention_after_days" : 1,
"custom_settings" : {
"created_by" : "ml-module-sample",
...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ This is a possible response:
},
"model_memory_limit" : "1gb",
"categorization_examples_limit" : 4,
"model_snapshot_retention_days" : 1
"model_snapshot_retention_days" : 10,
"daily_model_snapshot_retention_after_days" : 1
},
"datafeeds" : {
"scroll_size" : 1000
Expand Down
7 changes: 6 additions & 1 deletion docs/reference/ml/anomaly-detection/apis/put-job.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings]
include::{docdir}/ml/ml-shared.asciidoc[tag=data-description]
//End data_description

`daily_model_snapshot_retention_after_days`::
(Optional, long)
include::{docdir}/ml/ml-shared.asciidoc[tag=daily-model-snapshot-retention-after-days]

`description`::
(Optional, string) A description of the job.

Expand Down Expand Up @@ -320,7 +324,8 @@ When the job is created, you receive the following results:
"time_field" : "timestamp",
"time_format" : "epoch_ms"
},
"model_snapshot_retention_days" : 1,
"model_snapshot_retention_days" : 10,
"daily_model_snapshot_retention_after_days" : 1,
"results_index_name" : "shared",
"allow_lazy_open" : false
}
Expand Down
4 changes: 4 additions & 0 deletions docs/reference/ml/anomaly-detection/apis/update-job.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ close the job, then reopen the job and restart the {dfeed} for the changes to ta
(object)
include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings]

`daily_model_snapshot_retention_after_days`::
(long)
include::{docdir}/ml/ml-shared.asciidoc[tag=daily-model-snapshot-retention-after-days]

`description`::
(string) A description of the job.

Expand Down
16 changes: 14 additions & 2 deletions docs/reference/ml/ml-shared.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,18 @@ example, it can contain custom URL information as shown in
{ml-docs}/ml-configuring-url.html[Adding custom URLs to {ml} results].
end::custom-settings[]

tag::daily-model-snapshot-retention-after-days[]
Advanced configuration option. Specifies a number of days between 0 and the
value of `model_snapshot_retention_days`. After this period of time, only the first
model snapshot per day is retained for this job. Age is calculated relative to
the timestamp of the newest model snapshot. For new jobs, the default value is
`1`, which means that all snapshots are retained for one day. Older snapshots
are thinned out such that only one per day is retained. For jobs that were
created before this setting was available, the default value matches the
`model_snapshot_retention_days` value, which preserves the original behavior
and no thinning out of model snapshots occurs.
end::daily-model-snapshot-retention-after-days[]

tag::data-description[]
The data description defines the format of the input data when you send data to
the job by using the <<ml-post-data,post data>> API. Note that when configure
Expand Down Expand Up @@ -992,8 +1004,8 @@ end::model-plot-config-terms[]
tag::model-snapshot-retention-days[]
Advanced configuration option. The period of time (in days) that model snapshots
are retained. Age is calculated relative to the timestamp of the newest model
snapshot. The default value is `1`, which means snapshots that are one day
(twenty-four hours) older than the newest snapshot are deleted.
snapshot. The default value is `10`, which means snapshots that are ten days
older than the newest snapshot are deleted.
end::model-snapshot-retention-days[]

tag::model-timestamp[]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
*/
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(10, ByteSizeUnit.MB);

public static final long DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS = 1;
public static final long DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS = 10;
public static final long DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS = 1;

private static ObjectParser<Builder, Void> createParser(boolean ignoreUnknownFields) {
ObjectParser<Builder, Void> parser = new ObjectParser<>("job_details", ignoreUnknownFields, Builder::new);
Expand Down Expand Up @@ -858,6 +859,10 @@ public Builder setModelSnapshotRetentionDays(Long modelSnapshotRetentionDays) {
return this;
}

public Long getModelSnapshotRetentionDays() {
return modelSnapshotRetentionDays;
}

public Builder setDailyModelSnapshotRetentionAfterDays(Long dailyModelSnapshotRetentionAfterDays) {
this.dailyModelSnapshotRetentionAfterDays = dailyModelSnapshotRetentionAfterDays;
return this;
Expand Down Expand Up @@ -1105,9 +1110,6 @@ public void validateInputFields() {

checkValidBackgroundPersistInterval();
checkValueNotLessThan(0, RENORMALIZATION_WINDOW_DAYS.getPreferredName(), renormalizationWindowDays);
checkValueNotLessThan(0, MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), modelSnapshotRetentionDays);
checkValueNotLessThan(0, DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
dailyModelSnapshotRetentionAfterDays);
checkValueNotLessThan(0, RESULTS_RETENTION_DAYS.getPreferredName(), resultsRetentionDays);

if (!MlStrings.isValidId(id)) {
Expand All @@ -1117,6 +1119,8 @@ public void validateInputFields() {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_ID_TOO_LONG, MlStrings.ID_LENGTH_LIMIT));
}

validateModelSnapshotRetentionSettings();

validateGroups();

// Results index name not specified in user input means use the default, so is acceptable in this validation
Expand All @@ -1138,6 +1142,37 @@ public void validateAnalysisLimitsAndSetDefaults(@Nullable ByteSizeValue maxMode
AnalysisLimits.DEFAULT_MODEL_MEMORY_LIMIT_MB);
}

/**
* This is meant to be called when a new job is created.
* It sets {@link #dailyModelSnapshotRetentionAfterDays} to the default value if it is not set and the default makes sense.
*/
public void validateModelSnapshotRetentionSettingsAndSetDefaults() {
validateModelSnapshotRetentionSettings();
if (dailyModelSnapshotRetentionAfterDays == null &&
modelSnapshotRetentionDays != null &&
modelSnapshotRetentionDays > DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS) {
dailyModelSnapshotRetentionAfterDays = DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS;
}
}

/**
* Validates that {@link #modelSnapshotRetentionDays} and {@link #dailyModelSnapshotRetentionAfterDays} make sense,
* both individually and in combination.
*/
public void validateModelSnapshotRetentionSettings() {

checkValueNotLessThan(0, MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), modelSnapshotRetentionDays);
checkValueNotLessThan(0, DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
dailyModelSnapshotRetentionAfterDays);

if (modelSnapshotRetentionDays != null &&
dailyModelSnapshotRetentionAfterDays != null &&
dailyModelSnapshotRetentionAfterDays > modelSnapshotRetentionDays) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_MODEL_SNAPSHOT_RETENTION_SETTINGS_INCONSISTENT,
dailyModelSnapshotRetentionAfterDays, modelSnapshotRetentionDays));
}
}

private void validateGroups() {
for (String group : this.groups) {
if (MlStrings.isValidId(group) == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.elasticsearch.xpack.core.ml.job.messages;

import org.elasticsearch.xpack.core.ml.MachineLearningField;
import org.elasticsearch.xpack.core.ml.job.config.Job;

import java.text.MessageFormat;
import java.util.Locale;
Expand Down Expand Up @@ -212,6 +213,9 @@ public final class Messages {
"This job would cause a mapping clash with existing field [{0}] - avoid the clash by assigning a dedicated results index";
public static final String JOB_CONFIG_TIME_FIELD_NOT_ALLOWED_IN_ANALYSIS_CONFIG =
"data_description.time_field may not be used in the analysis_config";
public static final String JOB_CONFIG_MODEL_SNAPSHOT_RETENTION_SETTINGS_INCONSISTENT =
"The value of '" + Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS + "' [{0}] cannot be greater than '" +
Job.MODEL_SNAPSHOT_RETENTION_DAYS + "' [{1}]";

public static final String JOB_AND_GROUP_NAMES_MUST_BE_UNIQUE =
"job and group names must be unique but job [{0}] and group [{0}] have the same name";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ public final class ReservedFieldNames {
Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName(),
Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName(),
Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(),
Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
Job.RESULTS_RETENTION_DAYS.getPreferredName(),
Job.MODEL_SNAPSHOT_ID.getPreferredName(),
Job.MODEL_SNAPSHOT_MIN_VERSION.getPreferredName(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@
"type" : "object",
"enabled" : false
},
"daily_model_snapshot_retention_after_days" : {
"type" : "long"
},
"data_description" : {
"properties" : {
"field_delimiter" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public void testConstructor_GivenEmptyJobConfiguration() {
assertNull(job.getModelPlotConfig());
assertNull(job.getRenormalizationWindowDays());
assertNull(job.getBackgroundPersistInterval());
assertThat(job.getModelSnapshotRetentionDays(), equalTo(1L));
assertThat(job.getModelSnapshotRetentionDays(), equalTo(10L));
assertNull(job.getDailyModelSnapshotRetentionAfterDays());
assertNull(job.getResultsRetentionDays());
assertNotNull(job.allInputFields());
Expand Down Expand Up @@ -168,7 +168,7 @@ public void testEquals_GivenDifferentIds() {
Job job1 = builder.build();
builder.setId("bar");
Job job2 = builder.build();
assertFalse(job1.equals(job2));
assertNotEquals(job1, job2);
}

public void testEquals_GivenDifferentRenormalizationWindowDays() {
Expand All @@ -183,7 +183,7 @@ public void testEquals_GivenDifferentRenormalizationWindowDays() {
jobDetails2.setRenormalizationWindowDays(4L);
jobDetails2.setAnalysisConfig(createAnalysisConfig());
jobDetails2.setCreateTime(date);
assertFalse(jobDetails1.build().equals(jobDetails2.build()));
assertNotEquals(jobDetails1.build(), jobDetails2.build());
}

public void testEquals_GivenDifferentBackgroundPersistInterval() {
Expand All @@ -198,7 +198,7 @@ public void testEquals_GivenDifferentBackgroundPersistInterval() {
jobDetails2.setBackgroundPersistInterval(TimeValue.timeValueSeconds(8000L));
jobDetails2.setAnalysisConfig(createAnalysisConfig());
jobDetails2.setCreateTime(date);
assertFalse(jobDetails1.build().equals(jobDetails2.build()));
assertNotEquals(jobDetails1.build(), jobDetails2.build());
}

public void testEquals_GivenDifferentModelSnapshotRetentionDays() {
Expand All @@ -213,7 +213,7 @@ public void testEquals_GivenDifferentModelSnapshotRetentionDays() {
jobDetails2.setModelSnapshotRetentionDays(8L);
jobDetails2.setAnalysisConfig(createAnalysisConfig());
jobDetails2.setCreateTime(date);
assertFalse(jobDetails1.build().equals(jobDetails2.build()));
assertNotEquals(jobDetails1.build(), jobDetails2.build());
}

public void testEquals_GivenDifferentResultsRetentionDays() {
Expand All @@ -228,7 +228,7 @@ public void testEquals_GivenDifferentResultsRetentionDays() {
jobDetails2.setResultsRetentionDays(4L);
jobDetails2.setAnalysisConfig(createAnalysisConfig());
jobDetails2.setCreateTime(date);
assertFalse(jobDetails1.build().equals(jobDetails2.build()));
assertNotEquals(jobDetails1.build(), jobDetails2.build());
}

public void testEquals_GivenDifferentCustomSettings() {
Expand All @@ -240,7 +240,7 @@ public void testEquals_GivenDifferentCustomSettings() {
Map<String, Object> customSettings2 = new HashMap<>();
customSettings2.put("key2", "value2");
jobDetails2.setCustomSettings(customSettings2);
assertFalse(jobDetails1.build().equals(jobDetails2.build()));
assertNotEquals(jobDetails1.build(), jobDetails2.build());
}

// JobConfigurationTests:
Expand Down Expand Up @@ -397,6 +397,30 @@ public void testVerify_GivenNegativeModelSnapshotRetentionDays() {
assertEquals(errorMessage, e.getMessage());
}

public void testVerify_GivenNegativeDailyModelSnapshotRetentionAfterDays() {
String errorMessage =
Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW, "daily_model_snapshot_retention_after_days", 0, -1);
Job.Builder builder = buildJobBuilder("foo");
builder.setDailyModelSnapshotRetentionAfterDays(-1L);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, builder::build);

assertEquals(errorMessage, e.getMessage());
}

public void testVerify_GivenInconsistentModelSnapshotRetentionSettings() {
long dailyModelSnapshotRetentionAfterDays = randomLongBetween(1, Long.MAX_VALUE);
long modelSnapshotRetentionDays = randomLongBetween(0, dailyModelSnapshotRetentionAfterDays - 1);
String errorMessage =
Messages.getMessage(Messages.JOB_CONFIG_MODEL_SNAPSHOT_RETENTION_SETTINGS_INCONSISTENT,
dailyModelSnapshotRetentionAfterDays, modelSnapshotRetentionDays);
Job.Builder builder = buildJobBuilder("foo");
builder.setDailyModelSnapshotRetentionAfterDays(dailyModelSnapshotRetentionAfterDays);
builder.setModelSnapshotRetentionDays(modelSnapshotRetentionDays);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, builder::build);

assertEquals(errorMessage, e.getMessage());
}

public void testVerify_GivenLowBackgroundPersistInterval() {
String errorMessage = Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW, "background_persist_interval", 3600, 3599);
Job.Builder builder = buildJobBuilder("foo");
Expand Down Expand Up @@ -628,7 +652,11 @@ public static Job createRandomizedJob() {
builder.setModelSnapshotRetentionDays(randomNonNegativeLong());
}
if (randomBoolean()) {
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
if (builder.getModelSnapshotRetentionDays() != null) {
builder.setDailyModelSnapshotRetentionAfterDays(randomLongBetween(0, builder.getModelSnapshotRetentionDays()));
} else {
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
}
}
if (randomBoolean()) {
builder.setResultsRetentionDays(randomNonNegativeLong());
Expand Down
Loading

0 comments on commit 7aa0daa

Please sign in to comment.