Skip to content

Commit

Permalink
[7.x][ML] Remove model snapshot legacy doc ids (elastic#62434)
Browse files Browse the repository at this point in the history
Removes methods that were no longer used regarding version 5.4 doc ids of ModelState.

Also adds clean up of 5.4 model state and quantile docs in the daily maintenance.

Backport of elastic#62434
  • Loading branch information
dimitris-athanasiou committed Sep 17, 2020
1 parent f5c28e2 commit f248e82
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.common.time.TimeUtils;
import org.elasticsearch.xpack.core.ml.job.config.Job;

import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -287,18 +287,6 @@ public List<String> stateDocumentIds() {
return stateDocumentIds;
}

/**
* This is how the IDs were formed in v5.4
*/
public List<String> legacyStateDocumentIds() {
List<String> stateDocumentIds = new ArrayList<>(snapshotDocCount);
// The state documents count suffices are 1-based
for (int i = 1; i <= snapshotDocCount; i++) {
stateDocumentIds.add(ModelState.v54DocumentId(jobId, snapshotId, i));
}
return stateDocumentIds;
}

public static String documentIdPrefix(String jobId) {
return jobId + "_" + TYPE + "_";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,48 @@
package org.elasticsearch.xpack.core.ml.job.process.autodetect.state;


import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* The model state does not need to be understood on the Java side.
* The Java code only needs to know how to form the document IDs so that
* it can retrieve and delete the correct documents.
*/
public class ModelState {
public final class ModelState {

/**
* Legacy type, now used only as a discriminant in the document ID
*/
public static final String TYPE = "model_state";

public static final String documentId(String jobId, String snapshotId, int docNum) {
return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum;
}
private static final Pattern V_5_4_DOC_ID_REGEX = Pattern.compile("(.*)-\\d{10}#\\d+");

/**
* This is how the IDs were formed in v5.4
*/
public static final String v54DocumentId(String jobId, String snapshotId, int docNum) {
return jobId + "-" + snapshotId + "#" + docNum;
public static String documentId(String jobId, String snapshotId, int docNum) {
return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum;
}

/**
* Given the id of a state document it extracts the job id
* @param docId the state document id
* @return the job id or {@code null} if the id is not valid
*/
public static final String extractJobId(String docId) {
public static String extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("_" + TYPE + "_");
return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex);
return suffixIndex <= 0 ? v54ExtractJobId(docId) : docId.substring(0, suffixIndex);
}

/**
* On version 5.4 the state doc ids had a different pattern.
* The pattern started with the job id, followed by a hyphen, the snapshot id,
* and ended with hash and an integer.
*/
private static String v54ExtractJobId(String docId) {
Matcher matcher = V_5_4_DOC_ID_REGEX.matcher(docId);
if (matcher.matches()) {
return matcher.group(1);
}
return null;
}

private ModelState() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ public static String v54DocumentId(String jobId) {
* @param docId the quantiles document id
* @return the job id or {@code null} if the id is not valid
*/
public static final String extractJobId(String docId) {
public static String extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("_" + TYPE);
return suffixIndex <= 0 ? v54extractJobId(docId) : docId.substring(0, suffixIndex);
}

private static String v54extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("-" + TYPE);
return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,13 @@ public void testExtractJobId_GivenInvalidDocId() {
assertThat(ModelState.extractJobId("_model_state_3141341341"), is(nullValue()));
assertThat(ModelState.extractJobId("foo_quantiles"), is(nullValue()));
}
}

public void testExtractJobId_GivenV54DocId() {
assertThat(ModelState.extractJobId("test_job_id-1-0123456789#1"), equalTo("test_job_id-1"));
assertThat(ModelState.extractJobId("test_job_id-2-9876543210#42"), equalTo("test_job_id-2"));
assertThat(ModelState.extractJobId("test_job_id-0123456789-9876543210#42"), equalTo("test_job_id-0123456789"));

// This one has one less digit for snapshot id
assertThat(ModelState.extractJobId("test_job_id-123456789#42"), is(nullValue()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ public void testExtractJobId_GivenValidDocId() {
assertThat(Quantiles.extractJobId("_quantiles_quantiles"), equalTo("_quantiles"));
}

public void testExtractJobId_GivenV54DocId() {
assertThat(Quantiles.extractJobId("foo-quantiles"), equalTo("foo"));
assertThat(Quantiles.extractJobId("bar-quantiles"), equalTo("bar"));
assertThat(Quantiles.extractJobId("foo-bar-quantiles"), equalTo("foo-bar"));
assertThat(Quantiles.extractJobId("-quantiles-quantiles"), equalTo("-quantiles"));
}

public void testExtractJobId_GivenInvalidDocId() {
assertThat(Quantiles.extractJobId(""), is(nullValue()));
assertThat(Quantiles.extractJobId("foo"), is(nullValue()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public void testIsValidId() {
assertThat(MlStrings.isValidId("b.-_3"), is(true));
assertThat(MlStrings.isValidId("a-b.c_d"), is(true));

assertThat(MlStrings.isValidId("1_-.a#"), is(false));
assertThat(MlStrings.isValidId("a1_-."), is(false));
assertThat(MlStrings.isValidId("-.a1_"), is(false));
assertThat(MlStrings.isValidId(".a1_-"), is(false));
Expand Down

0 comments on commit f248e82

Please sign in to comment.