From 7118ff797667594ba781526cc16d6eda371ead6d Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Thu, 17 Sep 2020 23:43:28 +0300 Subject: [PATCH] [7.x][ML] Remove model snapshot legacy doc ids (#62434) (#62569) Removes methods that were no longer used regarding version 5.4 doc ids of ModelState. Also adds clean up of 5.4 model state and quantile docs in the daily maintenance. Backport of #62434 --- .../autodetect/state/ModelSnapshot.java | 14 +------- .../process/autodetect/state/ModelState.java | 33 ++++++++++++------- .../process/autodetect/state/Quantiles.java | 7 +++- .../autodetect/state/ModelStateTests.java | 11 ++++++- .../autodetect/state/QuantilesTests.java | 7 ++++ .../xpack/ml/utils/MlStringsTests.java | 1 + 6 files changed, 47 insertions(+), 26 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java index 13415d2bbd5bb..c230c9336f612 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java @@ -21,8 +21,8 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.common.time.TimeUtils; +import org.elasticsearch.xpack.core.ml.job.config.Job; import java.io.IOException; import java.io.InputStream; @@ -287,18 +287,6 @@ public List stateDocumentIds() { return stateDocumentIds; } - /** - * This is how the IDs were formed in v5.4 - */ - public List legacyStateDocumentIds() { - List stateDocumentIds = new ArrayList<>(snapshotDocCount); - // The state documents count suffices are 1-based - for (int i = 1; i <= snapshotDocCount; i++) { - stateDocumentIds.add(ModelState.v54DocumentId(jobId, snapshotId, i)); - } - return stateDocumentIds; - } - public static String documentIdPrefix(String jobId) { return jobId + "_" + TYPE + "_"; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java index fbec7bb6c7291..30a8511e1867e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java @@ -6,27 +6,25 @@ package org.elasticsearch.xpack.core.ml.job.process.autodetect.state; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * The model state does not need to be understood on the Java side. * The Java code only needs to know how to form the document IDs so that * it can retrieve and delete the correct documents. */ -public class ModelState { +public final class ModelState { /** * Legacy type, now used only as a discriminant in the document ID */ public static final String TYPE = "model_state"; - public static final String documentId(String jobId, String snapshotId, int docNum) { - return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum; - } + private static final Pattern V_5_4_DOC_ID_REGEX = Pattern.compile("(.*)-\\d{10}#\\d+"); - /** - * This is how the IDs were formed in v5.4 - */ - public static final String v54DocumentId(String jobId, String snapshotId, int docNum) { - return jobId + "-" + snapshotId + "#" + docNum; + public static String documentId(String jobId, String snapshotId, int docNum) { + return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum; } /** @@ -34,9 +32,22 @@ public static final String v54DocumentId(String jobId, String snapshotId, int do * @param docId the state document id * @return the job id or {@code null} if the id is not valid */ - public static final String extractJobId(String docId) { + public static String extractJobId(String docId) { int suffixIndex = docId.lastIndexOf("_" + TYPE + "_"); - return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex); + return suffixIndex <= 0 ? v54ExtractJobId(docId) : docId.substring(0, suffixIndex); + } + + /** + * On version 5.4 the state doc ids had a different pattern. + * The pattern started with the job id, followed by a hyphen, the snapshot id, + * and ended with hash and an integer. + */ + private static String v54ExtractJobId(String docId) { + Matcher matcher = V_5_4_DOC_ID_REGEX.matcher(docId); + if (matcher.matches()) { + return matcher.group(1); + } + return null; } private ModelState() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java index 0b3ddcc7b5197..c7d93fa6312e4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java @@ -65,8 +65,13 @@ public static String v54DocumentId(String jobId) { * @param docId the quantiles document id * @return the job id or {@code null} if the id is not valid */ - public static final String extractJobId(String docId) { + public static String extractJobId(String docId) { int suffixIndex = docId.lastIndexOf("_" + TYPE); + return suffixIndex <= 0 ? v54extractJobId(docId) : docId.substring(0, suffixIndex); + } + + private static String v54extractJobId(String docId) { + int suffixIndex = docId.lastIndexOf("-" + TYPE); return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java index 0e42a06111931..b18b53e572488 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java @@ -28,4 +28,13 @@ public void testExtractJobId_GivenInvalidDocId() { assertThat(ModelState.extractJobId("_model_state_3141341341"), is(nullValue())); assertThat(ModelState.extractJobId("foo_quantiles"), is(nullValue())); } -} \ No newline at end of file + + public void testExtractJobId_GivenV54DocId() { + assertThat(ModelState.extractJobId("test_job_id-1-0123456789#1"), equalTo("test_job_id-1")); + assertThat(ModelState.extractJobId("test_job_id-2-9876543210#42"), equalTo("test_job_id-2")); + assertThat(ModelState.extractJobId("test_job_id-0123456789-9876543210#42"), equalTo("test_job_id-0123456789")); + + // This one has one less digit for snapshot id + assertThat(ModelState.extractJobId("test_job_id-123456789#42"), is(nullValue())); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java index 146e3ed5bd539..b0eeeac7b1e38 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java @@ -28,6 +28,13 @@ public void testExtractJobId_GivenValidDocId() { assertThat(Quantiles.extractJobId("_quantiles_quantiles"), equalTo("_quantiles")); } + public void testExtractJobId_GivenV54DocId() { + assertThat(Quantiles.extractJobId("foo-quantiles"), equalTo("foo")); + assertThat(Quantiles.extractJobId("bar-quantiles"), equalTo("bar")); + assertThat(Quantiles.extractJobId("foo-bar-quantiles"), equalTo("foo-bar")); + assertThat(Quantiles.extractJobId("-quantiles-quantiles"), equalTo("-quantiles")); + } + public void testExtractJobId_GivenInvalidDocId() { assertThat(Quantiles.extractJobId(""), is(nullValue())); assertThat(Quantiles.extractJobId("foo"), is(nullValue())); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java index f4e638fc9199d..04396c284d0a6 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java @@ -34,6 +34,7 @@ public void testIsValidId() { assertThat(MlStrings.isValidId("b.-_3"), is(true)); assertThat(MlStrings.isValidId("a-b.c_d"), is(true)); + assertThat(MlStrings.isValidId("1_-.a#"), is(false)); assertThat(MlStrings.isValidId("a1_-."), is(false)); assertThat(MlStrings.isValidId("-.a1_"), is(false)); assertThat(MlStrings.isValidId(".a1_-"), is(false));