diff --git a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java index de8e02ed0c7d..2f258733e757 100644 --- a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java +++ b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java @@ -424,7 +424,7 @@ public synchronized void start() { if (plugins.isEmpty() == false) { logToProcessStdout("Installing " + plugins.size() + " plugins"); - plugins.forEach(plugin -> runElaticsearchBinScript( + plugins.forEach(plugin -> runElasticsearchBinScript( "elasticsearch-plugin", "install", "--batch", plugin.toString()) ); @@ -432,7 +432,7 @@ public synchronized void start() { if (getVersion().before("6.3.0") && testDistribution == TestDistribution.DEFAULT) { LOGGER.info("emulating the {} flavor for {} by installing x-pack", testDistribution, getVersion()); - runElaticsearchBinScript( + runElasticsearchBinScript( "elasticsearch-plugin", "install", "--batch", "x-pack" ); @@ -440,7 +440,7 @@ public synchronized void start() { if (keystoreSettings.isEmpty() == false || keystoreFiles.isEmpty() == false) { logToProcessStdout("Adding " + keystoreSettings.size() + " keystore settings and " + keystoreFiles.size() + " keystore files"); - runElaticsearchBinScript("elasticsearch-keystore", "create"); + runElasticsearchBinScript("elasticsearch-keystore", "create"); keystoreSettings.forEach((key, value) -> runElasticsearchBinScriptWithInput(value.toString(), "elasticsearch-keystore", "add", "-x", key) @@ -452,7 +452,7 @@ public synchronized void start() { if (file.exists() == false) { throw new TestClustersException("supplied keystore file " + file + " does not exist, require for " + this); } - runElaticsearchBinScript("elasticsearch-keystore", "add-file", entry.getKey(), file.getAbsolutePath()); + runElasticsearchBinScript("elasticsearch-keystore", "add-file", entry.getKey(), file.getAbsolutePath()); } } @@ -467,7 +467,7 @@ public synchronized void start() { if (credentials.isEmpty() == false) { logToProcessStdout("Setting up " + credentials.size() + " users"); - credentials.forEach(paramMap -> runElaticsearchBinScript( + credentials.forEach(paramMap -> runElasticsearchBinScript( getVersion().onOrAfter("6.3.0") ? "elasticsearch-users" : "x-pack/users", paramMap.entrySet().stream() .flatMap(entry -> Stream.of(entry.getKey(), entry.getValue())) @@ -663,7 +663,7 @@ private void runElasticsearchBinScriptWithInput(String input, String tool, Strin } } - private void runElaticsearchBinScript(String tool, String... args) { + private void runElasticsearchBinScript(String tool, String... 
args) { runElasticsearchBinScriptWithInput("", tool, args); } diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 6c7d6798a65c..ad486276f082 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.0.0 -lucene = 8.4.0-snapshot-e648d601efb +lucene = 8.4.0-snapshot-662c455 bundled_jdk_vendor = adoptopenjdk bundled_jdk = 13.0.1+9 diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java index d4e7bce5ec44..9d384e6d8678 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java @@ -49,6 +49,7 @@ public static Builder builder(String dependentVariable) { static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes"); + static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -63,7 +64,8 @@ public static Builder builder(String dependentVariable) { (Double) a[5], (String) a[6], (Double) a[7], - (Integer) a[8])); + (Integer) a[8], + (Long) a[9])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE); @@ -75,6 +77,7 @@ public static Builder builder(String dependentVariable) { PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT); PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), NUM_TOP_CLASSES); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED); } private final String dependentVariable; @@ -86,10 +89,11 @@ public static Builder builder(String dependentVariable) { private final String predictionFieldName; private final Double trainingPercent; private final Integer numTopClasses; + private final Long randomizeSeed; private Classification(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName, - @Nullable Double trainingPercent, @Nullable Integer numTopClasses) { + @Nullable Double trainingPercent, @Nullable Integer numTopClasses, @Nullable Long randomizeSeed) { this.dependentVariable = Objects.requireNonNull(dependentVariable); this.lambda = lambda; this.gamma = gamma; @@ -99,6 +103,7 @@ private Classification(String dependentVariable, @Nullable Double lambda, @Nulla this.predictionFieldName = predictionFieldName; this.trainingPercent = trainingPercent; this.numTopClasses = numTopClasses; + this.randomizeSeed = randomizeSeed; } @Override @@ -138,6 +143,10 @@ public Double getTrainingPercent() { return trainingPercent; } + public Long getRandomizeSeed() { + return randomizeSeed; + } + public Integer getNumTopClasses() { return numTopClasses; } @@ -167,6 +176,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (trainingPercent != null) { builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); } + if (randomizeSeed != null) { + 
builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } if (numTopClasses != null) { builder.field(NUM_TOP_CLASSES.getPreferredName(), numTopClasses); } @@ -177,7 +189,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public int hashCode() { return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent, numTopClasses); + trainingPercent, randomizeSeed, numTopClasses); } @Override @@ -193,6 +205,7 @@ public boolean equals(Object o) { && Objects.equals(featureBagFraction, that.featureBagFraction) && Objects.equals(predictionFieldName, that.predictionFieldName) && Objects.equals(trainingPercent, that.trainingPercent) + && Objects.equals(randomizeSeed, that.randomizeSeed) && Objects.equals(numTopClasses, that.numTopClasses); } @@ -211,6 +224,7 @@ public static class Builder { private String predictionFieldName; private Double trainingPercent; private Integer numTopClasses; + private Long randomizeSeed; private Builder(String dependentVariable) { this.dependentVariable = Objects.requireNonNull(dependentVariable); @@ -251,6 +265,11 @@ public Builder setTrainingPercent(Double trainingPercent) { return this; } + public Builder setRandomizeSeed(Long randomizeSeed) { + this.randomizeSeed = randomizeSeed; + return this; + } + public Builder setNumTopClasses(Integer numTopClasses) { this.numTopClasses = numTopClasses; return this; @@ -258,7 +277,7 @@ public Builder setNumTopClasses(Integer numTopClasses) { public Classification build() { return new Classification(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent, numTopClasses); + trainingPercent, numTopClasses, randomizeSeed); } } } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java index 3c1edece6fc1..fa55ee40b27f 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java @@ -48,6 +48,7 @@ public static Builder builder(String dependentVariable) { static final ParseField FEATURE_BAG_FRACTION = new ParseField("feature_bag_fraction"); static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -61,7 +62,8 @@ public static Builder builder(String dependentVariable) { (Integer) a[4], (Double) a[5], (String) a[6], - (Double) a[7])); + (Double) a[7], + (Long) a[8])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE); @@ -72,6 +74,7 @@ public static Builder builder(String dependentVariable) { PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), FEATURE_BAG_FRACTION); PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED); } private final String dependentVariable; @@ -82,10 +85,11 @@ public static Builder builder(String 
dependentVariable) { private final Double featureBagFraction; private final String predictionFieldName; private final Double trainingPercent; + private final Long randomizeSeed; private Regression(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, @Nullable Long randomizeSeed) { this.dependentVariable = Objects.requireNonNull(dependentVariable); this.lambda = lambda; this.gamma = gamma; @@ -94,6 +98,7 @@ private Regression(String dependentVariable, @Nullable Double lambda, @Nullable this.featureBagFraction = featureBagFraction; this.predictionFieldName = predictionFieldName; this.trainingPercent = trainingPercent; + this.randomizeSeed = randomizeSeed; } @Override @@ -133,6 +138,10 @@ public Double getTrainingPercent() { return trainingPercent; } + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); @@ -158,6 +167,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (trainingPercent != null) { builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); } + if (randomizeSeed != null) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -165,7 +177,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public int hashCode() { return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent); + trainingPercent, randomizeSeed); } @Override @@ -180,7 +192,8 @@ public boolean equals(Object o) { && Objects.equals(maximumNumberTrees, that.maximumNumberTrees) && Objects.equals(featureBagFraction, that.featureBagFraction) && Objects.equals(predictionFieldName, that.predictionFieldName) - && Objects.equals(trainingPercent, that.trainingPercent); + && Objects.equals(trainingPercent, that.trainingPercent) + && Objects.equals(randomizeSeed, that.randomizeSeed); } @Override @@ -197,6 +210,7 @@ public static class Builder { private Double featureBagFraction; private String predictionFieldName; private Double trainingPercent; + private Long randomizeSeed; private Builder(String dependentVariable) { this.dependentVariable = Objects.requireNonNull(dependentVariable); @@ -237,9 +251,14 @@ public Builder setTrainingPercent(Double trainingPercent) { return this; } + public Builder setRandomizeSeed(Long randomizeSeed) { + this.randomizeSeed = randomizeSeed; + return this; + } + public Regression build() { return new Regression(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent); + trainingPercent, randomizeSeed); } } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 6ed3734831aa..29e69c5095cb 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -1291,6 +1291,7 @@ public void testPutDataFrameAnalyticsConfig_GivenRegression() throws Exception { 
.setAnalysis(org.elasticsearch.client.ml.dataframe.Regression.builder("my_dependent_variable") .setPredictionFieldName("my_dependent_variable_prediction") .setTrainingPercent(80.0) + .setRandomizeSeed(42L) .build()) .setDescription("this is a regression") .build(); @@ -1326,6 +1327,7 @@ public void testPutDataFrameAnalyticsConfig_GivenClassification() throws Excepti .setAnalysis(org.elasticsearch.client.ml.dataframe.Classification.builder("my_dependent_variable") .setPredictionFieldName("my_dependent_variable_prediction") .setTrainingPercent(80.0) + .setRandomizeSeed(42L) .setNumTopClasses(1) .build()) .setDescription("this is a classification") diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 1d9a151cf8ae..13185e221633 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -2975,7 +2975,8 @@ public void testPutDataFrameAnalytics() throws Exception { .setFeatureBagFraction(0.4) // <6> .setPredictionFieldName("my_prediction_field_name") // <7> .setTrainingPercent(50.0) // <8> - .setNumTopClasses(1) // <9> + .setRandomizeSeed(1234L) // <9> + .setNumTopClasses(1) // <10> .build(); // end::put-data-frame-analytics-classification @@ -2988,6 +2989,7 @@ public void testPutDataFrameAnalytics() throws Exception { .setFeatureBagFraction(0.4) // <6> .setPredictionFieldName("my_prediction_field_name") // <7> .setTrainingPercent(50.0) // <8> + .setRandomizeSeed(1234L) // <9> .build(); // end::put-data-frame-analytics-regression diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java index 98f060cc8534..5ef8fdaef5a2 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java @@ -34,6 +34,7 @@ public static Classification randomClassification() { .setFeatureBagFraction(randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, false)) .setPredictionFieldName(randomBoolean() ? null : randomAlphaOfLength(10)) .setTrainingPercent(randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true)) + .setRandomizeSeed(randomBoolean() ? null : randomLong()) .setNumTopClasses(randomBoolean() ? null : randomIntBetween(0, 10)) .build(); } diff --git a/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc index 91a97ad604ce..2152eff5c085 100644 --- a/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc +++ b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc @@ -119,7 +119,8 @@ include-tagged::{doc-tests-file}[{api}-classification] <6> The fraction of features which will be used when selecting a random bag for each candidate split. A double in (0, 1]. <7> The name of the prediction field in the results object. <8> The percentage of training-eligible rows to be used in training. Defaults to 100%. -<9> The number of top classes to be reported in the results. Defaults to 2. +<9> The seed to be used by the random generator that picks which rows are used in training. 
+<10> The number of top classes to be reported in the results. Defaults to 2. ===== Regression @@ -138,6 +139,7 @@ include-tagged::{doc-tests-file}[{api}-regression] <6> The fraction of features which will be used when selecting a random bag for each candidate split. A double in (0, 1]. <7> The name of the prediction field in the results object. <8> The percentage of training-eligible rows to be used in training. Defaults to 100%. +<9> The seed to be used by the random generator that picks which rows are used in training. ==== Analyzed fields diff --git a/docs/reference/cat/shards.asciidoc b/docs/reference/cat/shards.asciidoc index 61b1c869f042..b26472812f51 100644 --- a/docs/reference/cat/shards.asciidoc +++ b/docs/reference/cat/shards.asciidoc @@ -257,7 +257,7 @@ Reason the shard is unassigned. Returned values are: * `NEW_INDEX_RESTORED`: Unassigned as a result of restoring into a new index. * `NODE_LEFT`: Unassigned as a result of the node hosting it leaving the cluster. * `REALLOCATED_REPLICA`: A better replica location is identified and causes the existing replica allocation to be cancelled. -* `REINITIALIZED`: When a shard moves from started back to initializing, for example, with shadow replicas. +* `REINITIALIZED`: When a shard moves from started back to initializing. * `REPLICA_ADDED`: Unassigned as a result of explicit addition of a replica. * `REROUTE_CANCELLED`: Unassigned as a result of explicit cancel reroute command. diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 0da0fd19e16e..596bda67d3ed 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -825,6 +825,7 @@ include::processors/append.asciidoc[] include::processors/bytes.asciidoc[] include::processors/circle.asciidoc[] include::processors/convert.asciidoc[] +include::processors/csv.asciidoc[] include::processors/date.asciidoc[] include::processors/date-index-name.asciidoc[] include::processors/dissect.asciidoc[] diff --git a/docs/reference/ingest/processors/csv.asciidoc b/docs/reference/ingest/processors/csv.asciidoc new file mode 100644 index 000000000000..c589c9eb4361 --- /dev/null +++ b/docs/reference/ingest/processors/csv.asciidoc @@ -0,0 +1,33 @@ +[[csv-processor]] +=== CSV Processor +Extracts fields from CSV line out of a single text field within a document. Any empty field in CSV will be skipped. + +[[csv-options]] +.CSV Options +[options="header"] +|====== +| Name | Required | Default | Description +| `field` | yes | - | The field to extract data from +| `target_fields` | yes | - | The array of fields to assign extracted values to +| `separator` | no | , | Separator used in CSV, has to be single character string +| `quote` | no | " | Quote used in CSV, has to be single character string +| `ignore_missing` | no | `true` | If `true` and `field` does not exist, the processor quietly exits without modifying the document +| `trim` | no | `false` | Trim whitespaces in unquoted fields +include::common-options.asciidoc[] +|====== + +[source,js] +-------------------------------------------------- +{ + "csv": { + "field": "my_field", + "target_fields": ["field1, field2"], + } +} +-------------------------------------------------- +// NOTCONSOLE + +If the `trim` option is enabled then any whitespace in the beginning and in the end of each unquoted field will be trimmed. +For example with configuration above, a value of `A, B` will result in field `field2` +having value `{nbsp}B` (with space at the beginning). 
If `trim` is enabled `A, B` will result in field `field2` +having value `B` (no whitespace). Quoted fields will be left untouched. diff --git a/docs/reference/mapping/params/normalizer.asciidoc b/docs/reference/mapping/params/normalizer.asciidoc index 1e7e6870c302..b218d311c720 100644 --- a/docs/reference/mapping/params/normalizer.asciidoc +++ b/docs/reference/mapping/params/normalizer.asciidoc @@ -90,12 +90,12 @@ both index and query time. "value": 2, "relation": "eq" }, - "max_score": 0.47000363, + "max_score": 0.4700036, "hits": [ { "_index": "index", "_id": "1", - "_score": 0.47000363, + "_score": 0.4700036, "_source": { "foo": "BÀR" } @@ -103,7 +103,7 @@ both index and query time. { "_index": "index", "_id": "2", - "_score": 0.47000363, + "_score": 0.4700036, "_source": { "foo": "bar" } diff --git a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc b/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc deleted file mode 100644 index 864e71e35bdb..000000000000 --- a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc +++ /dev/null @@ -1,161 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[ml-datafeed-resource]] -=== {dfeed-cap} resources - -A {dfeed} resource has the following properties: - -`aggregations`:: - (object) If set, the {dfeed} performs aggregation searches. - Support for aggregations is limited and should only be used with - low cardinality data. For more information, see - {stack-ov}/ml-configuring-aggregation.html[Aggregating Data for Faster Performance]. - -`chunking_config`:: - (object) Specifies how data searches are split into time chunks. - See <>. - For example: `{"mode": "manual", "time_span": "3h"}` - -`datafeed_id`:: - (string) A numerical character string that uniquely identifies the {dfeed}. - This property is informational; you cannot change the identifier for existing - {dfeeds}. - -`frequency`:: - (time units) The interval at which scheduled queries are made while the - {dfeed} runs in real time. The default value is either the bucket span for short - bucket spans, or, for longer bucket spans, a sensible fraction of the bucket - span. For example: `150s`. - -`indices`:: - (array) An array of index names. For example: `["it_ops_metrics"]` - -`job_id`:: - (string) The unique identifier for the job to which the {dfeed} sends data. - -`query`:: - (object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the - options that are supported by {es} can be used, as this object is - passed verbatim to {es}. By default, this property has the following - value: `{"match_all": {"boost": 1}}`. - -`query_delay`:: - (time units) The number of seconds behind real time that data is queried. For - example, if data from 10:04 a.m. might not be searchable in {es} until - 10:06 a.m., set this property to 120 seconds. The default value is randomly - selected between `60s` and `120s`. This randomness improves the query - performance when there are multiple jobs running on the same node. - -`script_fields`:: - (object) Specifies scripts that evaluate custom expressions and returns - script fields to the {dfeed}. - The detector configuration objects in a job can contain - functions that use these script fields. - For more information, see - {stack-ov}/ml-configuring-transform.html[Transforming Data With Script Fields]. - -`scroll_size`:: - (unsigned integer) The `size` parameter that is used in {es} searches. - The default value is `1000`. 
- -`delayed_data_check_config`:: - (object) Specifies whether the data feed checks for missing data and - the size of the window. For example: - `{"enabled": true, "check_window": "1h"}` See - <>. - -`max_empty_searches`:: - (integer) If a real-time {dfeed} has never seen any data (including during - any initial training period) then it will automatically stop itself and - close its associated job after this many real-time searches that return no - documents. In other words, it will stop after `frequency` times - `max_empty_searches` of real-time operation. If not set - then a {dfeed} with no end time that sees no data will remain started until - it is explicitly stopped. By default this setting is not set. - -[[ml-datafeed-chunking-config]] -==== Chunking configuration objects - -{dfeeds-cap} might be required to search over long time periods, for several months -or years. This search is split into time chunks in order to ensure the load -on {es} is managed. Chunking configuration controls how the size of these time -chunks are calculated and is an advanced configuration option. - -A chunking configuration object has the following properties: - -`mode`:: - There are three available modes: + - `auto`::: The chunk size will be dynamically calculated. This is the default - and recommended value. - `manual`::: Chunking will be applied according to the specified `time_span`. - `off`::: No chunking will be applied. - -`time_span`:: - (time units) The time span that each search will be querying. - This setting is only applicable when the mode is set to `manual`. - For example: `3h`. - -[[ml-datafeed-delayed-data-check-config]] -==== Delayed data check configuration objects - -The {dfeed} can optionally search over indices that have already been read in -an effort to determine whether any data has subsequently been added to the index. -If missing data is found, it is a good indication that the `query_delay` option -is set too low and the data is being indexed after the {dfeed} has passed that -moment in time. See -{stack-ov}/ml-delayed-data-detection.html[Working with delayed data]. - -This check runs only on real-time {dfeeds}. - -The configuration object has the following properties: - -`enabled`:: - (boolean) Specifies whether the {dfeed} periodically checks for delayed data. - Defaults to `true`. - -`check_window`:: - (time units) The window of time that is searched for late data. This window of - time ends with the latest finalized bucket. It defaults to `null`, which - causes an appropriate `check_window` to be calculated when the real-time - {dfeed} runs. In particular, the default `check_window` span calculation is - based on the maximum of `2h` or `8 * bucket_span`. - -[float] -[[ml-datafeed-counts]] -==== {dfeed-cap} counts - -The get {dfeed} statistics API provides information about the operational -progress of a {dfeed}. All of these properties are informational; you cannot -update their values: - -`assignment_explanation`:: - (string) For started {dfeeds} only, contains messages relating to the - selection of a node. - -`datafeed_id`:: - (string) A numerical character string that uniquely identifies the {dfeed}. - -`node`:: - (object) The node upon which the {dfeed} is started. The {dfeed} and job will - be on the same node. - `id`::: The unique identifier of the node. For example, - "0-o0tOoRTwKFZifatTWKNw". - `name`::: The node name. For example, `0-o0tOo`. - `ephemeral_id`::: The node ephemeral ID. 
- `transport_address`::: The host and port where transport HTTP connections are - accepted. For example, `127.0.0.1:9300`. - `attributes`::: For example, `{"ml.machine_memory": "17179869184"}`. - -`state`:: - (string) The status of the {dfeed}, which can be one of the following values: + - `started`::: The {dfeed} is actively receiving data. - `stopped`::: The {dfeed} is stopped and will not receive data until it is - re-started. - -`timing_stats`:: - (object) An object that provides statistical information about timing aspect of this datafeed. + - `job_id`::: A numerical character string that uniquely identifies the job. - `search_count`::: Number of searches performed by this datafeed. - `total_search_time_ms`::: Total time the datafeed spent searching in milliseconds. - diff --git a/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc index 21b4eb75bef0..d933afe4f9a4 100644 --- a/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc @@ -28,14 +28,15 @@ can delete it. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-delete-datafeed-query-parms]] ==== {api-query-parms-title} `force`:: - (Optional, boolean) Use to forcefully delete a started {dfeed}; this method is - quicker than stopping and deleting the {dfeed}. +(Optional, boolean) Use to forcefully delete a started {dfeed}; this method is +quicker than stopping and deleting the {dfeed}. [[ml-delete-datafeed-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc index bd126a651e26..feccd52364f4 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc @@ -45,36 +45,61 @@ IMPORTANT: This API returns a maximum of 10,000 {dfeeds}. ==== {api-path-parms-title} ``:: - (Optional, string) Identifier for the {dfeed}. It can be a {dfeed} identifier - or a wildcard expression. If you do not specify one of these options, the API - returns statistics for all {dfeeds}. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard] +If you do not specify one of these options, the API returns information about +all {dfeeds}. [[ml-get-datafeed-stats-query-parms]] ==== {api-query-parms-title} `allow_no_datafeeds`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no {datafeeds} that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `datafeeds` array when -there are no matches and the subset of results when there are partial matches. -If this parameter is `false`, the request returns a `404` status code when there -are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds] [[ml-get-datafeed-stats-results]] ==== {api-response-body-title} -The API returns the following information: +The API returns an array of {dfeed} count objects. All of these properties are +informational; you cannot update their values. 
+ +`assignment_explanation`:: +(string) For started {dfeeds} only, contains messages relating to the selection of a node. + +`datafeed_id`:: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] + +`node`:: +(object) For started {dfeeds} only, the node upon which the {dfeed} is started. The {dfeed} and job will be on the same node. +`id`::: The unique identifier of the node. For example, "0-o0tOoRTwKFZifatTWKNw". +`name`::: The node name. For example, `0-o0tOo`. +`ephemeral_id`::: The node ephemeral ID. +`transport_address`::: The host and port where transport HTTP connections are +accepted. For example, `127.0.0.1:9300`. +`attributes`::: For example, `{"ml.machine_memory": "17179869184"}`. + +`state`:: +(string) The status of the {dfeed}, which can be one of the following values: ++ +-- +* `started`: The {dfeed} is actively receiving data. +* `stopped`: The {dfeed} is stopped and will not receive data until it is +re-started. +-- + +`timing_stats`:: +(object) An object that provides statistical information about timing aspect of +this {dfeed}. +//average_search_time_per_bucket_ms +//bucket_count +//exponential_average_search_time_per_hour_ms +`job_id`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] +`search_count`::: Number of searches performed by this {dfeed}. +`total_search_time_ms`::: Total time the {dfeed} spent searching in milliseconds. -`datafeeds`:: - (array) An array of {dfeed} count objects. - For more information, see <>. [[ml-get-datafeed-stats-response-codes]] ==== {api-response-codes-title} @@ -86,46 +111,46 @@ The API returns the following information: [[ml-get-datafeed-stats-example]] ==== {api-examples-title} -The following example gets usage information for the -`datafeed-total-requests` {dfeed}: - [source,console] -------------------------------------------------- -GET _ml/datafeeds/datafeed-total-requests/_stats +GET _ml/datafeeds/datafeed-high_sum_total_sales/_stats -------------------------------------------------- -// TEST[skip:setup:server_metrics_startdf] +// TEST[skip:Kibana sample data started datafeed] The API returns the following results: [source,console-result] ---- { - "count": 1, - "datafeeds": [ + "count" : 1, + "datafeeds" : [ { - "datafeed_id": "datafeed-total-requests", - "state": "started", - "node": { - "id": "2spCyo1pRi2Ajo-j-_dnPX", - "name": "node-0", - "ephemeral_id": "hoXMLZB0RWKfR9UPPUCxXX", - "transport_address": "127.0.0.1:9300", - "attributes": { - "ml.machine_memory": "17179869184", - "ml.max_open_jobs": "20" + "datafeed_id" : "datafeed-high_sum_total_sales", + "state" : "started", + "node" : { + "id" : "7bmMXyWCRs-TuPfGJJ_yMw", + "name" : "node-0", + "ephemeral_id" : "hoXMLZB0RWKfR9UPPUCxXX", + "transport_address" : "127.0.0.1:9300", + "attributes" : { + "ml.machine_memory" : "17179869184", + "ml.max_open_jobs" : "20" } }, - "assignment_explanation": "", - "timing_stats": { - "job_id": "job-total-requests", - "search_count": 20, - "total_search_time_ms": 120.5 + "assignment_explanation" : "", + "timing_stats" : { + "job_id" : "high_sum_total_sales", + "search_count" : 7, + "bucket_count" : 743, + "total_search_time_ms" : 134.0, + "average_search_time_per_bucket_ms" : 0.180349932705249, + "exponential_average_search_time_per_hour_ms" : 11.514712961628677 } } ] } ---- -// TESTRESPONSE[s/"2spCyo1pRi2Ajo-j-_dnPX"/$body.$_path/] +// TESTRESPONSE[s/"7bmMXyWCRs-TuPfGJJ_yMw"/$body.$_path/] // TESTRESPONSE[s/"node-0"/$body.$_path/] // TESTRESPONSE[s/"hoXMLZB0RWKfR9UPPUCxXX"/$body.$_path/] // 
TESTRESPONSE[s/"127.0.0.1:9300"/$body.$_path/] diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc index 3330ae7b821d..11aca1edd95e 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc @@ -42,35 +42,71 @@ IMPORTANT: This API returns a maximum of 10,000 {dfeeds}. ==== {api-path-parms-title} ``:: - (Optional, string) Identifier for the {dfeed}. It can be a {dfeed} identifier - or a wildcard expression. If you do not specify one of these options, the API - returns information about all {dfeeds}. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard] +If you do not specify one of these options, the API returns information about +all {dfeeds}. [[ml-get-datafeed-query-parms]] ==== {api-query-parms-title} `allow_no_datafeeds`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no {datafeeds} that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `datafeeds` array when -there are no matches and the subset of results when there are partial matches. -If this parameter is `false`, the request returns a `404` status code when there -are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds] [[ml-get-datafeed-results]] ==== {api-response-body-title} -The API returns the following information: +The API returns an array of {dfeed} resources, which have the following +properties: -`datafeeds`:: - (array) An array of {dfeed} objects. - For more information, see <>. 
+`aggregations`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] + +`chunking_config`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] + +`datafeed_id`:: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] + +`delayed_data_check_config`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] + +`frequency`:: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] + +`indices`:: +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] + +`job_id`:: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-datafeed] + +`max_empty_searches`:: +(integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=max-empty-searches] + +`query`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] + +`query_delay`:: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] + +`script_fields`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] + +`scroll_size`:: +(unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-get-datafeed-response-codes]] ==== {api-response-codes-title} @@ -83,39 +119,48 @@ The API returns the following information: ==== {api-examples-title} The following example gets configuration information for the -`datafeed-total-requests` {dfeed}: +`datafeed-high_sum_total_sales` {dfeed}: [source,console] -------------------------------------------------- -GET _ml/datafeeds/datafeed-total-requests +GET _ml/datafeeds/datafeed-high_sum_total_sales -------------------------------------------------- -// TEST[skip:setup:server_metrics_datafeed] +// TEST[skip:kibana sample data] The API returns the following results: [source,console-result] ---- { - "count": 1, - "datafeeds": [ + "count" : 1, + "datafeeds" : [ { - "datafeed_id": "datafeed-total-requests", - "job_id": "total-requests", - "query_delay": "83474ms", - "indices": [ - "server-metrics" + "datafeed_id" : "datafeed-high_sum_total_sales", + "job_id" : "high_sum_total_sales", + "query_delay" : "93169ms", + "indices" : [ + "kibana_sample_data_ecommerce" ], - "query": { - "match_all": { - "boost": 1.0 + "query" : { + "bool" : { + "filter" : [ + { + "term" : { + "_index" : "kibana_sample_data_ecommerce" + } + } + ] } }, - "scroll_size": 1000, - "chunking_config": { - "mode": "auto" + "scroll_size" : 1000, + "chunking_config" : { + "mode" : "auto" + }, + "delayed_data_check_config" : { + "enabled" : true } } ] } ---- -// TESTRESPONSE[s/"query.boost": "1.0"/"query.boost": $body.query.boost/] +// TESTRESPONSE[s/"query.boost": "93169ms"/"query.boost": $body.query.boost/] diff --git a/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc index c3afca8b03c6..6220d8a1de24 100644 --- a/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc @@ -41,18 +41,17 @@ it to ensure it is returning the expected data. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. 
+(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-preview-datafeed-example]] ==== {api-examples-title} -The following example obtains a preview of the `datafeed-farequote` {dfeed}: - [source,console] -------------------------------------------------- -GET _ml/datafeeds/datafeed-farequote/_preview +GET _ml/datafeeds/datafeed-high_sum_total_sales/_preview -------------------------------------------------- -// TEST[skip:setup:farequote_datafeed] +// TEST[skip:set up Kibana sample data] The data that is returned for this example is as follows: @@ -60,22 +59,28 @@ The data that is returned for this example is as follows: ---- [ { - "time": 1454803200000, - "airline": "JZA", - "doc_count": 5, - "responsetime": 990.4628295898438 + "order_date" : 1574294659000, + "category.keyword" : "Men's Clothing", + "customer_full_name.keyword" : "Sultan Al Benson", + "taxful_total_price" : 35.96875 }, { - "time": 1454803200000, - "airline": "JBU", - "doc_count": 23, - "responsetime": 877.5927124023438 + "order_date" : 1574294918000, + "category.keyword" : [ + "Women's Accessories", + "Women's Clothing" + ], + "customer_full_name.keyword" : "Pia Webb", + "taxful_total_price" : 83.0 }, { - "time": 1454803200000, - "airline": "KLM", - "doc_count": 42, - "responsetime": 1355.481201171875 + "order_date" : 1574295782000, + "category.keyword" : [ + "Women's Accessories", + "Women's Shoes" + ], + "customer_full_name.keyword" : "Brigitte Graham", + "taxful_total_price" : 72.0 } ] ---- diff --git a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc index ca3b9d61ba7a..cb3765a86c97 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc @@ -43,70 +43,52 @@ those same roles. ==== {api-path-parms-title} ``:: - (Required, string) A numerical character string that uniquely identifies the - {dfeed}. This identifier can contain lowercase alphanumeric characters (a-z - and 0-9), hyphens, and underscores. It must start and end with alphanumeric - characters. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-put-datafeed-request-body]] ==== {api-request-body-title} `aggregations`:: - (Optional, object) If set, the {dfeed} performs aggregation searches. For more - information, see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] `chunking_config`:: - (Optional, object) Specifies how data searches are split into time chunks. See - <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] `delayed_data_check_config`:: - (Optional, object) Specifies whether the data feed checks for missing data and - the size of the window. See <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] `frequency`:: - (Optional, <>) The interval at which scheduled queries - are made while the {dfeed} runs in real time. The default value is either the - bucket span for short bucket spans, or, for longer bucket spans, a sensible - fraction of the bucket span. For example: `150s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] `indices`:: - (Required, array) An array of index names. Wildcards are supported. For - example: `["it_ops_metrics", "server*"]`. -+ --- -NOTE: If any indices are in remote clusters then `cluster.remote.connect` must -not be set to `false` on any ML node. 
--- +(Required, array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] -`job_id`:: - (Required, string) A numerical character string that uniquely identifies the - {anomaly-job}. +`job_id`:: +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] + `query`:: - (Optional, object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the options - that are supported by {Es} can be used, as this object is passed verbatim to - {es}. By default, this property has the following value: - `{"match_all": {"boost": 1}}`. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] `query_delay`:: - (Optional, <>) The number of seconds behind real time - that data is queried. For example, if data from 10:04 a.m. might not be - searchable in {es} until 10:06 a.m., set this property to 120 seconds. The - default value is `60s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] `script_fields`:: - (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The detector configuration objects in a - job can contain functions that use these script fields. For more information, - see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] `scroll_size`:: - (Optional, unsigned integer) The `size` parameter that is used in {es} - searches. The default value is `1000`. - -For more information about these properties, -see <>. +(Optional, unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-put-datafeed-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc index 7faba863774d..dd3e6bbdfff5 100644 --- a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc @@ -74,7 +74,8 @@ creation/update and runs the query using those same roles. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-start-datafeed-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc index cde9f16c384a..f115d8657f7e 100644 --- a/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc @@ -40,25 +40,15 @@ comma-separated list of {dfeeds} or a wildcard expression. You can close all ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. It can be a {dfeed} identifier - or a wildcard expression. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard] [[ml-stop-datafeed-query-parms]] ==== {api-query-parms-title} `allow_no_datafeeds`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no {datafeeds} that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `datafeeds` array when -there are no matches and the subset of results when there are partial matches. 
-If this parameter is `false`, the request returns a `404` status code when there -are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds] [[ml-stop-datafeed-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc index d201d6cd093b..1336f71fcff7 100644 --- a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc @@ -39,7 +39,8 @@ using those same roles. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-update-datafeed-request-body]] ==== {api-request-body-title} @@ -47,70 +48,58 @@ using those same roles. The following properties can be updated after the {dfeed} is created: `aggregations`:: - (Optional, object) If set, the {dfeed} performs aggregation searches. For more - information, see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] `chunking_config`:: - (Optional, object) Specifies how data searches are split into time chunks. See - <>. - +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] + `delayed_data_check_config`:: - (Optional, object) Specifies whether the data feed checks for missing data and - the size of the window. See <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] `frequency`:: - (Optional, <>) The interval at which scheduled queries - are made while the {dfeed} runs in real time. The default value is either the - bucket span for short bucket spans, or, for longer bucket spans, a sensible - fraction of the bucket span. For example: `150s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] `indices`:: - (Optional, array) An array of index names. Wildcards are supported. For - example: `["it_ops_metrics", "server*"]`. +(Optional, array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] + +`max_empty_searches`:: +(Optional, integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=max-empty-searches] ++ +-- +The special value `-1` unsets this setting. +-- `query`:: - (Optional, object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the options - that are supported by {es} can be used, as this object is passed verbatim to - {es}. By default, this property has the following value: - `{"match_all": {"boost": 1}}`. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] + -- -WARNING: If you change the query, then the analyzed data will also be changed, -therefore the required time to learn might be long and the understandability of -the results is unpredictable. -If you want to make significant changes to the source data, we would recommend -you clone it and create a second job containing the amendments. Let both run in -parallel and close one when you are satisfied with the results of the other job. +WARNING: If you change the query, the analyzed data is also changed. Therefore, +the required time to learn might be long and the understandability of the +results is unpredictable. If you want to make significant changes to the source +data, we would recommend you clone it and create a second job containing the +amendments. 
Let both run in parallel and close one when you are satisfied with +the results of the other job. + -- `query_delay`:: - (Optional, <>) The number of seconds behind real-time - that data is queried. For example, if data from 10:04 a.m. might not be - searchable in {es} until 10:06 a.m., set this property to 120 seconds. The - default value is `60s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] `script_fields`:: - (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The detector configuration objects in a - job can contain functions that use these script fields. For more information, - see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] `scroll_size`:: - (Optional, unsigned integer) The `size` parameter that is used in {es} - searches. The default value is `1000`. - -`max_empty_searches`:: - (Optional, integer) If a real-time {dfeed} has never seen any data (including - during any initial training period) then it will automatically stop itself - and close its associated job after this many real-time searches that return - no documents. In other words, it will stop after `frequency` times - `max_empty_searches` of real-time operation. If not set - then a {dfeed} with no end time that sees no data will remain started until - it is explicitly stopped. The special value `-1` unsets this setting. - -For more information about these properties, see <>. - +(Optional, unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-update-datafeed-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc index 625f839a8683..53f1756a4ec9 100644 --- a/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc @@ -5,14 +5,15 @@ Delayed data are documents that are indexed late. That is to say, it is data related to a time that the {dfeed} has already processed. -When you create a datafeed, you can specify a -{ref}/ml-datafeed-resource.html[`query_delay`] setting. This setting enables the -datafeed to wait for some time past real-time, which means any "late" data in -this period is fully indexed before the datafeed tries to gather it. However, if -the setting is set too low, the datafeed may query for data before it has been -indexed and consequently miss that document. Conversely, if it is set too high, -analysis drifts farther away from real-time. The balance that is struck depends -upon each use case and the environmental factors of the cluster. +When you create a {dfeed}, you can specify a +{ref}/ml-put-datafeed.html#ml-put-datafeed-request-body[`query_delay`] setting. +This setting enables the {dfeed} to wait for some time past real-time, which +means any "late" data in this period is fully indexed before the {dfeed} tries +to gather it. However, if the setting is set too low, the {dfeed} may query for +data before it has been indexed and consequently miss that document. Conversely, +if it is set too high, analysis drifts farther away from real-time. The balance +that is struck depends upon each use case and the environmental factors of the +cluster. ==== Why worry about delayed data? @@ -28,8 +29,7 @@ recorded so that you can determine a next course of action. ==== How do we detect delayed data? 
-In addition to the `query_delay` field, there is a -{ref}/ml-datafeed-resource.html#ml-datafeed-delayed-data-check-config[delayed data check config], +In addition to the `query_delay` field, there is a delayed data check config, which enables you to configure the datafeed to look in the past for delayed data. Every 15 minutes or every `check_window`, whichever is smaller, the datafeed triggers a document search over the configured indices. This search looks over a diff --git a/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc b/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc index e8ee463c66af..111953b8321a 100644 --- a/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc +++ b/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc @@ -204,6 +204,8 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=prediction_field_name] include::{docdir}/ml/ml-shared.asciidoc[tag=training_percent] +include::{docdir}/ml/ml-shared.asciidoc[tag=randomize_seed] + [float] [[regression-resources-advanced]] @@ -252,6 +254,8 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=prediction_field_name] include::{docdir}/ml/ml-shared.asciidoc[tag=training_percent] +include::{docdir}/ml/ml-shared.asciidoc[tag=randomize_seed] + [float] [[classification-resources-advanced]] diff --git a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc index 5b0987e41c4b..123eb6633e37 100644 --- a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc @@ -397,7 +397,8 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3 { "regression": { "dependent_variable": "G3", - "training_percent": 70 <1> + "training_percent": 70, <1> + "randomize_seed": 19673948271 <2> } } } @@ -406,6 +407,7 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3 <1> The `training_percent` defines the percentage of the data set that will be used for training the model. +<2> The `randomize_seed` is the seed used to randomly pick which data is used for training. [[ml-put-dfanalytics-example-c]] diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 11e062796afa..f277e6ab2e4a 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -1,3 +1,10 @@ +tag::aggregations[] +If set, the {dfeed} performs aggregation searches. Support for aggregations is +limited and should only be used with low cardinality data. For more information, +see +{stack-ov}/ml-configuring-aggregation.html[Aggregating data for faster performance]. +end::aggregations[] + tag::allow-lazy-open[] Advanced configuration option. Specifies whether this job can open when there is insufficient {ml} node capacity for it to be immediately assigned to a node. The @@ -9,6 +16,21 @@ return an error and the job waits in the `opening` state until sufficient {ml} node capacity is available. end::allow-lazy-open[] +tag::allow-no-datafeeds[] +Specifies what to do when the request: ++ +-- +* Contains wildcard expressions and there are no {dfeeds} that match. +* Contains the `_all` string or no identifiers and there are no matches. +* Contains wildcard expressions and there are only partial matches. + +The default value is `true`, which returns an empty `datafeeds` array when +there are no matches and the subset of results when there are partial matches. 
+If this parameter is `false`, the request returns a `404` status code when there +are no matches or only partial matches. +-- +end::allow-no-datafeeds[] + tag::allow-no-jobs[] Specifies what to do when the request: + @@ -207,6 +229,22 @@ add them here as <>. end::char-filter[] +tag::chunking-config[] +{dfeeds-cap} might be required to search over long time periods, for several months +or years. This search is split into time chunks in order to ensure the load +on {es} is managed. Chunking configuration controls how the size of these time +chunks are calculated and is an advanced configuration option. +A chunking configuration object has the following properties: + +`mode`::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=mode] + +`time_span`::: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=time-span] +end::chunking-config[] + tag::custom-rules[] An array of custom rule objects, which enable you to customize the way detectors operate. For example, a rule may dictate to the detector conditions under which @@ -301,6 +339,47 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=time-format] -- end::data-description[] +tag::datafeed-id[] +A numerical character string that uniquely identifies the +{dfeed}. This identifier can contain lowercase alphanumeric characters (a-z +and 0-9), hyphens, and underscores. It must start and end with alphanumeric +characters. +end::datafeed-id[] + +tag::datafeed-id-wildcard[] +Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard +expression. +end::datafeed-id-wildcard[] + +tag::delayed-data-check-config[] +Specifies whether the {dfeed} checks for missing data and the size of the +window. For example: `{"enabled": true, "check_window": "1h"}`. ++ +-- +The {dfeed} can optionally search over indices that have already been read in +an effort to determine whether any data has subsequently been added to the index. +If missing data is found, it is a good indication that the `query_delay` option +is set too low and the data is being indexed after the {dfeed} has passed that +moment in time. See +{stack-ov}/ml-delayed-data-detection.html[Working with delayed data]. + +This check runs only on real-time {dfeeds}. + +The configuration object has the following properties: + +`enabled`:: +(boolean) Specifies whether the {dfeed} periodically checks for delayed data. +Defaults to `true`. + +`check_window`:: +(<>) The window of time that is searched for late data. +This window of time ends with the latest finalized bucket. It defaults to +`null`, which causes an appropriate `check_window` to be calculated when the +real-time {dfeed} runs. In particular, the default `check_window` span +calculation is based on the maximum of `2h` or `8 * bucket_span`. +-- +end::delayed-data-check-config[] + tag::dependent_variable[] `dependent_variable`:: (Required, string) Defines which field of the document is to be predicted. @@ -405,6 +484,13 @@ optional. If it is not specified, no token filters are applied prior to categorization. end::filter[] +tag::frequency[] +The interval at which scheduled queries are made while the {dfeed} runs in real +time. The default value is either the bucket span for short bucket spans, or, +for longer bucket spans, a sensible fraction of the bucket span. For example: +`150s`. +end::frequency[] + tag::function[] The analysis function that is used. For example, `count`, `rare`, `mean`, `min`, `max`, and `sum`. For more information, see @@ -424,6 +510,17 @@ tag::groups[] A list of job groups. A job can belong to no groups or many. 
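Returning to the `chunking_config` properties defined above: when `mode` is `manual`, a long lookback is split into consecutive searches of at most `time_span` each. A rough sketch of that splitting (illustrative only, not the server-side implementation):

import java.time.Duration;
import java.time.Instant;

// Illustrative sketch: split [start, end) into chunks of at most timeSpan so each
// Elasticsearch search stays bounded. A 10h lookback with time_span=3h becomes
// four searches: 3h + 3h + 3h + 1h.
final class ManualChunkingSketch {
    static void printChunks(Instant start, Instant end, Duration timeSpan) {
        Instant from = start;
        while (from.isBefore(end)) {
            Instant to = from.plus(timeSpan);
            if (to.isAfter(end)) {
                to = end;
            }
            System.out.println(from + " -> " + to);
            from = to;
        }
    }

    public static void main(String[] args) {
        printChunks(Instant.parse("2019-11-19T00:00:00Z"),
                    Instant.parse("2019-11-19T10:00:00Z"), Duration.ofHours(3));
    }
}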
end::groups[] +tag::indices[] +An array of index names. Wildcards are supported. For example: +`["it_ops_metrics", "server*"]`. ++ +-- +NOTE: If any indices are in remote clusters then `cluster.remote.connect` must +not be set to `false` on any {ml} nodes. + +-- +end::indices[] + tag::influencers[] A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might @@ -475,6 +572,10 @@ alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters. end::job-id-data-frame-analytics-define[] +tag::job-id-datafeed[] +The unique identifier for the job to which the {dfeed} sends data. +end::job-id-datafeed[] + tag::jobs-stats-anomaly-detection[] An array of {anomaly-job} statistics objects. For more information, see <>. @@ -502,12 +603,32 @@ the <> API. -- end::latency[] +tag::max-empty-searches[] +If a real-time {dfeed} has never seen any data (including during any initial +training period) then it will automatically stop itself and close its associated +job after this many real-time searches that return no documents. In other words, +it will stop after `frequency` times `max_empty_searches` of real-time operation. +If not set then a {dfeed} with no end time that sees no data will remain started +until it is explicitly stopped. By default this setting is not set. +end::max-empty-searches[] + tag::maximum_number_trees[] `maximum_number_trees`:: (Optional, integer) Defines the maximum number of trees the forest is allowed to contain. The maximum value is 2000. end::maximum_number_trees[] +tag::mode[] +There are three available modes: ++ +-- +* `auto`: The chunk size is dynamically calculated. This is the default and +recommended value. +* `manual`: Chunking is applied according to the specified `time_span`. +* `off`: No chunking is applied. +-- +end::mode[] + tag::model-memory-limit[] The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes @@ -615,6 +736,21 @@ tag::prediction_field_name[] Defaults to `_prediction`. end::prediction_field_name[] +tag::query[] +The {es} query domain-specific language (DSL). This value corresponds to the +query object in an {es} search POST body. All the options that are supported by +{es} can be used, as this object is passed verbatim to {es}. By default, this +property has the following value: `{"match_all": {"boost": 1}}`. +end::query[] + +tag::query-delay[] +The number of seconds behind real time that data is queried. For example, if +data from 10:04 a.m. might not be searchable in {es} until 10:06 a.m., set this +property to 120 seconds. The default value is randomly selected between `60s` +and `120s`. This randomness improves the query performance when there are +multiple jobs running on the same node. +end::query-delay[] + tag::renormalization-window-days[] Advanced configuration option. The period over which adjustments to the score are applied, as new data is seen. The default value is the longer of 30 days or @@ -633,6 +769,18 @@ are deleted from {es}. The default value is null, which means results are retained. end::results-retention-days[] +tag::script-fields[] +Specifies scripts that evaluate custom expressions and returns script fields to +the {dfeed}. The detector configuration objects in a job can contain functions +that use these script fields. 
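As a rough illustration of what a script field contributes (a sketch only; the field names are invented, and in a real {dfeed} this would be expressed as a Painless script such as `doc['error_count'].value + doc['aborted_count'].value` under `script_fields`):

import java.util.Map;

// Illustrative only: a script field derives a new value from other fields of each
// document before the datafeed hands it to the anomaly detection job.
final class ScriptFieldSketch {
    static long totalErrorCount(Map<String, Long> doc) {
        return doc.getOrDefault("error_count", 0L) + doc.getOrDefault("aborted_count", 0L);
    }

    public static void main(String[] args) {
        System.out.println(totalErrorCount(Map.of("error_count", 3L, "aborted_count", 2L))); // 5
    }
}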
For more information, see +{stack-ov}/ml-configuring-transform.html[Transforming data with script fields] +and <>. +end::script-fields[] + +tag::scroll-size[] +The `size` parameter that is used in {es} searches. The default value is `1000`. +end::scroll-size[] + tag::summary-count-field-name[] If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the @@ -663,6 +811,11 @@ job creation fails. -- end::time-format[] +tag::time-span[] +The time span that each search will be querying. This setting is only applicable +when the mode is set to `manual`. For example: `3h`. +end::time-span[] + tag::tokenizer[] The name or definition of the <> to use after character filters are applied. This property is compulsory if @@ -681,6 +834,15 @@ those that contain arrays) won’t be included in the calculation for used percentage. Defaults to `100`. end::training_percent[] +tag::randomize_seed[] +`randomize_seed`:: +(Optional, long) Defines the seed to the random generator that is used to pick +which documents will be used for training. By default it is randomly generated. +Set it to a specific value to ensure the same documents are used for training +assuming other related parameters (e.g. `source`, `analyzed_fields`, etc.) are the same. +end::randomize_seed[] + + tag::use-null[] Defines whether a new series is used as the null series when there is no value for the by or partition fields. The default value is `false`. diff --git a/docs/reference/modules/indices/query_cache.asciidoc b/docs/reference/modules/indices/query_cache.asciidoc index aaa1ab174284..b61c2a6eee04 100644 --- a/docs/reference/modules/indices/query_cache.asciidoc +++ b/docs/reference/modules/indices/query_cache.asciidoc @@ -18,7 +18,8 @@ the cluster: either a percentage value, like `5%`, or an exact value, like `512mb`. The following setting is an _index_ setting that can be configured on a -per-index basis: +per-index basis. Can only be set at index creation time or on a +<>: `index.queries.cache.enabled`:: diff --git a/docs/reference/query-dsl/function-score-query.asciidoc b/docs/reference/query-dsl/function-score-query.asciidoc index 371ba5e63814..c4924ebd3672 100644 --- a/docs/reference/query-dsl/function-score-query.asciidoc +++ b/docs/reference/query-dsl/function-score-query.asciidoc @@ -415,7 +415,7 @@ GET /_search `offset`:: If an `offset` is defined, the decay function will only compute the - decay function for documents with a distance greater that the defined + decay function for documents with a distance greater than the defined `offset`. The default is 0. `decay`:: diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index 26694e9aa05a..afc10a4493b1 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -1054,4 +1054,15 @@ This page was deleted. [[ml-analysisconfig]] See the details in [[ml-apimodelplotconfig]] -<>, <>, and <>. \ No newline at end of file +<>, <>, and <>. + +[role="exclude",id="ml-datafeed-resource"] +=== {dfeed-cap} resources + +This page was deleted. +[[ml-datafeed-chunking-config]] +See the details in <>, <>, +[[ml-datafeed-delayed-data-check-config]] +<>, +[[ml-datafeed-counts]] +<>. 
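Looking back at the new `randomize_seed` option documented above: fixing the seed makes the training subset reproducible across runs, provided the other parameters (`source`, `analyzed_fields`, `training_percent`, and so on) are unchanged. A minimal sketch of the idea (illustrative only; the actual selection logic in the ML plugin differs):

import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;

// Illustrative only: same seed + same inputs => same documents chosen for training.
final class TrainingSplitSketch {
    static List<String> pickTrainingDocs(List<String> docIds, double trainingPercent, long randomizeSeed) {
        Random random = new Random(randomizeSeed);
        return docIds.stream()
            .filter(id -> random.nextDouble() * 100 < trainingPercent)
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> ids = List.of("doc-1", "doc-2", "doc-3", "doc-4", "doc-5");
        // Two runs with the seed from the documentation example select the same subset.
        System.out.println(pickTrainingDocs(ids, 70, 19673948271L));
        System.out.println(pickTrainingDocs(ids, 70, 19673948271L));
    }
}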
\ No newline at end of file diff --git a/docs/reference/rest-api/defs.asciidoc b/docs/reference/rest-api/defs.asciidoc index ec1a5a0e4154..8bdf35e62f11 100644 --- a/docs/reference/rest-api/defs.asciidoc +++ b/docs/reference/rest-api/defs.asciidoc @@ -5,8 +5,6 @@ These resource definitions are used in APIs related to {ml-features} and {security-features} and in {kib} advanced {ml} job configuration options. -* <> -* <> * <> * <> * <> @@ -15,7 +13,6 @@ These resource definitions are used in APIs related to {ml-features} and * <> * <> -include::{es-repo-dir}/ml/anomaly-detection/apis/datafeedresource.asciidoc[] include::{es-repo-dir}/ml/df-analytics/apis/dfanalyticsresources.asciidoc[] include::{es-repo-dir}/ml/df-analytics/apis/evaluateresources.asciidoc[] include::{es-repo-dir}/ml/anomaly-detection/apis/jobcounts.asciidoc[] diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index a9d431e70228..91654f32adcc 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -106,12 +106,12 @@ The API returns the following response: "_id":"0", "matched":true, "explanation":{ - "value":1.6943597, + "value":1.6943598, "description":"weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", "details":[ { - "value":1.6943597, - "description":"score(freq=1.0), product of:", + "value":1.6943598, + "description":"score(freq=1.0), computed as boost * idf * tf from:", "details":[ { "value":2.2, @@ -135,7 +135,7 @@ The API returns the following response: ] }, { - "value":0.5555555, + "value":0.5555556, "description":"tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:", "details":[ { diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index d236a83c8eac..c3a9fe71e16b 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -118,12 +118,12 @@ The API returns the following response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score": 1.3862944, + "_score": 1.3862942, "_source" : { "user" : "kimchy", "message": "trying out Elasticsearch", diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index e8171d43b17f..cb9b84ad3378 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -840,12 +840,12 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 1.6011951, "hits": [ { "_index": "twitter", "_id": "1", - "_score": 1.601195, + "_score": 1.6011951, "_source": { "user": "test", "message": "some message with the number 1", @@ -897,12 +897,12 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 1.6011951, "hits": [ { "_index": "twitter", "_id": "1", - "_score": 1.601195, + "_score": 1.6011951, "_source": { "user": "test", "message": "some message with the number 1", diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index b356c2cfc2d7..53ae303e484a 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -379,12 +379,12 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.6931471, "hits": [ { "_index": "test", "_id": "1", - "_score": 0.6931472, + 
"_score": 0.6931471, "_source": ..., "inner_hits": { "comments.votes": { <1> @@ -393,7 +393,7 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.6931471, "hits": [ { "_index": "test", @@ -406,7 +406,7 @@ Which would look like: "offset": 0 } }, - "_score": 0.6931472, + "_score": 0.6931471, "_source": { "value": 1, "voter": "kimchy" diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 85c97f6c74d5..ccec2345a0e5 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -360,12 +360,12 @@ The API returns the following response: "value" : 1, "relation" : "eq" }, - "max_score" : 1.3862944, + "max_score" : 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score" : 1.3862944, + "_score" : 1.3862942, "_source" : { "date" : "2009-11-15T14:12:12", "likes" : 0, diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index ff234f415a3d..695c4a6ada11 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -134,12 +134,12 @@ The API returns the following response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score": 1.3862944, + "_score": 1.3862942, "_source" : { "user" : "kimchy", "date" : "2009-11-15T14:12:12", diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index 6fb5084b94a9..e5fc39ea9036 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -188,7 +188,7 @@ namespace in `elasticsearch.yml`. For example: ---------------------------------------- xpack.security.authc.realms: - native.realm1: + native.realm1: <1> order: 0 ... @@ -201,6 +201,9 @@ xpack.security.authc.realms: ... ... ---------------------------------------- +<1> Specifies the type of realm (for example, `native`, `ldap`, +`active_directory`, `pki`, `file`, `kerberos`, `saml`) and the realm name. This +information is required. The valid settings vary depending on the realm type. For more information, see <>. @@ -209,9 +212,6 @@ information, see <>. [[ref-realm-settings]] ===== Settings valid for all realms -`type`:: -The type of the realm: `native`, `ldap`, `active_directory`, `pki`, or `file`. Required. - `order`:: The priority of the realm within the realm chain. Realms with a lower order are consulted first. 
Although not required, use of this setting is strongly diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java index 51a4f86a0d3b..20fde0891b6f 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java @@ -819,7 +819,7 @@ private void unknownValue(Object value, boolean ensureNoSelfReferences) throws I } else if (value instanceof Map) { @SuppressWarnings("unchecked") final Map valueMap = (Map) value; - map(valueMap, ensureNoSelfReferences); + map(valueMap, ensureNoSelfReferences, true); } else if (value instanceof Iterable) { value((Iterable) value, ensureNoSelfReferences); } else if (value instanceof Object[]) { @@ -867,10 +867,15 @@ public XContentBuilder field(String name, Map values) throws IOE } public XContentBuilder map(Map values) throws IOException { - return map(values, true); + return map(values, true, true); } - private XContentBuilder map(Map values, boolean ensureNoSelfReferences) throws IOException { + /** writes a map without the start object and end object headers */ + public XContentBuilder mapContents(Map values) throws IOException { + return map(values, true, false); + } + + private XContentBuilder map(Map values, boolean ensureNoSelfReferences, boolean writeStartAndEndHeaders) throws IOException { if (values == null) { return nullValue(); } @@ -881,13 +886,17 @@ private XContentBuilder map(Map values, boolean ensureNoSelfReference ensureNoSelfReferences(values); } - startObject(); + if (writeStartAndEndHeaders) { + startObject(); + } for (Map.Entry value : values.entrySet()) { field(value.getKey()); // pass ensureNoSelfReferences=false as we already performed the check at a higher level unknownValue(value.getValue(), false); } - endObject(); + if (writeStartAndEndHeaders) { + endObject(); + } return this; } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java new file mode 100644 index 000000000000..077d12684e9a --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java @@ -0,0 +1,206 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
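An aside on the `XContentBuilder` change earlier in this diff: the new public `mapContents(Map)` method writes a map's entries without emitting `startObject()`/`endObject()`, so callers can splice a map into an object they are already building. A hedged usage sketch (assuming the usual `XContentFactory` and `Strings` helpers; this snippet is not taken from the PR's tests):

import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

import java.util.Map;

public class MapContentsExample {
    public static void main(String[] args) throws Exception {
        Map<String, Object> extra = Map.of("took", 3, "errors", false);

        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.field("index", "twitter");
        // map(extra) here would nest the entries under a new object; mapContents(extra)
        // adds them to the object currently being built (entry order depends on the map).
        builder.mapContents(extra);
        builder.endObject();

        System.out.println(Strings.toString(builder));
    }
}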
+ */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.IngestDocument; + +final class CsvParser { + + private static final char LF = '\n'; + private static final char CR = '\r'; + private static final char SPACE = ' '; + private static final char TAB = '\t'; + + private enum State { + START, UNQUOTED, QUOTED, QUOTED_END + } + + private final char quote; + private final char separator; + private final boolean trim; + private final String[] headers; + private final IngestDocument ingestDocument; + private final StringBuilder builder = new StringBuilder(); + private State state = State.START; + private String line; + private int currentHeader = 0; + private int startIndex = 0; + private int length; + private int currentIndex; + + CsvParser(IngestDocument ingestDocument, char quote, char separator, boolean trim, String[] headers) { + this.ingestDocument = ingestDocument; + this.quote = quote; + this.separator = separator; + this.trim = trim; + this.headers = headers; + } + + void process(String line) { + this.line = line; + length = line.length(); + for (currentIndex = 0; currentIndex < length; currentIndex++) { + switch (state) { + case START: + if (processStart()) { + return; + } + break; + case UNQUOTED: + if (processUnquoted()) { + return; + } + break; + case QUOTED: + processQuoted(); + break; + case QUOTED_END: + if (processQuotedEnd()) { + return; + } + break; + } + } + + //we've reached end of string, we need to handle last field + switch (state) { + case UNQUOTED: + setField(length); + break; + case QUOTED_END: + setField(length - 1); + break; + case QUOTED: + throw new IllegalArgumentException("Unmatched quote"); + } + } + + private boolean processStart() { + for (; currentIndex < length; currentIndex++) { + char c = currentChar(); + if (c == quote) { + state = State.QUOTED; + builder.setLength(0); + startIndex = currentIndex + 1; + return false; + } else if (c == separator) { + startIndex++; + if (nextHeader()) { + return true; + } + } else if (isWhitespace(c)) { + if (trim) { + startIndex++; + } + } else { + state = State.UNQUOTED; + builder.setLength(0); + return false; + } + } + return true; + } + + private boolean processUnquoted() { + int spaceCount = 0; + for (; currentIndex < length; currentIndex++) { + char c = currentChar(); + if (c == LF || c == CR || c == quote) { + throw new IllegalArgumentException("Illegal character inside unquoted field at " + currentIndex); + } else if (trim && isWhitespace(c)) { + spaceCount++; + } else if (c == separator) { + state = State.START; + if (setField(currentIndex - spaceCount)) { + return true; + } + startIndex = currentIndex + 1; + return false; + } else { + spaceCount = 0; + } + } + return false; + } + + private void processQuoted() { + for (; currentIndex < length; currentIndex++) { + if (currentChar() == quote) { + state = State.QUOTED_END; + break; + } + } + } + + private boolean processQuotedEnd() { + char c = currentChar(); + if (c == quote) { + builder.append(line, startIndex, currentIndex - 1).append(quote); + startIndex = currentIndex + 1; + state = State.QUOTED; + return false; + } + boolean shouldSetField = true; + for (; currentIndex < length; currentIndex++) { + c = currentChar(); + if (isWhitespace(c)) { + if (shouldSetField) { + if (setField(currentIndex - 1)) { + return true; + } + shouldSetField = false; + } + } else if (c == separator) { + if (shouldSetField && setField(currentIndex - 1)) { + return true; + } + startIndex = currentIndex + 1; + state = State.START; + return false; + } else { 
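+ // After a closing quote only whitespace or the separator may follow; any other
+ // character (e.g. `"ab"x`) is malformed CSV, so fail instead of guessing.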
+ throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex); + } + } + return true; + } + + private char currentChar() { + return line.charAt(currentIndex); + } + + private boolean isWhitespace(char c) { + return c == SPACE || c == TAB; + } + + private boolean setField(int endIndex) { + if (builder.length() == 0) { + ingestDocument.setFieldValue(headers[currentHeader], line.substring(startIndex, endIndex)); + } else { + builder.append(line, startIndex, endIndex); + ingestDocument.setFieldValue(headers[currentHeader], builder.toString()); + } + return nextHeader(); + } + + private boolean nextHeader() { + currentHeader++; + return currentHeader == headers.length; + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java new file mode 100644 index 000000000000..66d10cc239e4 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; + +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; + +/** + * A processor that breaks line from CSV file into separate fields. + * If there's more fields requested than there is in the CSV, extra field will not be present in the document after processing. + * In the same way this processor will skip any field that is empty in CSV. + * + * By default it uses rules according to RCF 4180 with one exception: whitespaces are + * allowed before or after quoted field. 
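+ * For example, with the default quote and separator the line aa,"b,b","cc""dd"
+ * is split into three fields: aa, then b,b, then cc"dd (a doubled quote inside a
+ * quoted field is unescaped, as in RFC 4180).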
Processor can be tweaked with following parameters: + * + * quote: set custom quote character (defaults to ") + * separator: set custom separator (defaults to ,) + * trim: trim leading and trailing whitespaces in unquoted fields + */ +public final class CsvProcessor extends AbstractProcessor { + + public static final String TYPE = "csv"; + + private final String field; + private final String[] headers; + private final boolean trim; + private final char quote; + private final char separator; + private final boolean ignoreMissing; + + CsvProcessor(String tag, String field, String[] headers, boolean trim, char separator, char quote, boolean ignoreMissing) { + super(tag); + this.field = field; + this.headers = headers; + this.trim = trim; + this.quote = quote; + this.separator = separator; + this.ignoreMissing = ignoreMissing; + } + + @Override + public IngestDocument execute(IngestDocument ingestDocument) { + if (headers.length == 0) { + return ingestDocument; + } + + String line = ingestDocument.getFieldValue(field, String.class, ignoreMissing); + if (line == null && ignoreMissing == false) { + return ingestDocument; + } else if (line == null) { + throw new IllegalArgumentException("field [" + field + "] is null, cannot process it."); + } + new CsvParser(ingestDocument, quote, separator, trim, headers).process(line); + return ingestDocument; + } + + @Override + public String getType() { + return TYPE; + } + + public static final class Factory implements org.elasticsearch.ingest.Processor.Factory { + @Override + public CsvProcessor create(Map registry, String processorTag, + Map config) { + String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); + String quote = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "quote", "\""); + if (quote.length() != 1) { + throw newConfigurationException(TYPE, processorTag, "quote", "quote has to be single character like \" or '"); + } + String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator", ","); + if (separator.length() != 1) { + throw newConfigurationException(TYPE, processorTag, "separator", "separator has to be single character like , or ;"); + } + boolean trim = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "trim", false); + boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + List targetFields = ConfigurationUtils.readList(TYPE, processorTag, config, "target_fields"); + if (targetFields.isEmpty()) { + throw newConfigurationException(TYPE, processorTag, "target_fields", "target fields list can't be empty"); + } + return new CsvProcessor(processorTag, field, targetFields.toArray(String[]::new), trim, separator.charAt(0), quote.charAt(0), + ignoreMissing); + } + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 4f99c850e5bd..b37e5d13e460 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -88,7 +88,8 @@ public Map getProcessors(Processor.Parameters paramet entry(PipelineProcessor.TYPE, new PipelineProcessor.Factory(parameters.ingestService)), entry(DissectProcessor.TYPE, new DissectProcessor.Factory()), entry(DropProcessor.TYPE, new DropProcessor.Factory()), - 
entry(HtmlStripProcessor.TYPE, new HtmlStripProcessor.Factory())); + entry(HtmlStripProcessor.TYPE, new HtmlStripProcessor.Factory()), + entry(CsvProcessor.TYPE, new CsvProcessor.Factory())); } @Override diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java new file mode 100644 index 000000000000..87da73cce129 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java @@ -0,0 +1,221 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Collectors; + +public class CsvProcessorTests extends ESTestCase { + + private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.'}; + private final String quote; + private char separator; + + + public CsvProcessorTests(@Name("quote") String quote) { + this.quote = quote; + } + + @ParametersFactory + public static Iterable parameters() { + return Arrays.asList(new Object[]{"'"}, new Object[]{"\""}, new Object[]{""}); + } + + @Before + public void setup() { + separator = randomFrom(SEPARATORS); + } + + public void testExactNumberOfFields() throws Exception { + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value, ingestDocument.getFieldValue(key, String.class))); + } + + public void testLessFieldsThanHeaders() throws Exception { + int numItems = randomIntBetween(4, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).limit(3).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + 
items.keySet().stream().skip(3).forEach(key -> assertFalse(ingestDocument.hasField(key))); + items.entrySet().stream().limit(3).forEach(e -> assertEquals(e.getValue(), ingestDocument.getFieldValue(e.getKey(), String.class))); + } + + public void testLessHeadersThanFields() throws Exception { + int numItems = randomIntBetween(5, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().stream().limit(3).toArray(String[]::new); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.entrySet().stream().limit(3).forEach(e -> assertEquals(e.getValue(), ingestDocument.getFieldValue(e.getKey(), String.class))); + } + + public void testSingleField() throws Exception { + String[] headers = new String[]{randomAlphaOfLengthBetween(5, 10)}; + String value = randomAlphaOfLengthBetween(5, 10); + String csv = quote + value + quote; + + IngestDocument ingestDocument = processDocument(headers, csv); + + assertEquals(value, ingestDocument.getFieldValue(headers[0], String.class)); + } + + public void testEscapedQuote() throws Exception { + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10) + quote + quote + randomAlphaOfLengthBetween(5 + , 10) + quote + quote); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value.replace(quote + quote, quote), ingestDocument.getFieldValue(key, String.class))); + } + + public void testQuotedStrings() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), + separator + randomAlphaOfLengthBetween(5, 10) + separator + "\n\r" + randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value.replace(quote + quote, quote), ingestDocument.getFieldValue(key, + String.class))); + } + + public void testEmptyFields() throws Exception { + int numItems = randomIntBetween(5, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = + items.values().stream().map(v -> quote + v + quote).limit(numItems - 1).skip(3).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, + "" + separator + "" + separator + "" + separator + csv + separator + separator + + "abc"); + + items.keySet().stream().limit(3).forEach(key -> assertFalse(ingestDocument.hasField(key))); + items.entrySet().stream().limit(numItems - 1).skip(3).forEach(e -> assertEquals(e.getValue(), 
+ ingestDocument.getFieldValue(e.getKey(), String.class))); + items.keySet().stream().skip(numItems - 1).forEach(key -> assertFalse(ingestDocument.hasField(key))); + } + + public void testWrongStings() throws Exception { + assumeTrue("single run only", quote.isEmpty()); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\"abc")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "\"abc\"asd")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "\"abcasd")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\nabc")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\rabc")); + } + + public void testQuotedWhitespaces() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + IngestDocument document = processDocument(new String[]{"a", "b", "c", "d"}, + " abc " + separator + " def" + separator + "ghi " + separator + " " + quote + " ooo " + quote); + assertEquals("abc", document.getFieldValue("a", String.class)); + assertEquals("def", document.getFieldValue("b", String.class)); + assertEquals("ghi", document.getFieldValue("c", String.class)); + assertEquals(" ooo ", document.getFieldValue("d", String.class)); + } + + public void testUntrimmed() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + IngestDocument document = processDocument(new String[]{"a", "b", "c", "d", "e", "f"}, + " abc " + separator + " def" + separator + "ghi " + separator + " " + + quote + "ooo" + quote + " " + separator + " " + quote + "jjj" + quote + " ", false); + assertEquals(" abc ", document.getFieldValue("a", String.class)); + assertEquals(" def", document.getFieldValue("b", String.class)); + assertEquals("ghi ", document.getFieldValue("c", String.class)); + assertEquals("ooo", document.getFieldValue("d", String.class)); + assertEquals("jjj", document.getFieldValue("e", String.class)); + assertFalse(document.hasField("f")); + } + + public void testEmptyHeaders() throws Exception { + assumeTrue("single run only", quote.isEmpty()); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "abc,abc"); + HashMap metadata = new HashMap<>(ingestDocument.getSourceAndMetadata()); + + CsvProcessor processor = new CsvProcessor(randomAlphaOfLength(5), fieldName, new String[0], false, ',', '"', false); + + processor.execute(ingestDocument); + + assertEquals(metadata, ingestDocument.getSourceAndMetadata()); + } + + private IngestDocument processDocument(String[] headers, String csv) throws Exception { + return processDocument(headers, csv, true); + } + + private IngestDocument processDocument(String[] headers, String csv, boolean trim) throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + + String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, csv); + char quoteChar = quote.isEmpty() ? 
'"' : quote.charAt(0); + CsvProcessor processor = new CsvProcessor(randomAlphaOfLength(5), fieldName, headers, trim, separator, quoteChar, false); + + processor.execute(ingestDocument); + + return ingestDocument; + } +} diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml new file mode 100644 index 000000000000..a38805fb1fec --- /dev/null +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml @@ -0,0 +1,164 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "my_pipeline" + ignore: 404 + +--- +"Test CSV Processor defaults": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "aa,bb,cc" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "bb" } + - match: { _source.c: "cc" } + +--- +"Test CSV Processor quote and separator": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c","d","e"], + "quote": "'", + "separator": ";" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "'aa';'b;b';'cc';d,d;'ee''ee'" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "b;b" } + - match: { _source.c: "cc" } + - match: { _source.d: "d,d" } + - match: { _source.e: "ee'ee" } + +--- +"Test CSV Processor trim": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c"], + "trim": true, + "quote": "'" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": " aa, bb , 'cc'" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "bb" } + - match: { _source.c: "cc" } + +--- +"Test CSV Processor trim log": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["date","level","server","id","msg"], + "trim": true, + "separator": "|" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |listening on 0.0.0.0:9987, :::9987" + } + + - do: + get: + index: test + id: 1 + - match: { _source.date: "2018-01-06 16:56:14.295748" } + - match: { _source.level: "INFO" } + - match: { _source.server: "VirtualServer" } + - match: { _source.id: "1" } + - match: { _source.msg: "listening on 0.0.0.0:9987, :::9987" } diff --git a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1c4c5ce2b62d --- /dev/null +++ b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4041db9db7c394584571b45812734732912ef8e2 \ No 
newline at end of file diff --git a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 7a75661f63f6..000000000000 --- a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -43b9178f582373f4fcee61837404c0cc8636043e \ No newline at end of file diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java index 53c98f7589ef..9b4c52dad77b 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java @@ -109,6 +109,7 @@ import org.elasticsearch.painless.node.AExpression; import org.elasticsearch.painless.node.ANode; import org.elasticsearch.painless.node.AStatement; +import org.elasticsearch.painless.node.DResolvedType; import org.elasticsearch.painless.node.DUnresolvedType; import org.elasticsearch.painless.node.EAssignment; import org.elasticsearch.painless.node.EBinary; @@ -232,6 +233,10 @@ private Location location(ParserRuleContext ctx) { return new Location(sourceName, ctx.getStart().getStartIndex()); } + private Location location(TerminalNode tn) { + return new Location(sourceName, tn.getSymbol().getStartIndex()); + } + @Override public ANode visitSource(SourceContext ctx) { List functions = new ArrayList<>(); @@ -503,7 +508,8 @@ public ANode visitTrap(TrapContext ctx) { String name = ctx.ID().getText(); SBlock block = (SBlock)visit(ctx.block()); - return new SCatch(location(ctx), type, name, block); + return new SCatch(location(ctx), new DResolvedType(location(ctx), Exception.class), + new SDeclaration(location(ctx.TYPE()), new DUnresolvedType(location(ctx.TYPE()), type), name, null), block); } @Override diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java index 223b39068673..c1917944f260 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java @@ -76,6 +76,6 @@ public Class getType() { @Override public String toString() { - return " (DResolvedType [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "])"; + return "(DResolvedType [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "])"; } } diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java index ae5e421afa18..9fc6dc29fe21 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java @@ -22,10 +22,10 @@ import org.elasticsearch.painless.ClassWriter; import org.elasticsearch.painless.Globals; import org.elasticsearch.painless.Locals; -import org.elasticsearch.painless.Locals.Variable; import org.elasticsearch.painless.Location; import org.elasticsearch.painless.MethodWriter; import org.elasticsearch.painless.ScriptRoot; +import org.elasticsearch.painless.lookup.PainlessLookupUtility; import org.objectweb.asm.Label; import org.objectweb.asm.Opcodes; @@ -37,27 +37,25 @@ */ 
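The `Walker.visitTrap` change above means a Painless `catch (Exception e)` clause is now built from an explicit resolved base exception type plus a declaration node, and the reworked `SCatch` analysis below checks that the declared type is assignable to that base type, reporting a cast error otherwise. A self-contained illustration of that assignability test (plain JDK reflection, not Painless internals):

// Illustrative only: the catch-clause check boils down to Class#isAssignableFrom
// between the resolved base exception type and the declared catch type.
public class CatchTypeCheckSketch {
    public static void main(String[] args) {
        Class<?> base = Exception.class;
        System.out.println(base.isAssignableFrom(NullPointerException.class)); // true  -> valid catch type
        System.out.println(base.isAssignableFrom(Thread.class));               // false -> "cannot cast from ..." error
    }
}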
public final class SCatch extends AStatement { - private final String type; - private final String name; + private final DType baseException; + private final SDeclaration declaration; private final SBlock block; - private Variable variable = null; - Label begin = null; Label end = null; Label exception = null; - public SCatch(Location location, String type, String name, SBlock block) { + public SCatch(Location location, DType baseException, SDeclaration declaration, SBlock block) { super(location); - this.type = Objects.requireNonNull(type); - this.name = Objects.requireNonNull(name); + this.baseException = Objects.requireNonNull(baseException); + this.declaration = Objects.requireNonNull(declaration); this.block = block; } @Override void extractVariables(Set variables) { - variables.add(name); + declaration.extractVariables(variables); if (block != null) { block.extractVariables(variables); @@ -66,18 +64,17 @@ void extractVariables(Set variables) { @Override void analyze(ScriptRoot scriptRoot, Locals locals) { - Class clazz = scriptRoot.getPainlessLookup().canonicalTypeNameToType(this.type); + declaration.analyze(scriptRoot, locals); - if (clazz == null) { - throw createError(new IllegalArgumentException("Not a type [" + this.type + "].")); - } + Class baseType = baseException.resolveType(scriptRoot.getPainlessLookup()).getType(); + Class type = declaration.variable.clazz; - if (!Exception.class.isAssignableFrom(clazz)) { - throw createError(new ClassCastException("Not an exception type [" + this.type + "].")); + if (baseType.isAssignableFrom(type) == false) { + throw createError(new ClassCastException( + "cannot cast from [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "] " + + "to [" + PainlessLookupUtility.typeToCanonicalTypeName(baseType) + "]")); } - variable = locals.addVariable(location, clazz, name, true); - if (block != null) { block.lastSource = lastSource; block.inLoop = inLoop; @@ -100,7 +97,8 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) Label jump = new Label(); methodWriter.mark(jump); - methodWriter.visitVarInsn(MethodWriter.getType(variable.clazz).getOpcode(Opcodes.ISTORE), variable.getSlot()); + methodWriter.visitVarInsn( + MethodWriter.getType(declaration.variable.clazz).getOpcode(Opcodes.ISTORE), declaration.variable.getSlot()); if (block != null) { block.continu = continu; @@ -108,7 +106,7 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) block.write(classWriter, methodWriter, globals); } - methodWriter.visitTryCatchBlock(begin, end, jump, MethodWriter.getType(variable.clazz).getInternalName()); + methodWriter.visitTryCatchBlock(begin, end, jump, MethodWriter.getType(declaration.variable.clazz).getInternalName()); if (exception != null && (block == null || !block.allEscape)) { methodWriter.goTo(exception); @@ -117,6 +115,6 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) @Override public String toString() { - return singleLineToString(type, name, block); + return singleLineToString(baseException, declaration, block); } } diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java index e5d8f1e88117..bcc2036aaffd 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java @@ -40,7 +40,7 @@ public final 
class SDeclaration extends AStatement { private final String name; private AExpression expression; - private Variable variable = null; + Variable variable = null; public SDeclaration(Location location, DType type, String name, AExpression expression) { super(location); diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java index 562b6e1e5e90..60310ab0c4cd 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java @@ -853,7 +853,8 @@ public void testSFunction() { public void testSTryAndSCatch() { assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 2))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 2))))))", "try {\n" + " return 1\n" + "} catch (Exception e) {\n" @@ -863,7 +864,8 @@ public void testSTryAndSCatch() { "(SClass (STry (SBlock\n" + " (SDeclBlock (SDeclaration (DUnresolvedType [int]) i (ENumeric 1)))\n" + " (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 2))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 2))))))", "try {\n" + " int i = 1;" + " return 1\n" @@ -872,7 +874,7 @@ public void testSTryAndSCatch() { + "}"); assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock\n" + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) (SBlock\n" + " (SDeclBlock (SDeclaration (DUnresolvedType [int]) i (ENumeric 1)))\n" + " (SReturn (ENumeric 2))))))", "try {\n" @@ -883,8 +885,10 @@ public void testSTryAndSCatch() { + "}"); assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch NullPointerException e (SBlock (SReturn (ENumeric 2))))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 3))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [NullPointerException]) e) " + + "(SBlock (SReturn (ENumeric 2))))\n" + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 3))))))", "try {\n" + " return 1\n" + "} catch (NullPointerException e) {\n" diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 362c8870f652..f08600cdfd0e 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -232,7 +232,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { List conjunctions = conjunctionsWithUnknowns.stream().filter(r -> r.isUnknown() == false).collect(Collectors.toList()); if (conjunctions.isEmpty()) { if (conjunctionsWithUnknowns.isEmpty()) { - throw new IllegalArgumentException("Must have at least on conjunction sub result"); + throw new IllegalArgumentException("Must have at least one conjunction sub result"); } return conjunctionsWithUnknowns.get(0); // all conjunctions are unknown, so just return the first one } @@ -247,47 +247,53 @@ private 
static Result handleConjunction(List conjunctionsWithUnknowns) { int msm = 0; boolean verified = conjunctionsWithUnknowns.size() == conjunctions.size(); boolean matchAllDocs = true; - boolean hasDuplicateTerms = false; Set extractions = new HashSet<>(); Set seenRangeFields = new HashSet<>(); for (Result result : conjunctions) { - // In case that there are duplicate query extractions we need to be careful with - // incrementing msm, - // because that could lead to valid matches not becoming candidate matches: - // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) - // doc: field: val1 val2 val3 - // So lets be protective and decrease the msm: + int resultMsm = result.minimumShouldMatch; for (QueryExtraction queryExtraction : result.extractions) { if (queryExtraction.range != null) { // In case of range queries each extraction does not simply increment the - // minimum_should_match - // for that percolator query like for a term based extraction, so that can lead - // to more false - // positives for percolator queries with range queries than term based queries. - // The is because the way number fields are extracted from the document to be - // percolated. - // Per field a single range is extracted and if a percolator query has two or - // more range queries - // on the same field, then the minimum should match can be higher than clauses - // in the CoveringQuery. - // Therefore right now the minimum should match is incremented once per number - // field when processing - // the percolator query at index time. - if (seenRangeFields.add(queryExtraction.range.fieldName)) { - resultMsm = 1; - } else { - resultMsm = 0; + // minimum_should_match for that percolator query like for a term based extraction, + // so that can lead to more false positives for percolator queries with range queries + // than term based queries. + // This is because the way number fields are extracted from the document to be + // percolated. Per field a single range is extracted and if a percolator query has two or + // more range queries on the same field, then the minimum should match can be higher than clauses + // in the CoveringQuery. Therefore right now the minimum should match is only incremented once per + // number field when processing the percolator query at index time. 
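+ // Example: for (i:[15,20] AND i:[25,30]) AND i:[35,40] AND f:v2 only one of the
+ // three range extractions on field "i" contributes to minimum_should_match, so
+ // the conjunction ends up with msm = 2 (one for field "i", one for the term on "f").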
+ // For multiple ranges within a single extraction (ie from an existing conjunction or disjunction) + // then this will already have been taken care of, so we only check against fieldnames from + // previously processed extractions, and don't add to the seenRangeFields list until all + // extractions from this result are processed + if (seenRangeFields.contains(queryExtraction.range.fieldName)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; } } - - if (extractions.contains(queryExtraction)) { - resultMsm = Math.max(0, resultMsm - 1); - verified = false; + else { + // In case that there are duplicate term query extractions we need to be careful with + // incrementing msm, because that could lead to valid matches not becoming candidate matches: + // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) + // doc: field: val1 val2 val3 + // So lets be protective and decrease the msm: + if (extractions.contains(queryExtraction)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; + } } } msm += resultMsm; + // add range fields from this Result to the seenRangeFields set so that minimumShouldMatch is correctly + // calculated for subsequent Results + result.extractions.stream() + .map(e -> e.range) + .filter(Objects::nonNull) + .map(e -> e.fieldName) + .forEach(seenRangeFields::add); + if (result.verified == false // If some inner extractions are optional, the result can't be verified || result.minimumShouldMatch < result.extractions.size()) { @@ -299,7 +305,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { if (matchAllDocs) { return new Result(matchAllDocs, verified); } else { - return new Result(verified, extractions, hasDuplicateTerms ? 1 : msm); + return new Result(verified, extractions, msm); } } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 91c815c40322..1c00d0555b41 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -78,6 +78,7 @@ import static org.elasticsearch.percolator.QueryAnalyzer.analyze; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.collection.IsCollectionWithSize.hasSize; public class QueryAnalyzerTests extends ESTestCase { @@ -1208,4 +1209,135 @@ public void testIntervalQueries() { assertTermsEqual(result.extractions, new Term("field", "a")); } + public void testCombinedRangeAndTermWithMinimumShouldMatch() { + + Query disj = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + + Result r = analyze(disj, Version.CURRENT); + assertThat(r.minimumShouldMatch, equalTo(1)); + assertThat(r.extractions, hasSize(2)); + assertFalse(r.matchAllDocs); + assertFalse(r.verified); + + Query q = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .setMinimumNumberShouldMatch(2) + .build(); + + Result result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, 
equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + q = new BooleanQuery.Builder() + .add(q, Occur.MUST) + .add(q, Occur.MUST) + .build(); + + result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + Query q2 = new BooleanQuery.Builder() + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + + result = analyze(q2, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple range queries on different fields + Query q3 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i2", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q3, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple disjoint range queries on the same field + Query q4 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 25, 30), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q4, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(5)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on the same field + Query q5 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q5, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on different fields + Query q6 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i2", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q6, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // mixed term and range conjunctions + Query q7 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + 
.add(new TermQuery(new Term("f", "1")), Occur.MUST) + .build(), Occur.MUST) + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + .add(new TermQuery(new Term("f", "2")), Occur.MUST) + .build(), Occur.MUST) + .build(); + result = analyze(q7, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + } + } diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..0fc96bc500ef --- /dev/null +++ b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +d5bddd6b7660439e29bbce26ded283931c756d75 \ No newline at end of file diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2765cfafb052..000000000000 --- a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8ee342fa6e6306e56b583251639a661250fada46 \ No newline at end of file diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..388bc9748b7f --- /dev/null +++ b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4303858c346c51bbbc68c32eb25f7f372b09331c \ No newline at end of file diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index f653bf5c3b5d..000000000000 --- a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7e31f2a38d1434eb50781efc65b0e028f08d7821 \ No newline at end of file diff --git a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..07ff7fd907a2 --- /dev/null +++ b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +b1a9182ed1b92a121c1587fe9710aa7a41f3f77a \ No newline at end of file diff --git a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2c3ee0313a9c..000000000000 --- a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9079d81a8ea2c7190ef09ca06a987d1cab2fdf17 \ No newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..95e603ec1888 --- /dev/null +++ b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4df747b25286baecf5e790bf76bc40038c059691 \ No 
newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 5de2626b6ad2..000000000000 --- a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f253f59d4e8bb6e55eb307b011ddb81ba0ebab92 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..4eaf91f30839 --- /dev/null +++ b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +88d3f8f9134b95884f3b80280b09aa2513b71297 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index fcb579806bfe..000000000000 --- a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -36547378493e6e84f63dc744df8d414cb2add1a4 \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..e28b8d87cd55 --- /dev/null +++ b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +9ddccf575ee03a1329c8d1eb2e4ee7a6e3f3f56f \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index d26f99ab24e7..000000000000 --- a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8b15a376efa7d4289b697144f34a819a9f8772f1 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1b8ec8c5831c --- /dev/null +++ b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +e115e562a42c12a3292fb138607855c1fdfb0772 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 43a7650c70d7..000000000000 --- a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d1bc4170e6981ca9af71d7a4ce46a3feb2f7b613 \ No newline at end of file diff --git a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 7a2c3d780123..d8926b25e2c4 100644 --- 
a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -26,6 +26,8 @@ import com.sun.net.httpserver.HttpHandler; import fixture.gcs.FakeOAuth2HttpHandler; import fixture.gcs.GoogleCloudStorageHttpHandler; +import org.elasticsearch.action.ActionRunnable; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.metadata.RepositoryMetaData; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.SuppressForbidden; @@ -37,7 +39,9 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase; import org.threeten.bp.Duration; @@ -101,6 +105,15 @@ protected Settings nodeSettings(int nodeOrdinal) { return settings.build(); } + public void testDeleteSingleItem() { + final String repoName = createRepository(randomName()); + final RepositoriesService repositoriesService = internalCluster().getMasterNodeInstance(RepositoriesService.class); + final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(repoName); + PlainActionFuture.get(f -> repository.threadPool().generic().execute(ActionRunnable.run(f, () -> + repository.blobStore().blobContainer(repository.basePath()).deleteBlobsIgnoringIfNotExists(Collections.singletonList("foo")) + ))); + } + public void testChunkSize() { // default chunk size RepositoryMetaData repositoryMetaData = new RepositoryMetaData("repo", GoogleCloudStorageRepository.TYPE, Settings.EMPTY); diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml index e9ba863675df..c4815304e079 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml @@ -1,8 +1,5 @@ --- "Test Index and Search locale dependent mappings / dates": - - skip: - version: " - 6.1.99" - reason: JDK9 only supports this with a special sysproperty added in 6.2.0 - do: indices.create: index: test_index diff --git a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d6f8049f7b1e --- /dev/null +++ b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +061fb94ab616492721f8868dcaec3fbc989733be \ No newline at end of file diff --git a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index c2ec1128c174..000000000000 --- a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1cb225781b19e758d216987e363b77fa4b041174 \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 
b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..243c4420beab --- /dev/null +++ b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +503f3d516889a99e1c0e2dbdba7bf9cc9900c54c \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index b6486fb3eeba..000000000000 --- a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -cbbf849e24ef0cc61312579acf6d6c5b72c99cf5 \ No newline at end of file diff --git a/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d1657fccc5ee --- /dev/null +++ b/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +8ca36adea0a904ec725d57f509a62652a53ecff8 \ No newline at end of file diff --git a/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4b736046f3ad..000000000000 --- a/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -aa74590851b6fcf536976f75448be52f6ca18a4a \ No newline at end of file diff --git a/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..f1f0684d9b38 --- /dev/null +++ b/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +f176fdcf8fc574f4cb1c549aaa4da0301afd34ba \ No newline at end of file diff --git a/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 97a3c7b927b8..000000000000 --- a/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1bd113010c183168d79fbc10a6b590fdacc3fa35 \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..a9ad6fb95cb8 --- /dev/null +++ b/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +db5ea7b647309e5d29fa92bcbb6b11286d11436d \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index f2dd654d8d64..000000000000 --- a/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4e44a435e14d12113ca9193182a302677fda155e \ No newline at end of file diff --git a/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..6ef1d079f63f --- /dev/null +++ b/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +36329bc2ea6a5640d4128206221456656de7bbe2 \ No newline at end of file diff --git a/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 9e8d72cc13fc..000000000000 --- 
a/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -eb8eacd015ef81ef2055ada357a92c9751308ef1 \ No newline at end of file diff --git a/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..eeb424851022 --- /dev/null +++ b/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +083f492781b3d2c1d470bd1439c875ebf74a14eb \ No newline at end of file diff --git a/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index e6048ffd9122..000000000000 --- a/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4dc565203bb1eab0222c52215891e207e7032209 \ No newline at end of file diff --git a/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..6f5d479c76d6 --- /dev/null +++ b/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +9cd5ea7bc08d93053ca993bd6fc1c9cd0a1b91fd \ No newline at end of file diff --git a/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 480dcc632907..000000000000 --- a/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ef596e6d2a7ac9c7dfc6196dad75dc719c81ce85 \ No newline at end of file diff --git a/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..30733a5a5776 --- /dev/null +++ b/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +89e39f65d1c42b5849ccf3a8e6cc9b3b277c08a6 \ No newline at end of file diff --git a/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2524672e062b..000000000000 --- a/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b0c963e68dd71444f09336258c8f63425514426a \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..98b065176a41 --- /dev/null +++ b/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +651f6a0075ee30b814c8b56020d95155424c0e67 \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4ab7a7fe6f64..000000000000 --- a/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bfab3e9b0467662a8ff969da215dc4a999b73076 \ No newline at end of file diff --git a/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..484ce6b5c00f --- /dev/null +++ b/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +935968488cc2bbcd3ced9c254f690e7c90447d9e \ No newline at end of file diff --git 
a/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 9361e9252f21..000000000000 --- a/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -dadfc90e4cd032f8a4db5cc1e0bdddecea635edb \ No newline at end of file diff --git a/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1bb42417cb14 --- /dev/null +++ b/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +0bbdd0002d8d87e54b5caff6c77a1627bf449d38 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index ce5a13ec8d6b..000000000000 --- a/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e72dd79d30781e4d05bc8397ae61d0b51d7ad522 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..2bdbd889b445 --- /dev/null +++ b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +255b547571dcec118ff1a0560bb16e259f96b76a \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4530b17e84e2..000000000000 --- a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e6b6dbd0526287f25d98d7fe354d5e290c875b8a \ No newline at end of file diff --git a/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..e7036243119a --- /dev/null +++ b/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +739af6d9876f6aa7f2a3d46fa3f236a5d6ee3653 \ No newline at end of file diff --git a/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index a96977cf1340..000000000000 --- a/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6351edfc6dde2aefd8f6d8ef33ae5a6e08f88321 \ No newline at end of file diff --git a/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..72c92c101b05 --- /dev/null +++ b/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +20fa11a541a7ca3a50caa443a9abf0276b1194ea \ No newline at end of file diff --git a/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 090cf9ee734c..000000000000 --- a/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -921dd4ab493b9d70a0b1bf7b0fe8a6790b7e8036 \ No newline at end of file diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java 
b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 2d35de522b5f..db79122fa3da 100644 --- a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -31,7 +31,6 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; @@ -136,7 +135,7 @@ protected FieldHighlighter getFieldHighlighter(String field, Query query, Set highlightFlags = getFlags(field); PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags); - CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags); + LabelledCharArrayMatcher[] automata = getAutomata(field, query, highlightFlags); UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, false , highlightFlags); OffsetSource offsetSource = getOptimizedOffsetSource(components); BreakIterator breakIterator = new SplittingBreakIterator(getBreakIterator(field), diff --git a/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java b/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java index 6fc3349c7623..1d3a31f0a72d 100644 --- a/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java +++ b/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java @@ -88,14 +88,19 @@ protected void execute(Terminal terminal, OptionSet options) throws Exception { /** Create an {@link Environment} for the command to use. Overrideable for tests. */ protected Environment createEnv(final Map settings) throws UserException { + return createEnv(Settings.EMPTY, settings); + } + + /** Create an {@link Environment} for the command to use. Overrideable for tests. 
*/ + protected final Environment createEnv(final Settings baseSettings, final Map settings) throws UserException { final String esPathConf = System.getProperty("es.path.conf"); if (esPathConf == null) { throw new UserException(ExitCodes.CONFIG, "the system property [es.path.conf] must be set"); } - return InternalSettingsPreparer.prepareEnvironment(Settings.EMPTY, settings, - getConfigPath(esPathConf), - // HOSTNAME is set by elasticsearch-env and elasticsearch-env.bat so it is always available - () -> System.getenv("HOSTNAME")); + return InternalSettingsPreparer.prepareEnvironment(baseSettings, settings, + getConfigPath(esPathConf), + // HOSTNAME is set by elasticsearch-env and elasticsearch-env.bat so it is always available + () -> System.getenv("HOSTNAME")); } @SuppressForbidden(reason = "need path to construct environment") diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java index a65934c76776..800269520e36 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java @@ -26,7 +26,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cli.EnvironmentAwareCommand; import org.elasticsearch.cli.Terminal; -import org.elasticsearch.cluster.ClusterModule; import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.common.collect.Tuple; @@ -42,7 +41,6 @@ public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand { private static final Logger logger = LogManager.getLogger(ElasticsearchNodeCommand.class); - protected final NamedXContentRegistry namedXContentRegistry; protected static final String DELIMITER = "------------------------------------------------------------------------\n"; static final String STOP_WARNING_MSG = @@ -61,7 +59,6 @@ public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand { public ElasticsearchNodeCommand(String description) { super(description); - namedXContentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables()); } protected void processNodePaths(Terminal terminal, OptionSet options, Environment env) throws IOException { @@ -80,7 +77,7 @@ protected void processNodePaths(Terminal terminal, OptionSet options, Environmen protected Tuple loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException { terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest file"); - final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths); + final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); if (manifest == null) { throw new ElasticsearchException(NO_MANIFEST_FILE_FOUND_MSG); @@ -89,8 +86,8 @@ protected Tuple loadMetaData(Terminal terminal, Path[] dataP throw new ElasticsearchException(GLOBAL_GENERATION_MISSING_MSG); } terminal.println(Terminal.Verbosity.VERBOSE, "Loading global metadata file"); - final MetaData metaData = MetaData.FORMAT.loadGeneration(logger, namedXContentRegistry, manifest.getGlobalGeneration(), - dataPaths); + final MetaData metaData = MetaData.FORMAT_PRESERVE_CUSTOMS.loadGeneration( + logger, NamedXContentRegistry.EMPTY, manifest.getGlobalGeneration(), dataPaths); if (metaData == null) { throw new 
ElasticsearchException(NO_GLOBAL_METADATA_MSG + " [generation = " + manifest.getGlobalGeneration() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java index c15e832142ea..05bc0116c13c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeMetaData; import org.elasticsearch.node.Node; @@ -84,7 +85,7 @@ protected boolean validateBeforeLock(Terminal terminal, Environment env) { protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { terminal.println(Terminal.Verbosity.VERBOSE, "Loading node metadata"); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths); + final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); if (nodeMetaData == null) { throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java index d8fd88696e9f..f8e1b48c6dd8 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java @@ -45,7 +45,6 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -1421,8 +1420,6 @@ public void toXContent(XContentBuilder builder, IndexMetaData state) throws IOEx @Override public IndexMetaData fromXContent(XContentParser parser) throws IOException { - assert parser.getXContentRegistry() != NamedXContentRegistry.EMPTY - : "loading index metadata requires a working named xcontent registry"; return Builder.fromXContent(parser); } }; diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java index ce67e5b72f1d..482d57bed54b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java @@ -755,7 +755,7 @@ public static Diff readDiffFrom(StreamInput in) throws IOException { } public static MetaData fromXContent(XContentParser parser) throws IOException { - return Builder.fromXContent(parser); + return Builder.fromXContent(parser, false); } @Override @@ -1277,7 +1277,7 @@ public static void toXContent(MetaData metaData, XContentBuilder builder, ToXCon builder.endObject(); } - public static MetaData fromXContent(XContentParser parser) throws IOException { + public static MetaData fromXContent(XContentParser parser, boolean 
preserveUnknownCustoms) throws IOException { Builder builder = new Builder(); // we might get here after the meta-data element, or on a fresh parser @@ -1327,8 +1327,13 @@ public static MetaData fromXContent(XContentParser parser) throws IOException { Custom custom = parser.namedObject(Custom.class, currentFieldName, null); builder.putCustom(custom.getWriteableName(), custom); } catch (NamedObjectNotFoundException ex) { - logger.warn("Skipping unknown custom object with type {}", currentFieldName); - parser.skipChildren(); + if (preserveUnknownCustoms) { + logger.warn("Adding unknown custom object with type {}", currentFieldName); + builder.putCustom(currentFieldName, new UnknownGatewayOnlyCustom(parser.mapOrdered())); + } else { + logger.warn("Skipping unknown custom object with type {}", currentFieldName); + parser.skipChildren(); + } } } } else if (token.isValue()) { @@ -1349,6 +1354,45 @@ public static MetaData fromXContent(XContentParser parser) throws IOException { } } + public static class UnknownGatewayOnlyCustom implements Custom { + + private final Map contents; + + UnknownGatewayOnlyCustom(Map contents) { + this.contents = contents; + } + + @Override + public EnumSet context() { + return EnumSet.of(MetaData.XContentContext.API, MetaData.XContentContext.GATEWAY); + } + + @Override + public Diff diff(Custom previousState) { + throw new UnsupportedOperationException(); + } + + @Override + public String getWriteableName() { + throw new UnsupportedOperationException(); + } + + @Override + public Version getMinimalSupportedVersion() { + throw new UnsupportedOperationException(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder.mapContents(contents); + } + } + private static final ToXContent.Params FORMAT_PARAMS; static { Map params = new HashMap<>(2); @@ -1360,16 +1404,25 @@ public static MetaData fromXContent(XContentParser parser) throws IOException { /** * State format for {@link MetaData} to write to and load from disk */ - public static final MetaDataStateFormat FORMAT = new MetaDataStateFormat(GLOBAL_STATE_FILE_PREFIX) { + public static final MetaDataStateFormat FORMAT = createMetaDataStateFormat(false); - @Override - public void toXContent(XContentBuilder builder, MetaData state) throws IOException { - Builder.toXContent(state, builder, FORMAT_PARAMS); - } + /** + * Special state format for {@link MetaData} to write to and load from disk, preserving unknown customs + */ + public static final MetaDataStateFormat FORMAT_PRESERVE_CUSTOMS = createMetaDataStateFormat(true); - @Override - public MetaData fromXContent(XContentParser parser) throws IOException { - return Builder.fromXContent(parser); - } - }; + private static MetaDataStateFormat createMetaDataStateFormat(boolean preserveUnknownCustoms) { + return new MetaDataStateFormat(GLOBAL_STATE_FILE_PREFIX) { + + @Override + public void toXContent(XContentBuilder builder, MetaData state) throws IOException { + Builder.toXContent(state, builder, FORMAT_PARAMS); + } + + @Override + public MetaData fromXContent(XContentParser parser) throws IOException { + return Builder.fromXContent(parser, preserveUnknownCustoms); + } + }; + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 42b3fde5e0c9..9c862c7a0005 
100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -104,7 +104,7 @@ public enum Reason { */ REROUTE_CANCELLED, /** - * When a shard moves from started back to initializing, for example, during shadow replica + * When a shard moves from started back to initializing. */ REINITIALIZED, /** diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index fcab160108b2..ce00c7755205 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -107,7 +107,7 @@ public class Lucene { public static final String LATEST_DOC_VALUES_FORMAT = "Lucene70"; public static final String LATEST_POSTINGS_FORMAT = "Lucene50"; - public static final String LATEST_CODEC = "Lucene80"; + public static final String LATEST_CODEC = "Lucene84"; static { Deprecated annotation = PostingsFormat.forName(LATEST_POSTINGS_FORMAT).getClass().getAnnotation(Deprecated.class); diff --git a/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java b/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java index 20b5552dfa8f..25b4f79866ea 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java +++ b/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.gateway.WriteStateException; @@ -165,7 +166,7 @@ private String toIndexName(NodeEnvironment.NodePath[] nodePaths, String uuid) { indexPaths[i] = nodePaths[i].resolve(uuid); } try { - IndexMetaData metaData = IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, indexPaths); + IndexMetaData metaData = IndexMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, indexPaths); return metaData.getIndex().getName(); } catch (Exception e) { return "no name for uuid: " + uuid + ": " + e; @@ -194,7 +195,7 @@ private void rewriteManifest(Terminal terminal, Manifest manifest, Path[] dataPa private Manifest loadManifest(Terminal terminal, Path[] dataPaths) throws IOException { terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest"); - final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths); + final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); if (manifest == null) { terminal.println(Terminal.Verbosity.SILENT, PRE_V7_MESSAGE); diff --git a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java index 34c7e9599e07..f50bdf081ef8 100644 --- a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java +++ b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java @@ -25,6 +25,7 @@ import org.elasticsearch.Version; import org.elasticsearch.cli.Terminal; import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import java.io.IOException; import java.nio.file.Path; @@ -74,7 +75,7 @@ public 
OverrideNodeVersionCommand() { protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { final Path[] nodePaths = Arrays.stream(toNodePaths(dataPaths)).map(p -> p.path).toArray(Path[]::new); final NodeMetaData nodeMetaData - = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, namedXContentRegistry, nodePaths); + = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, NamedXContentRegistry.EMPTY, nodePaths); if (nodeMetaData == null) { throw new ElasticsearchException(NO_METADATA_MESSAGE); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 485c40d5d9bb..0b1c96a6911d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -22,7 +22,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.mapper.MapperService; @@ -47,8 +47,8 @@ public class CodecService { public CodecService(@Nullable MapperService mapperService, Logger logger) { final var codecs = new HashMap(); if (mapperService == null) { - codecs.put(DEFAULT_CODEC, new Lucene80Codec()); - codecs.put(BEST_COMPRESSION_CODEC, new Lucene80Codec(Mode.BEST_COMPRESSION)); + codecs.put(DEFAULT_CODEC, new Lucene84Codec()); + codecs.put(BEST_COMPRESSION_CODEC, new Lucene84Codec(Mode.BEST_COMPRESSION)); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java index 4a154abd8ead..ccaa873af279 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java @@ -23,7 +23,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -37,7 +37,7 @@ * per index in real time via the mapping API. If no specific postings format is * configured for a specific field the default postings format is used. 
*/ -public class PerFieldMappingPostingFormatCodec extends Lucene80Codec { +public class PerFieldMappingPostingFormatCodec extends Lucene84Codec { private final Logger logger; private final MapperService mapperService; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index 5fd06633bcfc..5f6b71d6522c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.suggest.document.Completion50PostingsFormat; +import org.apache.lucene.search.suggest.document.Completion84PostingsFormat; import org.apache.lucene.search.suggest.document.CompletionAnalyzer; import org.apache.lucene.search.suggest.document.CompletionQuery; import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery; @@ -265,7 +265,7 @@ public boolean preservePositionIncrements() { */ public static synchronized PostingsFormat postingsFormat() { if (postingsFormat == null) { - postingsFormat = new Completion50PostingsFormat(); + postingsFormat = new Completion84PostingsFormat(); } return postingsFormat; } diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java b/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java index dc054f8b51d3..9183b1a82656 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java @@ -80,7 +80,7 @@ public IndicesQueryCache(Settings settings) { logger.debug("using [node] query cache with size [{}] max filter count [{}]", size, count); if (INDICES_QUERIES_CACHE_ALL_SEGMENTS_SETTING.get(settings)) { - cache = new ElasticsearchLRUQueryCache(count, size.getBytes(), context -> true); + cache = new ElasticsearchLRUQueryCache(count, size.getBytes(), context -> true, 1f); } else { cache = new ElasticsearchLRUQueryCache(count, size.getBytes()); } @@ -250,8 +250,8 @@ public void onClose(ShardId shardId) { private class ElasticsearchLRUQueryCache extends LRUQueryCache { - ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed, Predicate leavesToCache) { - super(maxSize, maxRamBytesUsed, leavesToCache); + ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed, Predicate leavesToCache, float skipFactor) { + super(maxSize, maxRamBytesUsed, leavesToCache, skipFactor); } ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed) { diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index c38462e24074..a48f893dfd40 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -35,6 +35,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateApplier; import org.elasticsearch.cluster.ClusterStateUpdateTask; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.RepositoryCleanupInProgress; import org.elasticsearch.cluster.RestoreInProgress; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; @@ -42,6 +43,7 @@ import 
org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.SnapshotsInProgress.State; +import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.MetaData; @@ -1051,8 +1053,16 @@ protected void doRun() { @Override public void onFailure(final Exception e) { Snapshot snapshot = entry.snapshot(); - logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e); - removeSnapshotFromClusterState(snapshot, null, e); + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { + // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master + // will try ending this snapshot again + logger.debug(() -> new ParameterizedMessage( + "[{}] failed to update cluster state during snapshot finalization", snapshot), e); + endingSnapshots.remove(snapshot); + } else { + logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e); + removeSnapshotFromClusterState(snapshot, null, e); + } } }); } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java index 36a78119c766..7d2b10beb327 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java @@ -365,7 +365,7 @@ public void testUnknownFieldClusterMetaData() throws IOException { .endObject() .endObject()); try (XContentParser parser = createParser(JsonXContent.jsonXContent, metadata)) { - MetaData.Builder.fromXContent(parser); + MetaData.Builder.fromXContent(parser, randomBoolean()); fail(); } catch (IllegalArgumentException e) { assertEquals("Unexpected field [random]", e.getMessage()); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java index e2d0fcf5188a..0338a64b6fe7 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java @@ -140,7 +140,7 @@ public void testSimpleJsonFromAndTo() throws IOException { String metaDataSource = MetaData.Builder.toXContent(metaData); - MetaData parsedMetaData = MetaData.Builder.fromXContent(createParser(JsonXContent.jsonXContent, metaDataSource)); + MetaData parsedMetaData = MetaData.Builder.fromXContent(createParser(JsonXContent.jsonXContent, metaDataSource), false); IndexMetaData indexMetaData = parsedMetaData.index("test1"); assertThat(indexMetaData.primaryTerm(0), equalTo(1L)); diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index dcc440acbcd1..1d23570064fe 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -116,6 +116,7 @@ public void testUUIDThreaded(UUIDGenerator uuidSource) { assertEquals(count*uuids, globalSet.size()); } + @AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/50048") public 
void testCompression() throws Exception { Logger logger = LogManager.getLogger(UUIDTests.class); // Low number so that the test runs quickly, but the results are more interesting with larger numbers diff --git a/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java b/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java index 40f3bd8a0162..c7dab0dc4d4a 100644 --- a/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java @@ -61,6 +61,8 @@ import java.util.stream.StreamSupport; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; @@ -80,7 +82,7 @@ public void toXContent(XContentBuilder builder, MetaData state) { @Override public MetaData fromXContent(XContentParser parser) throws IOException { - return MetaData.Builder.fromXContent(parser); + return MetaData.Builder.fromXContent(parser, false); } }; Path tmp = createTempDir(); @@ -233,7 +235,23 @@ public static void corruptFile(Path fileToCorrupt, Logger logger) throws IOExcep } } - public void testLoadState() throws IOException { + public void testLoadStateWithoutMissingCustoms() throws IOException { + runLoadStateTest(false, false); + } + + public void testLoadStateWithoutMissingCustomsButPreserved() throws IOException { + runLoadStateTest(false, true); + } + + public void testLoadStateWithMissingCustomsButPreserved() throws IOException { + runLoadStateTest(true, true); + } + + public void testLoadStateWithMissingCustomsAndNotPreserved() throws IOException { + runLoadStateTest(true, false); + } + + private void runLoadStateTest(boolean hasMissingCustoms, boolean preserveUnknownCustoms) throws IOException { final Path[] dirs = new Path[randomIntBetween(1, 5)]; int numStates = randomIntBetween(1, 5); List meta = new ArrayList<>(); @@ -241,7 +259,7 @@ public void testLoadState() throws IOException { meta.add(randomMeta()); } Set corruptedFiles = new HashSet<>(); - MetaDataStateFormat format = metaDataFormat(); + MetaDataStateFormat format = metaDataFormat(preserveUnknownCustoms); for (int i = 0; i < dirs.length; i++) { dirs[i] = createTempDir(); Files.createDirectories(dirs[i].resolve(MetaDataStateFormat.STATE_DIR_NAME)); @@ -258,11 +276,12 @@ public void testLoadState() throws IOException { } List dirList = Arrays.asList(dirs); Collections.shuffle(dirList, random()); - MetaData loadedMetaData = format.loadLatestState(logger, xContentRegistry(), dirList.toArray(new Path[0])); + MetaData loadedMetaData = format.loadLatestState(logger, hasMissingCustoms ? 
+ NamedXContentRegistry.EMPTY : xContentRegistry(), dirList.toArray(new Path[0])); MetaData latestMetaData = meta.get(numStates-1); assertThat(loadedMetaData.clusterUUID(), not(equalTo("_na_"))); assertThat(loadedMetaData.clusterUUID(), equalTo(latestMetaData.clusterUUID())); - ImmutableOpenMap indices = loadedMetaData.indices(); + ImmutableOpenMap indices = loadedMetaData.indices(); assertThat(indices.size(), equalTo(latestMetaData.indices().size())); for (IndexMetaData original : latestMetaData) { IndexMetaData deserialized = indices.get(original.getIndex().getName()); @@ -275,7 +294,23 @@ public void testLoadState() throws IOException { } // make sure the index tombstones are the same too - assertThat(loadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + if (hasMissingCustoms) { + if (preserveUnknownCustoms) { + assertNotNull(loadedMetaData.custom(IndexGraveyard.TYPE)); + assertThat(loadedMetaData.custom(IndexGraveyard.TYPE), instanceOf(MetaData.UnknownGatewayOnlyCustom.class)); + + // check that we reserialize unknown metadata correctly again + final Path tempdir = createTempDir(); + metaDataFormat(randomBoolean()).write(loadedMetaData, tempdir); + final MetaData reloadedMetaData = metaDataFormat(randomBoolean()).loadLatestState(logger, xContentRegistry(), tempdir); + assertThat(reloadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + } else { + assertNotNull(loadedMetaData.indexGraveyard()); + assertThat(loadedMetaData.indexGraveyard().getTombstones(), hasSize(0)); + } + } else { + assertThat(loadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + } // now corrupt all the latest ones and make sure we fail to load the state for (int i = 0; i < dirs.length; i++) { @@ -419,7 +454,7 @@ public void testFailRandomlyAndReadAnyState() throws IOException { writeAndReadStateSuccessfully(format, paths); } - private static MetaDataStateFormat metaDataFormat() { + private static MetaDataStateFormat metaDataFormat(boolean preserveUnknownCustoms) { return new MetaDataStateFormat(MetaData.GLOBAL_STATE_FILE_PREFIX) { @Override public void toXContent(XContentBuilder builder, MetaData state) throws IOException { @@ -428,7 +463,7 @@ public void toXContent(XContentBuilder builder, MetaData state) throws IOExcepti @Override public MetaData fromXContent(XContentParser parser) throws IOException { - return MetaData.Builder.fromXContent(parser); + return MetaData.Builder.fromXContent(parser, preserveUnknownCustoms); } }; } diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index fa775a84c72a..dc5b8031a6c5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -19,11 +19,16 @@ package org.elasticsearch.index.codec; +import static org.hamcrest.Matchers.instanceOf; + +import java.io.IOException; +import java.util.Collections; + import org.apache.logging.log4j.LogManager; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; @@ -42,19 +47,14 @@ import 
org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; -import java.io.IOException; -import java.util.Collections; - -import static org.hamcrest.Matchers.instanceOf; - @SuppressCodecs("*") // we test against default codec so never get a random one here! public class CodecTests extends ESTestCase { public void testResolveDefaultCodecs() throws Exception { CodecService codecService = createCodecService(); assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class)); - assertThat(codecService.codec("default"), instanceOf(Lucene80Codec.class)); - assertThat(codecService.codec("Lucene80"), instanceOf(Lucene80Codec.class)); + assertThat(codecService.codec("default"), instanceOf(Lucene84Codec.class)); + assertThat(codecService.codec("Lucene84"), instanceOf(Lucene84Codec.class)); } public void testDefault() throws Exception { diff --git a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java index ba2a725fed29..a374a745909a 100644 --- a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java +++ b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java @@ -167,10 +167,9 @@ public void handle(final HttpExchange exchange) throws IOException { } else if (line.startsWith("DELETE")) { final String name = line.substring(line.indexOf(uri) + uri.length(), line.lastIndexOf(" HTTP")); if (Strings.hasText(name)) { - if (blobs.entrySet().removeIf(blob -> blob.getKey().equals(URLDecoder.decode(name, UTF_8)))) { - batch.append("HTTP/1.1 204 NO_CONTENT").append('\n'); - batch.append('\n'); - } + blobs.remove(URLDecoder.decode(name, UTF_8)); + batch.append("HTTP/1.1 204 NO_CONTENT").append('\n'); + batch.append('\n'); } } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java index ce94adf73bcd..1b588f554fa5 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java @@ -422,7 +422,7 @@ private static NodeSelector parseVersionSelector(XContentParser parser) throws I if (false == parser.currentToken().isValue()) { throw new XContentParseException(parser.getTokenLocation(), "expected [version] to be a value"); } - Version[] range = SkipSection.parseVersionRange(parser.text()); + List skipVersionRanges = SkipSection.parseVersionRanges(parser.text()); return new NodeSelector() { @Override public void select(Iterable nodes) { @@ -433,7 +433,8 @@ public void select(Iterable nodes) { + node); } Version version = Version.fromString(node.getVersion()); - if (false == (version.onOrAfter(range[0]) && version.onOrBefore(range[1]))) { + boolean skip = skipVersionRanges.stream().anyMatch(v -> v.contains(version)); + if (false == skip) { itr.remove(); } } @@ -441,7 +442,7 @@ public void select(Iterable nodes) { @Override public String toString() { - return "version between [" + range[0] + "] and [" + range[1] + "]"; + return "version ranges "+skipVersionRanges; } }; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java index e487f8e74da3..81eb47089201 100644 --- 
a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -98,33 +99,30 @@ public static SkipSection parse(XContentParser parser) throws IOException { public static final SkipSection EMPTY = new SkipSection(); - private final Version lowerVersion; - private final Version upperVersion; + private final List versionRanges; private final List features; private final String reason; private SkipSection() { - this.lowerVersion = null; - this.upperVersion = null; + this.versionRanges = new ArrayList<>(); this.features = new ArrayList<>(); this.reason = null; } public SkipSection(String versionRange, List features, String reason) { assert features != null; - Version[] versions = parseVersionRange(versionRange); - this.lowerVersion = versions[0]; - this.upperVersion = versions[1]; + this.versionRanges = parseVersionRanges(versionRange); + assert versionRanges.isEmpty() == false; this.features = features; this.reason = reason; } public Version getLowerVersion() { - return lowerVersion; + return versionRanges.get(0).getLower(); } public Version getUpperVersion() { - return upperVersion; + return versionRanges.get(versionRanges.size() - 1).getUpper(); } public List getFeatures() { @@ -139,10 +137,8 @@ public boolean skip(Version currentVersion) { if (isEmpty()) { return false; } - boolean skip = lowerVersion != null && upperVersion != null && currentVersion.onOrAfter(lowerVersion) - && currentVersion.onOrBefore(upperVersion); - skip |= Features.areAllSupported(features) == false; - return skip; + boolean skip = versionRanges.stream().anyMatch(range -> range.contains(currentVersion)); + return skip || Features.areAllSupported(features) == false; } public boolean isVersionCheck() { @@ -153,24 +149,30 @@ public boolean isEmpty() { return EMPTY.equals(this); } - static Version[] parseVersionRange(String versionRange) { - if (versionRange == null) { - return new Version[] { null, null }; + static List parseVersionRanges(String rawRanges) { + if (rawRanges == null) { + return Collections.singletonList(new VersionRange(null, null)); } - if (versionRange.trim().equals("all")) { - return new Version[]{VersionUtils.getFirstVersion(), Version.CURRENT}; - } - String[] skipVersions = versionRange.split("-"); - if (skipVersions.length > 2) { - throw new IllegalArgumentException("version range malformed: " + versionRange); + if (rawRanges.trim().equals("all")) { + return Collections.singletonList(new VersionRange(VersionUtils.getFirstVersion(), Version.CURRENT)); } + String[] ranges = rawRanges.split(","); + List versionRanges = new ArrayList<>(); + for (String rawRange : ranges) { + String[] skipVersions = rawRange.split("-", -1); + if (skipVersions.length > 2) { + throw new IllegalArgumentException("version range malformed: " + rawRanges); + } - String lower = skipVersions[0].trim(); - String upper = skipVersions[1].trim(); - return new Version[] { - lower.isEmpty() ? VersionUtils.getFirstVersion() : Version.fromString(lower), - upper.isEmpty() ? Version.CURRENT : Version.fromString(upper) - }; + String lower = skipVersions[0].trim(); + String upper = skipVersions[1].trim(); + VersionRange versionRange = new VersionRange( + lower.isEmpty() ? VersionUtils.getFirstVersion() : Version.fromString(lower), + upper.isEmpty() ? 
Version.CURRENT : Version.fromString(upper) + ); + versionRanges.add(versionRange); + } + return versionRanges; } public String getSkipMessage(String description) { diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java new file mode 100644 index 000000000000..f1b1df2a1a16 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java @@ -0,0 +1,49 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.test.rest.yaml.section; + +import org.elasticsearch.Version; + +public class VersionRange { + private final Version lower; + private final Version upper; + + public VersionRange(Version lower, Version upper) { + this.lower = lower; + this.upper = upper; + } + + public Version getLower() { + return lower; + } + + public Version getUpper() { + return upper; + } + + public boolean contains(Version currentVersion) { + return lower != null && upper != null && currentVersion.onOrAfter(lower) + && currentVersion.onOrBefore(upper); + } + + @Override + public String toString() { + return "[" + lower + " - " + upper + "]"; + } +} diff --git a/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java b/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java index e92ef2ce1357..45273912f1d5 100644 --- a/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java +++ b/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java @@ -33,6 +33,27 @@ public class SkipSectionTests extends AbstractClientYamlTestFragmentParserTestCase { + public void testSkipMultiRange() { + SkipSection section = new SkipSection("6.0.0 - 6.1.0, 7.1.0 - 7.5.0", + Collections.emptyList() , "foobar"); + + assertFalse(section.skip(Version.CURRENT)); + assertFalse(section.skip(Version.fromString("6.2.0"))); + assertFalse(section.skip(Version.fromString("7.0.0"))); + assertFalse(section.skip(Version.fromString("7.6.0"))); + + assertTrue(section.skip(Version.fromString("6.0.0"))); + assertTrue(section.skip(Version.fromString("6.1.0"))); + assertTrue(section.skip(Version.fromString("7.1.0"))); + assertTrue(section.skip(Version.fromString("7.5.0"))); + + section = new SkipSection("- 7.1.0, 7.2.0 - 7.5.0, 8.0.0 -", + Collections.emptyList() , "foobar"); + assertTrue(section.skip(Version.fromString("7.0.0"))); + assertTrue(section.skip(Version.fromString("7.3.0"))); + assertTrue(section.skip(Version.fromString("8.0.0"))); + } + public void testSkip() { SkipSection section = new SkipSection("6.0.0 - 6.1.0", randomBoolean() ? 
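// Note on the skip-section change above: a single skip string may now list several version ranges
// separated by commas, and either end of a range may be left empty to mean "from the first known
// version" or "up to the current version". A minimal sketch of the parsing behaviour, using only
// names introduced in this diff (assumed to run in the same package as SkipSectionTests):
List<VersionRange> ranges = SkipSection.parseVersionRanges("6.0.0 - 6.1.0, 7.1.0 - 7.5.0");
assert ranges.size() == 2;
assert ranges.get(0).contains(Version.fromString("6.1.0"));          // inside the first range
assert ranges.get(1).contains(Version.fromString("7.3.0"));          // inside the second range
assert ranges.get(0).contains(Version.fromString("6.2.0")) == false; // falls between the two ranges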
Collections.emptyList() : Collections.singletonList("warnings"), "foobar"); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java index 6731518f5b53..004c9ff98776 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java @@ -63,7 +63,23 @@ public static LicenseType parse(String type) throws IllegalArgumentException { /** * Backward compatible license type parsing for older license models */ - public static LicenseType resolve(String name) { + public static LicenseType resolve(License license) { + if (license.version == VERSION_START) { + // in 1.x: the acceptable values for 'subscription_type': none | dev | silver | gold | platinum + return resolve(license.subscriptionType); + } else { + // in 2.x: the acceptable values for 'type': trial | basic | silver | dev | gold | platinum + // in 5.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum + // in 6.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum + // in 7.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum | enterprise + return resolve(license.type); + } + } + + /** + * Backward compatible license type parsing for older license models + */ + static LicenseType resolve(String name) { switch (name.toLowerCase(Locale.ROOT)) { case "missing": return null; @@ -165,8 +181,12 @@ public static int compare(OperationMode opMode1, OperationMode opMode2) { return Integer.compare(opMode1.id, opMode2.id); } - public static OperationMode resolve(String typeName) { - LicenseType type = LicenseType.resolve(typeName); + /** + * Determine the operating mode for a license type + * @see LicenseType#resolve(License) + * @see #parse(String) + */ + public static OperationMode resolve(LicenseType type) { if (type == null) { return MISSING; } @@ -187,6 +207,21 @@ public static OperationMode resolve(String typeName) { } } + /** + * Parses an {@code OperatingMode} from a String. + * The string must name an operating mode, and not a licensing level (that is, it cannot parse old style license levels + * such as "dev" or "silver"). 
+ * @see #description() + */ + public static OperationMode parse(String mode) { + try { + return OperationMode.valueOf(mode.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("unrecognised license operating mode [ " + mode + "], supported modes are [" + + Stream.of(values()).map(OperationMode::description).collect(Collectors.joining(",")) + "]"); + } + } + public String description() { return name().toLowerCase(Locale.ROOT); } @@ -212,13 +247,7 @@ private License(int version, String uid, String issuer, String issuedTo, long is } this.maxNodes = maxNodes; this.startDate = startDate; - if (version == VERSION_START) { - // in 1.x: the acceptable values for 'subscription_type': none | dev | silver | gold | platinum - this.operationMode = OperationMode.resolve(subscriptionType); - } else { - // in 2.x: the acceptable values for 'type': trial | basic | silver | dev | gold | platinum - this.operationMode = OperationMode.resolve(type); - } + this.operationMode = OperationMode.resolve(LicenseType.resolve(this)); validate(); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java index f16cb2fbe393..af34d31c1442 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java @@ -47,6 +47,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Service responsible for managing {@link LicensesMetaData}. @@ -64,6 +65,12 @@ public class LicenseService extends AbstractLifecycleComponent implements Cluste return SelfGeneratedLicense.validateSelfGeneratedType(type); }, Setting.Property.NodeScope); + static final List ALLOWABLE_UPLOAD_TYPES = getAllowableUploadTypes(); + + public static final Setting> ALLOWED_LICENSE_TYPES_SETTING = Setting.listSetting("xpack.license.upload.types", + ALLOWABLE_UPLOAD_TYPES.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()), + License.LicenseType::parse, LicenseService::validateUploadTypesSetting, Setting.Property.NodeScope); + // pkg private for tests static final TimeValue NON_BASIC_SELF_GENERATED_LICENSE_DURATION = TimeValue.timeValueHours(30 * 24); @@ -104,6 +111,12 @@ public class LicenseService extends AbstractLifecycleComponent implements Cluste */ private List expirationCallbacks = new ArrayList<>(); + /** + * Which license types are permitted to be uploaded to the cluster + * @see #ALLOWED_LICENSE_TYPES_SETTING + */ + private final List allowedLicenseTypes; + /** * Max number of nodes licensed by generated trial license */ @@ -123,6 +136,7 @@ public LicenseService(Settings settings, ClusterService clusterService, Clock cl this.clock = clock; this.scheduler = new SchedulerEngine(settings, clock); this.licenseState = licenseState; + this.allowedLicenseTypes = ALLOWED_LICENSE_TYPES_SETTING.get(settings); this.operationModeFileWatcher = new OperationModeFileWatcher(resourceWatcherService, XPackPlugin.resolveConfigFile(env, "license_mode"), logger, () -> updateLicenseState(getLicensesMetaData())); @@ -196,8 +210,20 @@ public void registerLicense(final PutLicenseRequest request, final ActionListene final long now = clock.millis(); if (!LicenseVerifier.verifyLicense(newLicense) || newLicense.issueDate() > now || newLicense.startDate() > now) { 
listener.onResponse(new PutLicenseResponse(true, LicensesStatus.INVALID)); - } else if (newLicense.type().equals(License.LicenseType.BASIC.getTypeName())) { + return; + } + final License.LicenseType licenseType; + try { + licenseType = License.LicenseType.resolve(newLicense); + } catch (Exception e) { + listener.onFailure(e); + return; + } + if (licenseType == License.LicenseType.BASIC) { listener.onFailure(new IllegalArgumentException("Registering basic licenses is not allowed.")); + } else if (isAllowedLicenseType(licenseType) == false) { + listener.onFailure(new IllegalArgumentException( + "Registering [" + licenseType.getTypeName() + "] licenses is not allowed on this cluster")); } else if (newLicense.expiryDate() < now) { listener.onResponse(new PutLicenseResponse(true, LicensesStatus.EXPIRED)); } else { @@ -272,6 +298,11 @@ private static boolean licenseIsCompatible(License license, Version version) { } } + private boolean isAllowedLicenseType(License.LicenseType type) { + logger.debug("Checking license [{}] against allowed license types: {}", type, allowedLicenseTypes); + return allowedLicenseTypes.contains(type); + } + public static Map getAckMessages(License newLicense, License currentLicense) { Map acknowledgeMessages = new HashMap<>(); if (!License.isAutoGeneratedLicense(currentLicense.signature()) // current license is not auto-generated @@ -574,4 +605,20 @@ private static boolean isProductionMode(Settings settings, DiscoveryNode localNo private static boolean isBoundToLoopback(DiscoveryNode localNode) { return localNode.getAddress().address().getAddress().isLoopbackAddress(); } + + private static List getAllowableUploadTypes() { + return Stream.of(License.LicenseType.values()) + .filter(t -> t != License.LicenseType.BASIC) + .collect(Collectors.toUnmodifiableList()); + } + + private static void validateUploadTypesSetting(List value) { + if (ALLOWABLE_UPLOAD_TYPES.containsAll(value) == false) { + throw new IllegalArgumentException("Invalid value [" + + value.stream().map(License.LicenseType::getTypeName).collect(Collectors.joining(",")) + + "] for " + ALLOWED_LICENSE_TYPES_SETTING.getKey() + ", allowed values are [" + + ALLOWABLE_UPLOAD_TYPES.stream().map(License.LicenseType::getTypeName).collect(Collectors.joining(",")) + + "]"); + } + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java index b8e6446b9f49..ee08b9f7330c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java @@ -106,7 +106,7 @@ private synchronized void onChange(Path file) { // this UTF-8 conversion is much pickier than java String final String operationMode = new BytesRef(content).utf8ToString(); try { - newOperationMode = OperationMode.resolve(operationMode); + newOperationMode = OperationMode.parse(operationMode); } catch (IllegalArgumentException e) { logger.error( (Supplier) () -> new ParameterizedMessage( diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java index 7d5a3b5e9a53..5de1186767f4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java +++ 
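// Note on the LicenseService change above: the new "xpack.license.upload.types" setting restricts
// which license types the put-license API will accept; by default every type except "basic" is
// allowed, and values outside that set fail setting validation. A minimal sketch (setting name and
// accessor taken from this diff, the rest of the node wiring is assumed):
Settings settings = Settings.builder()
    .putList("xpack.license.upload.types", "gold", "platinum")
    .build();
List<License.LicenseType> allowed = LicenseService.ALLOWED_LICENSE_TYPES_SETTING.get(settings);
// On such a node, registering e.g. a trial license fails with
// "Registering [trial] licenses is not allowed on this cluster".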
b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java @@ -138,7 +138,7 @@ public RemoteClusterLicenseChecker(final Client client, final Predicate> getSettings() { settings.addAll(XPackSettings.getAllSettings()); settings.add(LicenseService.SELF_GENERATED_LICENSE_TYPE); + settings.add(LicenseService.ALLOWED_LICENSE_TYPES_SETTING); // we add the `xpack.version` setting to all internal indices settings.add(Setting.simpleString("index.xpack.version", Setting.Property.IndexScope)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java index 9fd7f8aa86fc..1142b5411fb0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java @@ -225,7 +225,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(DEST.getPreferredName(), dest); builder.startObject(ANALYSIS.getPreferredName()); - builder.field(analysis.getWriteableName(), analysis); + builder.field(analysis.getWriteableName(), analysis, + new MapParams(Collections.singletonMap(VERSION.getPreferredName(), version == null ? null : version.toString()))); builder.endObject(); if (params.paramAsBoolean(ToXContentParams.FOR_INTERNAL_STORAGE, false)) { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java index ed3cff7d73c0..0f06b08444f5 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java @@ -49,7 +49,7 @@ static void declareFields(AbstractObjectParser parser) { private final Integer maximumNumberTrees; private final Double featureBagFraction; - BoostedTreeParams(@Nullable Double lambda, + public BoostedTreeParams(@Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @@ -76,7 +76,7 @@ static void declareFields(AbstractObjectParser parser) { this.featureBagFraction = featureBagFraction; } - BoostedTreeParams() { + public BoostedTreeParams() { this(null, null, null, null, null); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java index b4b258ea161f..ed4cb1fe18f8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.dataframe.analyses; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -35,6 +37,7 @@ public class Classification implements 
DataFrameAnalysis { public static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); public static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes"); public static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + public static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser LENIENT_PARSER = createParser(true); private static final ConstructingObjectParser STRICT_PARSER = createParser(false); @@ -48,12 +51,14 @@ private static ConstructingObjectParser createParser(boole new BoostedTreeParams((Double) a[1], (Double) a[2], (Double) a[3], (Integer) a[4], (Double) a[5]), (String) a[6], (Integer) a[7], - (Double) a[8])); + (Double) a[8], + (Long) a[9])); parser.declareString(constructorArg(), DEPENDENT_VARIABLE); BoostedTreeParams.declareFields(parser); parser.declareString(optionalConstructorArg(), PREDICTION_FIELD_NAME); parser.declareInt(optionalConstructorArg(), NUM_TOP_CLASSES); parser.declareDouble(optionalConstructorArg(), TRAINING_PERCENT); + parser.declareLong(optionalConstructorArg(), RANDOMIZE_SEED); return parser; } @@ -82,12 +87,14 @@ public static Classification fromXContent(XContentParser parser, boolean ignoreU private final String predictionFieldName; private final int numTopClasses; private final double trainingPercent; + private final long randomizeSeed; public Classification(String dependentVariable, BoostedTreeParams boostedTreeParams, @Nullable String predictionFieldName, @Nullable Integer numTopClasses, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, + @Nullable Long randomizeSeed) { if (numTopClasses != null && (numTopClasses < 0 || numTopClasses > 1000)) { throw ExceptionsHelper.badRequestException("[{}] must be an integer in [0, 1000]", NUM_TOP_CLASSES.getPreferredName()); } @@ -99,10 +106,11 @@ public Classification(String dependentVariable, this.predictionFieldName = predictionFieldName == null ? dependentVariable + "_prediction" : predictionFieldName; this.numTopClasses = numTopClasses == null ? DEFAULT_NUM_TOP_CLASSES : numTopClasses; this.trainingPercent = trainingPercent == null ? 100.0 : trainingPercent; + this.randomizeSeed = randomizeSeed == null ? 
Randomness.get().nextLong() : randomizeSeed; } public Classification(String dependentVariable) { - this(dependentVariable, new BoostedTreeParams(), null, null, null); + this(dependentVariable, new BoostedTreeParams(), null, null, null, null); } public Classification(StreamInput in) throws IOException { @@ -111,12 +119,21 @@ public Classification(StreamInput in) throws IOException { predictionFieldName = in.readOptionalString(); numTopClasses = in.readOptionalVInt(); trainingPercent = in.readDouble(); + if (in.getVersion().onOrAfter(Version.V_7_6_0)) { + randomizeSeed = in.readOptionalLong(); + } else { + randomizeSeed = Randomness.get().nextLong(); + } } public String getDependentVariable() { return dependentVariable; } + public BoostedTreeParams getBoostedTreeParams() { + return boostedTreeParams; + } + public String getPredictionFieldName() { return predictionFieldName; } @@ -129,6 +146,11 @@ public double getTrainingPercent() { return trainingPercent; } + @Nullable + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public String getWriteableName() { return NAME.getPreferredName(); @@ -141,10 +163,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(predictionFieldName); out.writeOptionalVInt(numTopClasses); out.writeDouble(trainingPercent); + if (out.getVersion().onOrAfter(Version.V_7_6_0)) { + out.writeOptionalLong(randomizeSeed); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Version version = Version.fromString(params.param("version", Version.CURRENT.toString())); + builder.startObject(); builder.field(DEPENDENT_VARIABLE.getPreferredName(), dependentVariable); boostedTreeParams.toXContent(builder, params); @@ -153,6 +180,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); + if (version.onOrAfter(Version.V_7_6_0)) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -238,11 +268,12 @@ public boolean equals(Object o) { && Objects.equals(boostedTreeParams, that.boostedTreeParams) && Objects.equals(predictionFieldName, that.predictionFieldName) && Objects.equals(numTopClasses, that.numTopClasses) - && trainingPercent == that.trainingPercent; + && trainingPercent == that.trainingPercent + && randomizeSeed == that.randomizeSeed; } @Override public int hashCode() { - return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent); + return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent, randomizeSeed); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java index 01388f01d807..8fffcd0f573d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.dataframe.analyses; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Randomness; import 
org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -32,6 +34,7 @@ public class Regression implements DataFrameAnalysis { public static final ParseField DEPENDENT_VARIABLE = new ParseField("dependent_variable"); public static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); public static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + public static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser LENIENT_PARSER = createParser(true); private static final ConstructingObjectParser STRICT_PARSER = createParser(false); @@ -44,11 +47,13 @@ private static ConstructingObjectParser createParser(boolean l (String) a[0], new BoostedTreeParams((Double) a[1], (Double) a[2], (Double) a[3], (Integer) a[4], (Double) a[5]), (String) a[6], - (Double) a[7])); + (Double) a[7], + (Long) a[8])); parser.declareString(constructorArg(), DEPENDENT_VARIABLE); BoostedTreeParams.declareFields(parser); parser.declareString(optionalConstructorArg(), PREDICTION_FIELD_NAME); parser.declareDouble(optionalConstructorArg(), TRAINING_PERCENT); + parser.declareLong(optionalConstructorArg(), RANDOMIZE_SEED); return parser; } @@ -60,11 +65,13 @@ public static Regression fromXContent(XContentParser parser, boolean ignoreUnkno private final BoostedTreeParams boostedTreeParams; private final String predictionFieldName; private final double trainingPercent; + private final long randomizeSeed; public Regression(String dependentVariable, BoostedTreeParams boostedTreeParams, @Nullable String predictionFieldName, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, + @Nullable Long randomizeSeed) { if (trainingPercent != null && (trainingPercent < 1.0 || trainingPercent > 100.0)) { throw ExceptionsHelper.badRequestException("[{}] must be a double in [1, 100]", TRAINING_PERCENT.getPreferredName()); } @@ -72,10 +79,11 @@ public Regression(String dependentVariable, this.boostedTreeParams = ExceptionsHelper.requireNonNull(boostedTreeParams, BoostedTreeParams.NAME); this.predictionFieldName = predictionFieldName == null ? dependentVariable + "_prediction" : predictionFieldName; this.trainingPercent = trainingPercent == null ? 100.0 : trainingPercent; + this.randomizeSeed = randomizeSeed == null ? 
Randomness.get().nextLong() : randomizeSeed; } public Regression(String dependentVariable) { - this(dependentVariable, new BoostedTreeParams(), null, null); + this(dependentVariable, new BoostedTreeParams(), null, null, null); } public Regression(StreamInput in) throws IOException { @@ -83,12 +91,21 @@ public Regression(StreamInput in) throws IOException { boostedTreeParams = new BoostedTreeParams(in); predictionFieldName = in.readOptionalString(); trainingPercent = in.readDouble(); + if (in.getVersion().onOrAfter(Version.V_7_6_0)) { + randomizeSeed = in.readOptionalLong(); + } else { + randomizeSeed = Randomness.get().nextLong(); + } } public String getDependentVariable() { return dependentVariable; } + public BoostedTreeParams getBoostedTreeParams() { + return boostedTreeParams; + } + public String getPredictionFieldName() { return predictionFieldName; } @@ -97,6 +114,11 @@ public double getTrainingPercent() { return trainingPercent; } + @Nullable + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public String getWriteableName() { return NAME.getPreferredName(); } @@ -108,10 +130,15 @@ public void writeTo(StreamOutput out) throws IOException { boostedTreeParams.writeTo(out); out.writeOptionalString(predictionFieldName); out.writeDouble(trainingPercent); + if (out.getVersion().onOrAfter(Version.V_7_6_0)) { + out.writeOptionalLong(randomizeSeed); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Version version = Version.fromString(params.param("version", Version.CURRENT.toString())); + builder.startObject(); builder.field(DEPENDENT_VARIABLE.getPreferredName(), dependentVariable); boostedTreeParams.toXContent(builder, params); @@ -119,6 +146,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); + if (version.onOrAfter(Version.V_7_6_0)) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -177,11 +207,12 @@ public boolean equals(Object o) { return Objects.equals(dependentVariable, that.dependentVariable) && Objects.equals(boostedTreeParams, that.boostedTreeParams) && Objects.equals(predictionFieldName, that.predictionFieldName) - && trainingPercent == that.trainingPercent; + && trainingPercent == that.trainingPercent + && randomizeSeed == that.randomizeSeed; } @Override public int hashCode() { - return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, trainingPercent); + return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, trainingPercent, randomizeSeed); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java index 21e145546f8b..343a520d9b5d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java @@ -138,7 +138,7 @@ public static TrainedModelConfig.Builder fromXContent(XContentParser parser, boo throw new IllegalArgumentException("[" + ESTIMATED_OPERATIONS.getPreferredName() + "] must be greater than or equal to 0"); } this.estimatedOperations = estimatedOperations; - this.licenseLevel = 
License.OperationMode.resolve(ExceptionsHelper.requireNonNull(licenseLevel, LICENSE_LEVEL)); + this.licenseLevel = License.OperationMode.parse(ExceptionsHelper.requireNonNull(licenseLevel, LICENSE_LEVEL)); } public TrainedModelConfig(StreamInput in) throws IOException { @@ -153,7 +153,7 @@ public TrainedModelConfig(StreamInput in) throws IOException { input = new TrainedModelInput(in); estimatedHeapMemory = in.readVLong(); estimatedOperations = in.readVLong(); - licenseLevel = License.OperationMode.resolve(in.readString()); + licenseLevel = License.OperationMode.parse(in.readString()); } public String getModelId() { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java index c432a207fcb7..eb357661d50c 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java @@ -34,6 +34,11 @@ public void testFIPSCheckWithAllowedLicense() throws Exception { licenseService.start(); PlainActionFuture responseFuture = new PlainActionFuture<>(); licenseService.registerLicense(request, responseFuture); + if (responseFuture.isDone()) { + // If the future is done, it means request/license validation failed. + // In which case, this `actionGet` should throw a more useful exception than the verify below. + responseFuture.actionGet(); + } verify(clusterService).submitStateUpdateTask(any(String.class), any(ClusterStateUpdateTask.class)); } @@ -67,6 +72,11 @@ public void testFIPSCheckWithoutAllowedLicense() throws Exception { setInitialState(null, licenseState, settings); licenseService.start(); licenseService.registerLicense(request, responseFuture); + if (responseFuture.isDone()) { + // If the future is done, it means request/license validation failed. + // In which case, this `actionGet` should throw a more useful exception than the verify below. + responseFuture.actionGet(); + } verify(clusterService).submitStateUpdateTask(any(String.class), any(ClusterStateUpdateTask.class)); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java index 648f48ff2ea1..a1fbfbe6c6a4 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java @@ -57,7 +57,8 @@ public void testResolveUnknown() { for (String type : types) { try { - OperationMode.resolve(type); + final License.LicenseType licenseType = License.LicenseType.resolve(type); + OperationMode.resolve(licenseType); fail(String.format(Locale.ROOT, "[%s] should not be recognized as an operation mode", type)); } @@ -69,7 +70,8 @@ public void testResolveUnknown() { private static void assertResolve(OperationMode expected, String... 
types) { for (String type : types) { - assertThat(OperationMode.resolve(type), equalTo(expected)); + License.LicenseType licenseType = License.LicenseType.resolve(type); + assertThat(OperationMode.resolve(licenseType), equalTo(expected)); } } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java index a69331287918..20df885261fe 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java @@ -34,7 +34,7 @@ public void init() throws Exception { } public void testLicenseOperationModeUpdate() throws Exception { - String type = randomFrom("trial", "basic", "standard", "gold", "platinum"); + License.LicenseType type = randomFrom(License.LicenseType.values()); License license = License.builder() .uid("id") .expiryDate(0) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java index 750b3d67c5f6..b1b22f15c259 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java @@ -6,12 +6,47 @@ package org.elasticsearch.license; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.AckedClusterStateUpdateTask; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateUpdateTask; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.license.licensor.LicenseSigner; +import org.elasticsearch.protocol.xpack.license.LicensesStatus; +import org.elasticsearch.protocol.xpack.license.PutLicenseResponse; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TestMatchers; +import org.elasticsearch.watcher.ResourceWatcherService; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Clock; import java.time.LocalDate; import java.time.ZoneOffset; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.startsWith; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; /** * Due to changes in JDK9 where locale data is used from CLDR, the licence message will differ in jdk 8 and jdk9+ @@ -30,4 +65,133 @@ public void testLogExpirationWarning() { assertThat(message, startsWith("License [will expire] on [Thursday, November 15, 2018].\n")); } } + + /** 
+ * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is on its default value (all license types) + */ + public void testRegisterLicenseWithoutTypeRestrictions() throws Exception { + assertRegisterValidLicense(Settings.EMPTY, + randomValueOtherThan(License.LicenseType.BASIC, () -> randomFrom(License.LicenseType.values()))); + } + + /** + * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is set, + * and the uploaded license type matches + */ + public void testSuccessfullyRegisterLicenseMatchingTypeRestrictions() throws Exception { + final List allowed = randomSubsetOf( + randomIntBetween(1, LicenseService.ALLOWABLE_UPLOAD_TYPES.size() - 1), LicenseService.ALLOWABLE_UPLOAD_TYPES); + final List allowedNames = allowed.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()); + final Settings settings = Settings.builder() + .putList("xpack.license.upload.types", allowedNames) + .build(); + assertRegisterValidLicense(settings, randomFrom(allowed)); + } + + /** + * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is set, + * and the uploaded license type does not match + */ + public void testFailToRegisterLicenseNotMatchingTypeRestrictions() throws Exception { + final List allowed = randomSubsetOf( + randomIntBetween(1, LicenseService.ALLOWABLE_UPLOAD_TYPES.size() - 2), LicenseService.ALLOWABLE_UPLOAD_TYPES); + final List allowedNames = allowed.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()); + final Settings settings = Settings.builder() + .putList("xpack.license.upload.types", allowedNames) + .build(); + final License.LicenseType notAllowed = randomValueOtherThanMany( + test -> allowed.contains(test), + () -> randomFrom(LicenseService.ALLOWABLE_UPLOAD_TYPES)); + assertRegisterDisallowedLicenseType(settings, notAllowed); + } + + private void assertRegisterValidLicense(Settings baseSettings, License.LicenseType licenseType) throws IOException { + tryRegisterLicense(baseSettings, licenseType, + future -> assertThat(future.actionGet().status(), equalTo(LicensesStatus.VALID))); + } + + private void assertRegisterDisallowedLicenseType(Settings baseSettings, License.LicenseType licenseType) throws IOException { + tryRegisterLicense(baseSettings, licenseType, future -> { + final IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, future::actionGet); + assertThat(exception, TestMatchers.throwableWithMessage( + "Registering [" + licenseType.getTypeName() + "] licenses is not allowed on " + "this cluster")); + }); + } + + private void tryRegisterLicense(Settings baseSettings, License.LicenseType licenseType, + Consumer> assertion) throws IOException { + final Settings settings = Settings.builder() + .put(baseSettings) + .put("path.home", createTempDir()) + .put("discovery.type", "single-node") // So we skip TLS checks + .build(); + + final ClusterState clusterState = Mockito.mock(ClusterState.class); + Mockito.when(clusterState.metaData()).thenReturn(MetaData.EMPTY_META_DATA); + + final ClusterService clusterService = Mockito.mock(ClusterService.class); + Mockito.when(clusterService.state()).thenReturn(clusterState); + + final Clock clock = randomBoolean() ? 
Clock.systemUTC() : Clock.systemDefaultZone(); + final Environment env = TestEnvironment.newEnvironment(settings); + final ResourceWatcherService resourceWatcherService = Mockito.mock(ResourceWatcherService.class); + final XPackLicenseState licenseState = Mockito.mock(XPackLicenseState.class); + final LicenseService service = new LicenseService(settings, clusterService, clock, env, resourceWatcherService, licenseState); + + final PutLicenseRequest request = new PutLicenseRequest(); + request.license(spec(licenseType, TimeValue.timeValueDays(randomLongBetween(1, 1000))), XContentType.JSON); + final PlainActionFuture future = new PlainActionFuture<>(); + service.registerLicense(request, future); + + if (future.isDone()) { + // If validation failed, the future might be done without calling the updater task. + assertion.accept(future); + } else { + ArgumentCaptor taskCaptor = ArgumentCaptor.forClass(ClusterStateUpdateTask.class); + verify(clusterService, times(1)).submitStateUpdateTask(any(), taskCaptor.capture()); + + final ClusterStateUpdateTask task = taskCaptor.getValue(); + assertThat(task, instanceOf(AckedClusterStateUpdateTask.class)); + ((AckedClusterStateUpdateTask) task).onAllNodesAcked(null); + + assertion.accept(future); + } + } + + private BytesReference spec(License.LicenseType type, TimeValue expires) throws IOException { + final License signed = sign(buildLicense(type, expires)); + return toSpec(signed); + } + + private BytesReference toSpec(License license) throws IOException { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + builder.startObject(); + builder.startObject("license"); + license.toInnerXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + builder.endObject(); + builder.flush(); + return BytesReference.bytes(builder); + } + + private License sign(License license) throws IOException { + final Path publicKey = getDataPath("/public.key"); + final Path privateKey = getDataPath("/private.key"); + final LicenseSigner signer = new LicenseSigner(privateKey, publicKey); + + return signer.sign(license); + } + + private License buildLicense(License.LicenseType type, TimeValue expires) { + return License.builder() + .uid(new UUID(randomLong(), randomLong()).toString()) + .type(type) + .expiryDate(System.currentTimeMillis() + expires.millis()) + .issuer(randomAlphaOfLengthBetween(5, 60)) + .issuedTo(randomAlphaOfLengthBetween(5, 60)) + .issueDate(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(randomLongBetween(1, 5000))) + .maxNodes(randomIntBetween(1, 500)) + .signature(null) + .build(); + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java index d7799959f6cc..084d965a6e74 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java @@ -80,7 +80,7 @@ public void testLicenseMetadataParsingDoesNotSwallowOtherMetaData() throws Excep builder = metaDataBuilder.build().toXContent(builder, params); builder.endObject(); // deserialize metadata again - MetaData metaData = MetaData.Builder.fromXContent(createParser(builder)); + MetaData metaData = MetaData.Builder.fromXContent(createParser(builder), randomBoolean()); // check that custom metadata still present 
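// Note: MetaData.Builder.fromXContent now takes a second boolean. When true, custom metadata
// sections with no registered parser are preserved as opaque MetaData.UnknownGatewayOnlyCustom
// entries and are re-serialized as-is; when false they are dropped. In this test both customs are
// known to the parser, so either value works, hence the randomBoolean() above. A minimal sketch,
// assuming an XContentParser named parser positioned on a metadata object (hypothetical variable):
MetaData preserved = MetaData.Builder.fromXContent(parser, true);  // keep unknown customs
MetaData dropped = MetaData.Builder.fromXContent(parser, false);   // discard unknown customs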
assertThat(metaData.custom(licensesMetaData.getWriteableName()), notNullValue()); assertThat(metaData.custom(repositoriesMetaData.getWriteableName()), notNullValue()); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java index d6b2c077388e..880bea888465 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.Version; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.Writeable; @@ -20,17 +21,20 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.test.AbstractSerializingTestCase; import org.elasticsearch.xpack.core.ml.dataframe.analyses.MlDataFrameAnalysisNamedXContentProvider; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests; +import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; import org.elasticsearch.xpack.core.ml.utils.ToXContentParams; import org.junit.Before; @@ -42,10 +46,13 @@ import java.util.List; import java.util.Map; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasEntry; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; public class DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase { @@ -339,6 +346,44 @@ public void testPreventVersionInjection() throws IOException { } } + public void testToXContent_GivenAnalysisWithRandomizeSeedAndVersionIsCurrent() throws IOException { + Regression regression = new Regression("foo"); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + DataFrameAnalyticsConfig config = new DataFrameAnalyticsConfig.Builder() + .setVersion(Version.CURRENT) + .setId("test_config") + .setSource(new DataFrameAnalyticsSource(new String[] {"source_index"}, null, null)) + .setDest(new DataFrameAnalyticsDest("dest_index", null)) + .setAnalysis(regression) + .build(); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + config.toXContent(builder, ToXContent.EMPTY_PARAMS); + String json = 
Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } + + public void testToXContent_GivenAnalysisWithRandomizeSeedAndVersionIsBeforeItWasIntroduced() throws IOException { + Regression regression = new Regression("foo"); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + DataFrameAnalyticsConfig config = new DataFrameAnalyticsConfig.Builder() + .setVersion(Version.V_7_5_0) + .setId("test_config") + .setSource(new DataFrameAnalyticsSource(new String[] {"source_index"}, null, null)) + .setDest(new DataFrameAnalyticsDest("dest_index", null)) + .setAnalysis(regression) + .build(); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + config.toXContent(builder, ToXContent.EMPTY_PARAMS); + String json = Strings.toString(builder); + assertThat(json, not(containsString("randomize_seed"))); + } + } + private static void assertTooSmall(ElasticsearchStatusException e) { assertThat(e.getMessage(), startsWith("model_memory_limit must be at least 1kb.")); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java index 61d6b4dfe3f7..8308ef8dad28 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java @@ -6,20 +6,28 @@ package org.elasticsearch.xpack.core.ml.dataframe.analyses; import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.Version; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.mapper.BooleanFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.test.AbstractSerializingTestCase; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Set; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; public class ClassificationTests extends AbstractSerializingTestCase { @@ -42,7 +50,9 @@ public static Classification createRandom() { String predictionFieldName = randomBoolean() ? null : randomAlphaOfLength(10); Integer numTopClasses = randomBoolean() ? null : randomIntBetween(0, 1000); Double trainingPercent = randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true); - return new Classification(dependentVariableName, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent); + Long randomizeSeed = randomBoolean() ? 
null : randomLong(); + return new Classification(dependentVariableName, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent, + randomizeSeed); } @Override @@ -52,71 +62,71 @@ protected Writeable.Reader instanceReader() { public void testConstructor_GivenTrainingPercentIsLessThanOne() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 0.999)); + () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 0.999, randomLong())); assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]")); } public void testConstructor_GivenTrainingPercentIsGreaterThan100() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0001)); + () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0001, randomLong())); assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]")); } public void testConstructor_GivenNumTopClassesIsLessThanZero() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", -1, 1.0)); + () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", -1, 1.0, randomLong())); assertThat(e.getMessage(), equalTo("[num_top_classes] must be an integer in [0, 1000]")); } public void testConstructor_GivenNumTopClassesIsGreaterThan1000() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1001, 1.0)); + () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1001, 1.0, randomLong())); assertThat(e.getMessage(), equalTo("[num_top_classes] must be an integer in [0, 1000]")); } public void testGetPredictionFieldName() { - Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0); + Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0, randomLong()); assertThat(classification.getPredictionFieldName(), equalTo("result")); - classification = new Classification("foo", BOOSTED_TREE_PARAMS, null, 3, 50.0); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, null, 3, 50.0, randomLong()); assertThat(classification.getPredictionFieldName(), equalTo("foo_prediction")); } public void testGetNumTopClasses() { - Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 7, 1.0); + Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 7, 1.0, randomLong()); assertThat(classification.getNumTopClasses(), equalTo(7)); // Boundary condition: num_top_classes == 0 - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 0, 1.0); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 0, 1.0, randomLong()); assertThat(classification.getNumTopClasses(), equalTo(0)); // Boundary condition: num_top_classes == 1000 - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1000, 1.0); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1000, 1.0, randomLong()); assertThat(classification.getNumTopClasses(), equalTo(1000)); // num_top_classes == null, default applied - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", null, 1.0); + classification = new Classification("foo", 
BOOSTED_TREE_PARAMS, "result", null, 1.0, randomLong()); assertThat(classification.getNumTopClasses(), equalTo(2)); } public void testGetTrainingPercent() { - Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0); + Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0, randomLong()); assertThat(classification.getTrainingPercent(), equalTo(50.0)); // Boundary condition: training_percent == 1.0 - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 1.0); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 1.0, randomLong()); assertThat(classification.getTrainingPercent(), equalTo(1.0)); // Boundary condition: training_percent == 100.0 - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0, randomLong()); assertThat(classification.getTrainingPercent(), equalTo(100.0)); // training_percent == null, default applied - classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, null); + classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, null, randomLong()); assertThat(classification.getTrainingPercent(), equalTo(100.0)); } @@ -155,4 +165,48 @@ public void testGetParams() { public void testFieldCardinalityLimitsIsNonNull() { assertThat(createTestInstance().getFieldCardinalityLimits(), is(not(nullValue()))); } + + public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException { + Classification classification = createRandom(); + assertThat(classification.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", "7.5.0"))); + String json = Strings.toString(builder); + assertThat(json, not(containsString("randomize_seed"))); + } + } + + public void testToXContent_GivenVersionAfterRandomizeSeedWasIntroduced() throws IOException { + Classification classification = createRandom(); + assertThat(classification.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", Version.CURRENT.toString()))); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } + + public void testToXContent_GivenVersionIsNull() throws IOException { + Classification classification = createRandom(); + assertThat(classification.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", null))); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } + + public void testToXContent_GivenEmptyParams() throws IOException { + Classification classification = createRandom(); + assertThat(classification.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + classification.toXContent(builder, ToXContent.EMPTY_PARAMS); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } } diff --git 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java index f3d5312280e8..58e19f6ef6a2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java @@ -6,16 +6,24 @@ package org.elasticsearch.xpack.core.ml.dataframe.analyses; import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.Version; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; import java.io.IOException; +import java.util.Collections; import java.util.Map; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; public class RegressionTests extends AbstractSerializingTestCase { @@ -37,7 +45,8 @@ public static Regression createRandom() { BoostedTreeParams boostedTreeParams = BoostedTreeParamsTests.createRandom(); String predictionFieldName = randomBoolean() ? null : randomAlphaOfLength(10); Double trainingPercent = randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true); - return new Regression(dependentVariableName, boostedTreeParams, predictionFieldName, trainingPercent); + Long randomizeSeed = randomBoolean() ? 
null : randomLong(); + return new Regression(dependentVariableName, boostedTreeParams, predictionFieldName, trainingPercent, randomizeSeed); } @Override @@ -47,40 +56,40 @@ protected Writeable.Reader instanceReader() { public void testConstructor_GivenTrainingPercentIsLessThanOne() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 0.999)); + () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 0.999, randomLong())); assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]")); } public void testConstructor_GivenTrainingPercentIsGreaterThan100() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0001)); + () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0001, randomLong())); assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]")); } public void testGetPredictionFieldName() { - Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0); + Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0, randomLong()); assertThat(regression.getPredictionFieldName(), equalTo("result")); - regression = new Regression("foo", BOOSTED_TREE_PARAMS, null, 50.0); + regression = new Regression("foo", BOOSTED_TREE_PARAMS, null, 50.0, randomLong()); assertThat(regression.getPredictionFieldName(), equalTo("foo_prediction")); } public void testGetTrainingPercent() { - Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0); + Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0, randomLong()); assertThat(regression.getTrainingPercent(), equalTo(50.0)); // Boundary condition: training_percent == 1.0 - regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 1.0); + regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 1.0, randomLong()); assertThat(regression.getTrainingPercent(), equalTo(1.0)); // Boundary condition: training_percent == 100.0 - regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0); + regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0, randomLong()); assertThat(regression.getTrainingPercent(), equalTo(100.0)); // training_percent == null, default applied - regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", null); + regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", null, randomLong()); assertThat(regression.getTrainingPercent(), equalTo(100.0)); } @@ -100,4 +109,48 @@ public void testGetStateDocId() { String randomId = randomAlphaOfLength(10); assertThat(regression.getStateDocId(randomId), equalTo(randomId + "_regression_state#1")); } + + public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException { + Regression regression = createRandom(); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", "7.5.0"))); + String json = Strings.toString(builder); + assertThat(json, not(containsString("randomize_seed"))); + } + } + + public void testToXContent_GivenVersionAfterRandomizeSeedWasIntroduced() throws IOException { + Regression regression = createRandom(); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder 
builder = JsonXContent.contentBuilder()) { + regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", Version.CURRENT.toString()))); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } + + public void testToXContent_GivenVersionIsNull() throws IOException { + Regression regression = createRandom(); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", null))); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } + + public void testToXContent_GivenEmptyParams() throws IOException { + Regression regression = createRandom(); + assertThat(regression.getRandomizeSeed(), is(notNullValue())); + + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + regression.toXContent(builder, ToXContent.EMPTY_PARAMS); + String json = Strings.toString(builder); + assertThat(json, containsString("randomize_seed")); + } + } } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java index f5db9ae690a9..0e49043fcfbe 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java @@ -20,6 +20,7 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams; import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParamsTests; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Accuracy; @@ -31,6 +32,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.allOf; @@ -158,7 +160,7 @@ public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty( sourceIndex, destIndex, null, - new Classification(dependentVariable, BoostedTreeParamsTests.createRandom(), null, numTopClasses, 50.0)); + new Classification(dependentVariable, BoostedTreeParamsTests.createRandom(), null, numTopClasses, 50.0, null)); registerAnalytics(config); putAnalytics(config); @@ -269,6 +271,47 @@ public void testDependentVariableCardinalityTooHighButWithQueryMakesItWithinRang assertProgress(jobId, 100, 100, 100, 100); } + public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception { + String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source"; + String dependentVariable = KEYWORD_FIELD; + + // We use 100 rows as we can't set this too low. If too low it is possible + // we only train with rows of one of the two classes which leads to a failure. 
+ indexData(sourceIndex, 100, 0, dependentVariable); + + String firstJobId = "classification_two_jobs_with_same_randomize_seed_1"; + String firstJobDestIndex = firstJobId + "_dest"; + + BoostedTreeParams boostedTreeParams = new BoostedTreeParams(1.0, 1.0, 1.0, 1, 1.0); + + DataFrameAnalyticsConfig firstJob = buildAnalytics(firstJobId, sourceIndex, firstJobDestIndex, null, + new Classification(dependentVariable, boostedTreeParams, null, 1, 50.0, null)); + registerAnalytics(firstJob); + putAnalytics(firstJob); + + String secondJobId = "classification_two_jobs_with_same_randomize_seed_2"; + String secondJobDestIndex = secondJobId + "_dest"; + + long randomizeSeed = ((Classification) firstJob.getAnalysis()).getRandomizeSeed(); + DataFrameAnalyticsConfig secondJob = buildAnalytics(secondJobId, sourceIndex, secondJobDestIndex, null, + new Classification(dependentVariable, boostedTreeParams, null, 1, 50.0, randomizeSeed)); + + registerAnalytics(secondJob); + putAnalytics(secondJob); + + // Let's run both jobs in parallel and wait until they are finished + startAnalytics(firstJobId); + startAnalytics(secondJobId); + waitUntilAnalyticsIsStopped(firstJobId); + waitUntilAnalyticsIsStopped(secondJobId); + + // Now we compare they both used the same training rows + Set firstRunTrainingRowsIds = getTrainingRowsIds(firstJobDestIndex); + Set secondRunTrainingRowsIds = getTrainingRowsIds(secondJobDestIndex); + + assertThat(secondRunTrainingRowsIds, equalTo(firstRunTrainingRowsIds)); + } + private void initialize(String jobId) { this.jobId = jobId; this.sourceIndex = jobId + "_source_index"; @@ -340,10 +383,10 @@ private static Map getMlResultsObjectFromDestDoc(Map void assertTopClasses( - Map resultsObject, - int numTopClasses, - String dependentVariable, - List dependentVariableValues) { + Map resultsObject, + int numTopClasses, + String dependentVariable, + List dependentVariableValues) { assertThat(resultsObject.containsKey("top_classes"), is(true)); List> topClasses = (List>) resultsObject.get("top_classes"); assertThat(topClasses, hasSize(numTopClasses)); diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java index 29ef54d3f752..8ff82c28b36e 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; @@ -45,7 +46,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -252,4 +256,22 @@ private static List fetchAllAuditMessages(String dataFrameAnalyticsId) { .map(hit -> (String) hit.getSourceAsMap().get("message")) .collect(Collectors.toList()); } + 
+ protected static Set getTrainingRowsIds(String index) { + Set trainingRowsIds = new HashSet<>(); + SearchResponse hits = client().prepareSearch(index).setSize(10000).get(); + for (SearchHit hit : hits.getHits()) { + Map sourceAsMap = hit.getSourceAsMap(); + assertThat(sourceAsMap.containsKey("ml"), is(true)); + @SuppressWarnings("unchecked") + Map resultsObject = (Map) sourceAsMap.get("ml"); + + assertThat(resultsObject.containsKey("is_training"), is(true)); + if (Boolean.TRUE.equals(resultsObject.get("is_training"))) { + trainingRowsIds.add(hit.getId()); + } + } + assertThat(trainingRowsIds.isEmpty(), is(false)); + return trainingRowsIds; + } } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java index 71ea840c53ea..84d408daacc6 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java @@ -16,6 +16,7 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState; +import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams; import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParamsTests; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex; @@ -25,6 +26,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; @@ -139,7 +141,7 @@ public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty() throws Exception sourceIndex, destIndex, null, - new Regression(DEPENDENT_VARIABLE_FIELD, BoostedTreeParamsTests.createRandom(), null, 50.0)); + new Regression(DEPENDENT_VARIABLE_FIELD, BoostedTreeParamsTests.createRandom(), null, 50.0, null)); registerAnalytics(config); putAnalytics(config); @@ -235,6 +237,43 @@ public void testStopAndRestart() throws Exception { assertInferenceModelPersisted(jobId); } + public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception { + String sourceIndex = "regression_two_jobs_with_same_randomize_seed_source"; + indexData(sourceIndex, 10, 0); + + String firstJobId = "regression_two_jobs_with_same_randomize_seed_1"; + String firstJobDestIndex = firstJobId + "_dest"; + + BoostedTreeParams boostedTreeParams = new BoostedTreeParams(1.0, 1.0, 1.0, 1, 1.0); + + DataFrameAnalyticsConfig firstJob = buildAnalytics(firstJobId, sourceIndex, firstJobDestIndex, null, + new Regression(DEPENDENT_VARIABLE_FIELD, boostedTreeParams, null, 50.0, null)); + registerAnalytics(firstJob); + putAnalytics(firstJob); + + String secondJobId = "regression_two_jobs_with_same_randomize_seed_2"; + String secondJobDestIndex = secondJobId + "_dest"; + + long randomizeSeed = ((Regression) firstJob.getAnalysis()).getRandomizeSeed(); + DataFrameAnalyticsConfig secondJob = buildAnalytics(secondJobId, sourceIndex, secondJobDestIndex, null, + new Regression(DEPENDENT_VARIABLE_FIELD, boostedTreeParams, null, 50.0, randomizeSeed)); + + registerAnalytics(secondJob); + putAnalytics(secondJob); + + // Let's run both jobs in 
parallel and wait until they are finished + startAnalytics(firstJobId); + startAnalytics(secondJobId); + waitUntilAnalyticsIsStopped(firstJobId); + waitUntilAnalyticsIsStopped(secondJobId); + + // Now we compare they both used the same training rows + Set firstRunTrainingRowsIds = getTrainingRowsIds(firstJobDestIndex); + Set secondRunTrainingRowsIds = getTrainingRowsIds(secondJobDestIndex); + + assertThat(secondRunTrainingRowsIds, equalTo(firstRunTrainingRowsIds)); + } + private void initialize(String jobId) { this.jobId = jobId; this.sourceIndex = jobId + "_source_index"; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java index 2884cd331779..1cbed7ed7661 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java @@ -111,7 +111,7 @@ protected ClusterBlockException checkBlock(PutDataFrameAnalyticsAction.Request r protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request request, ClusterState state, ActionListener listener) { validateConfig(request.getConfig()); - DataFrameAnalyticsConfig memoryCappedConfig = + DataFrameAnalyticsConfig preparedForPutConfig = new DataFrameAnalyticsConfig.Builder(request.getConfig(), maxModelMemoryLimit) .setCreateTime(Instant.now()) .setVersion(Version.CURRENT) @@ -120,11 +120,11 @@ protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request re if (licenseState.isAuthAllowed()) { final String username = securityContext.getUser().principal(); RoleDescriptor.IndicesPrivileges sourceIndexPrivileges = RoleDescriptor.IndicesPrivileges.builder() - .indices(memoryCappedConfig.getSource().getIndex()) + .indices(preparedForPutConfig.getSource().getIndex()) .privileges("read") .build(); RoleDescriptor.IndicesPrivileges destIndexPrivileges = RoleDescriptor.IndicesPrivileges.builder() - .indices(memoryCappedConfig.getDest().getIndex()) + .indices(preparedForPutConfig.getDest().getIndex()) .privileges("read", "index", "create_index") .build(); @@ -135,16 +135,16 @@ protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request re privRequest.indexPrivileges(sourceIndexPrivileges, destIndexPrivileges); ActionListener privResponseListener = ActionListener.wrap( - r -> handlePrivsResponse(username, memoryCappedConfig, r, listener), + r -> handlePrivsResponse(username, preparedForPutConfig, r, listener), listener::onFailure); client.execute(HasPrivilegesAction.INSTANCE, privRequest, privResponseListener); } else { updateDocMappingAndPutConfig( - memoryCappedConfig, + preparedForPutConfig, threadPool.getThreadContext().getHeaders(), ActionListener.wrap( - indexResponse -> listener.onResponse(new PutDataFrameAnalyticsAction.Response(memoryCappedConfig)), + indexResponse -> listener.onResponse(new PutDataFrameAnalyticsAction.Response(preparedForPutConfig)), listener::onFailure )); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java index 76fc58802794..8e89113be7eb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java +++ 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java @@ -233,15 +233,7 @@ private void startAnalytics(DataFrameAnalyticsTask task, DataFrameAnalyticsConfi DataFrameAnalyticsTaskState analyzingState = new DataFrameAnalyticsTaskState(DataFrameAnalyticsState.ANALYZING, task.getAllocationId(), null); task.updatePersistentTaskState(analyzingState, ActionListener.wrap( - updatedTask -> processManager.runJob(task, config, dataExtractorFactory, - error -> { - if (error != null) { - task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()); - } else { - auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); - task.markAsCompleted(); - } - }), + updatedTask -> processManager.runJob(task, config, dataExtractorFactory), error -> { if (ExceptionsHelper.unwrapCause(error) instanceof ResourceNotFoundException) { // Task has stopped diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java index 815d8478a527..ce981ad17a98 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java @@ -8,6 +8,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.search.SearchResponse; @@ -90,19 +91,19 @@ public AnalyticsProcessManager(Client client, this.trainedModelProvider = Objects.requireNonNull(trainedModelProvider); } - public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractorFactory dataExtractorFactory, - Consumer finishHandler) { + public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractorFactory dataExtractorFactory) { executorServiceForJob.execute(() -> { - ProcessContext processContext = new ProcessContext(config.getId()); + ProcessContext processContext = new ProcessContext(config); synchronized (processContextByAllocation) { if (task.isStopping()) { // The task was requested to stop before we created the process context - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); return; } if (processContextByAllocation.putIfAbsent(task.getAllocationId(), processContext) != null) { - finishHandler.accept( - ExceptionsHelper.serverError("[" + config.getId() + "] Could not create process as one already exists")); + task.updateState( + DataFrameAnalyticsState.FAILED, "[" + config.getId() + "] Could not create process as one already exists"); return; } } @@ -113,13 +114,13 @@ public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, // Fetch existing model state (if any) BytesReference state = getModelState(config); - if (processContext.startProcess(dataExtractorFactory, config, task, state)) { - executorServiceForProcess.execute(() -> processResults(processContext)); - executorServiceForProcess.execute(() -> processData(task, config, processContext.dataExtractor, - processContext.process, 
processContext.resultProcessor, finishHandler, state)); + if (processContext.startProcess(dataExtractorFactory, task, state)) { + executorServiceForProcess.execute(() -> processContext.resultProcessor.get().process(processContext.process.get())); + executorServiceForProcess.execute(() -> processData(task, processContext, state)); } else { processContextByAllocation.remove(task.getAllocationId()); - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); } }); } @@ -140,26 +141,18 @@ private BytesReference getModelState(DataFrameAnalyticsConfig config) { } } - private void processResults(ProcessContext processContext) { + private void processData(DataFrameAnalyticsTask task, ProcessContext processContext, BytesReference state) { + DataFrameAnalyticsConfig config = processContext.config; + DataFrameDataExtractor dataExtractor = processContext.dataExtractor.get(); + AnalyticsProcess process = processContext.process.get(); + AnalyticsResultProcessor resultProcessor = processContext.resultProcessor.get(); try { - processContext.resultProcessor.process(processContext.process); - } catch (Exception e) { - processContext.setFailureReason(e.getMessage()); - } - } - - private void processData(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractor dataExtractor, - AnalyticsProcess process, AnalyticsResultProcessor resultProcessor, - Consumer finishHandler, BytesReference state) { - - try { - ProcessContext processContext = processContextByAllocation.get(task.getAllocationId()); writeHeaderRecord(dataExtractor, process); writeDataRows(dataExtractor, process, config.getAnalysis(), task.getProgressTracker()); process.writeEndOfDataMessage(); process.flushStream(); - restoreState(config, state, process, finishHandler); + restoreState(task, config, state, process); LOGGER.info("[{}] Waiting for result processor to complete", config.getId()); resultProcessor.awaitForCompletion(); @@ -168,26 +161,34 @@ private void processData(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig c refreshDest(config); LOGGER.info("[{}] Result processor has completed", config.getId()); } catch (Exception e) { - if (task.isStopping() == false) { - String errorMsg = new ParameterizedMessage("[{}] Error while processing data [{}]", config.getId(), e.getMessage()) - .getFormattedMessage(); + if (task.isStopping()) { + // Errors during task stopping are expected but we still want to log them just in case. 
+ String errorMsg = + new ParameterizedMessage( + "[{}] Error while processing data [{}]; task is stopping", config.getId(), e.getMessage()).getFormattedMessage(); + LOGGER.debug(errorMsg, e); + } else { + String errorMsg = + new ParameterizedMessage("[{}] Error while processing data [{}]", config.getId(), e.getMessage()).getFormattedMessage(); LOGGER.error(errorMsg, e); - processContextByAllocation.get(task.getAllocationId()).setFailureReason(errorMsg); + processContext.setFailureReason(errorMsg); } } finally { closeProcess(task); - ProcessContext processContext = processContextByAllocation.remove(task.getAllocationId()); + processContextByAllocation.remove(task.getAllocationId()); LOGGER.debug("Removed process context for task [{}]; [{}] processes still running", config.getId(), processContextByAllocation.size()); if (processContext.getFailureReason() == null) { // This results in marking the persistent task as complete LOGGER.info("[{}] Marking task completed", config.getId()); - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); } else { LOGGER.error("[{}] Marking task failed; {}", config.getId(), processContext.getFailureReason()); task.updateState(DataFrameAnalyticsState.FAILED, processContext.getFailureReason()); + // Note: We are not marking the task as failed here as we want the user to be able to inspect the failure reason. } } } @@ -239,8 +240,8 @@ private void writeHeaderRecord(DataFrameDataExtractor dataExtractor, AnalyticsPr process.writeRecord(headerRecord); } - private void restoreState(DataFrameAnalyticsConfig config, @Nullable BytesReference state, AnalyticsProcess process, - Consumer failureHandler) { + private void restoreState(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, @Nullable BytesReference state, + AnalyticsProcess process) { if (config.getAnalysis().persistsState() == false) { LOGGER.debug("[{}] Analysis does not support state", config.getId()); return; @@ -258,7 +259,7 @@ private void restoreState(DataFrameAnalyticsConfig config, @Nullable BytesRefere process.restoreState(state); } catch (Exception e) { LOGGER.error(new ParameterizedMessage("[{}] Failed to restore state", process.getConfig().jobId()), e); - failureHandler.accept(ExceptionsHelper.serverError("Failed to restore state", e)); + task.updateState(DataFrameAnalyticsState.FAILED, "Failed to restore state: " + e.getMessage()); } } @@ -293,9 +294,10 @@ private void closeProcess(DataFrameAnalyticsTask task) { ProcessContext processContext = processContextByAllocation.get(task.getAllocationId()); try { - processContext.process.close(); + processContext.process.get().close(); LOGGER.info("[{}] Closed process", configId); } catch (Exception e) { + LOGGER.error("[" + configId + "] Error closing data frame analyzer process", e); String errorMsg = new ParameterizedMessage( "[{}] Error closing data frame analyzer process [{}]", configId, e.getMessage()).getFormattedMessage(); processContext.setFailureReason(errorMsg); @@ -323,42 +325,41 @@ int getProcessContextCount() { class ProcessContext { - private final String id; - private volatile AnalyticsProcess process; - private volatile DataFrameDataExtractor dataExtractor; - private volatile AnalyticsResultProcessor resultProcessor; - private volatile boolean processKilled; - private volatile String failureReason; + private final DataFrameAnalyticsConfig config; + private final SetOnce> process = new SetOnce<>(); + private final SetOnce dataExtractor = new 
SetOnce<>(); + private final SetOnce resultProcessor = new SetOnce<>(); + private final SetOnce failureReason = new SetOnce<>(); - ProcessContext(String id) { - this.id = Objects.requireNonNull(id); + ProcessContext(DataFrameAnalyticsConfig config) { + this.config = Objects.requireNonNull(config); } - synchronized String getFailureReason() { - return failureReason; + String getFailureReason() { + return failureReason.get(); } - synchronized void setFailureReason(String failureReason) { - // Only set the new reason if there isn't one already as we want to keep the first reason - if (this.failureReason == null && failureReason != null) { - this.failureReason = failureReason; + void setFailureReason(String failureReason) { + if (failureReason == null) { + return; } + // Only set the new reason if there isn't one already as we want to keep the first reason (most likely the root cause). + this.failureReason.trySet(failureReason); } synchronized void stop() { - LOGGER.debug("[{}] Stopping process", id); - processKilled = true; - if (dataExtractor != null) { - dataExtractor.cancel(); + LOGGER.debug("[{}] Stopping process", config.getId()); + if (dataExtractor.get() != null) { + dataExtractor.get().cancel(); } - if (resultProcessor != null) { - resultProcessor.cancel(); + if (resultProcessor.get() != null) { + resultProcessor.get().cancel(); } - if (process != null) { + if (process.get() != null) { try { - process.kill(); + process.get().kill(); } catch (IOException e) { - LOGGER.error(new ParameterizedMessage("[{}] Failed to kill process", id), e); + LOGGER.error(new ParameterizedMessage("[{}] Failed to kill process", config.getId()), e); } } } @@ -366,16 +367,17 @@ synchronized void stop() { /** * @return {@code true} if the process was started or {@code false} if it was not because it was stopped in the meantime */ - synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFactory, DataFrameAnalyticsConfig config, - DataFrameAnalyticsTask task, @Nullable BytesReference state) { - if (processKilled) { + synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFactory, + DataFrameAnalyticsTask task, + @Nullable BytesReference state) { + if (task.isStopping()) { // The job was stopped before we started the process so no need to start it return false; } - dataExtractor = dataExtractorFactory.newExtractor(false); + dataExtractor.set(dataExtractorFactory.newExtractor(false)); AnalyticsProcessConfig analyticsProcessConfig = - createProcessConfig(config, dataExtractor, dataExtractorFactory.getExtractedFields()); + createProcessConfig(dataExtractor.get(), dataExtractorFactory.getExtractedFields()); LOGGER.trace("[{}] creating analytics process with config [{}]", config.getId(), Strings.toString(analyticsProcessConfig)); // If we have no rows, that means there is no data so no point in starting the native process // just finish the task @@ -383,19 +385,16 @@ synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFac LOGGER.info("[{}] no data found to analyze. 
Will not start analytics native process.", config.getId()); return false; } - process = createProcess(task, config, analyticsProcessConfig, state); - DataFrameRowsJoiner dataFrameRowsJoiner = new DataFrameRowsJoiner(config.getId(), client, - dataExtractorFactory.newExtractor(true)); - resultProcessor = new AnalyticsResultProcessor( - config, dataFrameRowsJoiner, task.getProgressTracker(), trainedModelProvider, auditor, dataExtractor.getFieldNames()); + process.set(createProcess(task, config, analyticsProcessConfig, state)); + resultProcessor.set(createResultProcessor(task, dataExtractorFactory)); return true; } - private AnalyticsProcessConfig createProcessConfig( - DataFrameAnalyticsConfig config, DataFrameDataExtractor dataExtractor, ExtractedFields extractedFields) { + private AnalyticsProcessConfig createProcessConfig(DataFrameDataExtractor dataExtractor, + ExtractedFields extractedFields) { DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary(); Set categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis()); - AnalyticsProcessConfig processConfig = new AnalyticsProcessConfig( + return new AnalyticsProcessConfig( config.getId(), dataSummary.rows, dataSummary.cols, @@ -405,7 +404,14 @@ private AnalyticsProcessConfig createProcessConfig( categoricalFields, config.getAnalysis(), extractedFields); - return processConfig; + } + + private AnalyticsResultProcessor createResultProcessor(DataFrameAnalyticsTask task, + DataFrameDataExtractorFactory dataExtractorFactory) { + DataFrameRowsJoiner dataFrameRowsJoiner = + new DataFrameRowsJoiner(config.getId(), client, dataExtractorFactory.newExtractor(true)); + return new AnalyticsResultProcessor( + config, dataFrameRowsJoiner, task.getProgressTracker(), trainedModelProvider, auditor, dataExtractor.get().getFieldNames()); } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java index fd52a3fd8da5..77f0b127a263 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java @@ -24,12 +24,12 @@ public CustomProcessor create(DataFrameAnalysis analysis) { if (analysis instanceof Regression) { Regression regression = (Regression) analysis; return new DatasetSplittingCustomProcessor( - fieldNames, regression.getDependentVariable(), regression.getTrainingPercent()); + fieldNames, regression.getDependentVariable(), regression.getTrainingPercent(), regression.getRandomizeSeed()); } if (analysis instanceof Classification) { Classification classification = (Classification) analysis; return new DatasetSplittingCustomProcessor( - fieldNames, classification.getDependentVariable(), classification.getTrainingPercent()); + fieldNames, classification.getDependentVariable(), classification.getTrainingPercent(), classification.getRandomizeSeed()); } return row -> {}; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java index ed42cf519885..bf6284aa7a5c 100644 --- 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java @@ -5,7 +5,6 @@ */ package org.elasticsearch.xpack.ml.dataframe.process.customprocessing; -import org.elasticsearch.common.Randomness; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import java.util.List; @@ -23,12 +22,13 @@ class DatasetSplittingCustomProcessor implements CustomProcessor { private final int dependentVariableIndex; private final double trainingPercent; - private final Random random = Randomness.get(); + private final Random random; private boolean isFirstRow = true; - DatasetSplittingCustomProcessor(List fieldNames, String dependentVariable, double trainingPercent) { + DatasetSplittingCustomProcessor(List fieldNames, String dependentVariable, double trainingPercent, long randomizeSeed) { this.dependentVariableIndex = findDependentVariableIndex(fieldNames, dependentVariable); this.trainingPercent = trainingPercent; + this.random = new Random(randomizeSeed); } private static int findDependentVariableIndex(List fieldNames, String dependentVariable) { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java index 4a0d5fa7f36d..915d6c29efb4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState; import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsTask; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractor; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; @@ -22,12 +23,10 @@ import org.elasticsearch.xpack.ml.inference.persistence.TrainedModelProvider; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.junit.Before; -import org.mockito.ArgumentCaptor; import org.mockito.InOrder; import java.util.List; import java.util.concurrent.ExecutorService; -import java.util.function.Consumer; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -37,7 +36,6 @@ import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; @@ -66,8 +64,6 @@ public class AnalyticsProcessManagerTests extends ESTestCase { private DataFrameAnalyticsConfig dataFrameAnalyticsConfig; private DataFrameDataExtractorFactory dataExtractorFactory; private DataFrameDataExtractor dataExtractor; - private Consumer finishHandler; - private ArgumentCaptor exceptionCaptor; private AnalyticsProcessManager processManager; @SuppressWarnings("unchecked") @@ -97,9 +93,6 @@ public void setUpMocks() { dataExtractorFactory = mock(DataFrameDataExtractorFactory.class); 
when(dataExtractorFactory.newExtractor(anyBoolean())).thenReturn(dataExtractor); when(dataExtractorFactory.getExtractedFields()).thenReturn(mock(ExtractedFields.class)); - finishHandler = mock(Consumer.class); - - exceptionCaptor = ArgumentCaptor.forClass(Exception.class); processManager = new AnalyticsProcessManager( client, executorServiceForJob, executorServiceForProcess, processFactory, auditor, trainedModelProvider); @@ -108,54 +101,68 @@ public void setUpMocks() { public void testRunJob_TaskIsStopping() { when(task.isStopping()).thenReturn(true); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(0)); - verify(finishHandler).accept(null); - verifyNoMoreInteractions(finishHandler); + InOrder inOrder = inOrder(task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).markAsCompleted(); + verifyNoMoreInteractions(task); } public void testRunJob_ProcessContextAlreadyExists() { - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - verify(finishHandler).accept(exceptionCaptor.capture()); - verifyNoMoreInteractions(finishHandler); - - Exception e = exceptionCaptor.getValue(); - assertThat(e.getMessage(), equalTo("[config-id] Could not create process as one already exists")); + InOrder inOrder = inOrder(task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getProgressTracker(); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).updateState(DataFrameAnalyticsState.FAILED, "[config-id] Could not create process as one already exists"); + verifyNoMoreInteractions(task); } public void testRunJob_EmptyDataFrame() { when(dataExtractor.collectDataSummary()).thenReturn(new DataFrameDataExtractor.DataSummary(0, NUM_COLS)); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(0)); // Make sure the process context did not leak - InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); - inOrder.verify(finishHandler).accept(null); - verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, finishHandler); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).markAsCompleted(); + verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, task); } public void testRunJob_Ok() { - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, 
finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); inOrder.verify(process).isProcessAlive(); + inOrder.verify(task).getProgressTracker(); inOrder.verify(dataExtractor).getFieldNames(); inOrder.verify(executorServiceForProcess, times(2)).execute(any()); // 'processData' and 'processResults' threads - verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, task); } public void testProcessContext_GetSetFailureReason() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); assertThat(processContext.getFailureReason(), is(nullValue())); processContext.setFailureReason("reason1"); @@ -167,50 +174,57 @@ public void testProcessContext_GetSetFailureReason() { processContext.setFailureReason("reason2"); assertThat(processContext.getFailureReason(), equalTo("reason1")); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } - public void testProcessContext_StartProcess_ProcessAlreadyKilled() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + public void testProcessContext_StartProcess_TaskAlreadyStopped() { + when(task.isStopping()).thenReturn(true); + + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); processContext.stop(); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, null), is(false)); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(false)); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); + inOrder.verify(task).isStopping(); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_StartProcess_EmptyDataFrame() { when(dataExtractor.collectDataSummary()).thenReturn(new DataFrameDataExtractor.DataSummary(0, NUM_COLS)); - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, null), is(false)); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(false)); - InOrder inOrder = inOrder(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); - verifyNoMoreInteractions(dataExtractor, process, 
finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_StartAndStop() throws Exception { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, null), is(true)); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(true)); processContext.stop(); - InOrder inOrder = inOrder(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); // startProcess + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); inOrder.verify(process).isProcessAlive(); + inOrder.verify(task).getProgressTracker(); inOrder.verify(dataExtractor).getFieldNames(); // stop inOrder.verify(dataExtractor).cancel(); inOrder.verify(process).kill(); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_Stop() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); processContext.stop(); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java index d5973f878246..d18adc3dcdb4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java @@ -24,6 +24,7 @@ public class DatasetSplittingCustomProcessorTests extends ESTestCase { private List fields; private int dependentVariableIndex; private String dependentVariable; + private long randomizeSeed; @Before public void setUpTests() { @@ -34,10 +35,11 @@ public void setUpTests() { } dependentVariableIndex = randomIntBetween(0, fieldCount - 1); dependentVariable = fields.get(dependentVariableIndex); + randomizeSeed = randomLong(); } public void testProcess_GivenRowsWithoutDependentVariableValue() { - CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 50.0); + CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 50.0, randomizeSeed); for (int i = 0; i < 100; i++) { String[] row = new String[fields.size()]; @@ -55,7 +57,7 @@ public void testProcess_GivenRowsWithoutDependentVariableValue() { } public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsHundred() { - CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 100.0); + CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 100.0, randomizeSeed); for (int i = 0; i < 100; i++) { String[] row = new 
String[fields.size()]; @@ -75,7 +77,7 @@ public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIs public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsRandom() { double trainingPercent = randomDoubleBetween(1.0, 100.0, true); double trainingFraction = trainingPercent / 100; - CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, trainingPercent); + CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, trainingPercent, randomizeSeed); int runCount = 20; int rowsCount = 1000; @@ -121,7 +123,7 @@ public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIs } public void testProcess_ShouldHaveAtLeastOneTrainingRow() { - CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 1.0); + CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 1.0, randomizeSeed); // We have some non-training rows and then a training row to check // we maintain the first training row and not just the first row diff --git a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d1657fccc5ee --- /dev/null +++ b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +8ca36adea0a904ec725d57f509a62652a53ecff8 \ No newline at end of file diff --git a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4b736046f3ad..000000000000 --- a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -aa74590851b6fcf536976f75448be52f6ca18a4a \ No newline at end of file diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json new file mode 100644 index 000000000000..163ad5558c3d --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json @@ -0,0 +1,19 @@ +{ + "slm.get_status":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-get-status.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/status", + "methods":[ + "GET" + ] + } + ] + }, + "params":{} + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json new file mode 100644 index 000000000000..21ae3d509786 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json @@ -0,0 +1,19 @@ +{ + "slm.start":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-start.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/start", + "methods":[ + "POST" + ] + } + ] + }, + "params":{} + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json new file mode 100644 index 000000000000..63b74ab9c2f7 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json @@ -0,0 +1,19 @@ +{ + "slm.stop":{ + "documentation":{ + 
"url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-stop.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/stop", + "methods":[ + "POST" + ] + } + ] + }, + "params":{} + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml index a1d78b744405..4335a50382a9 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml @@ -1456,7 +1456,8 @@ setup: "eta": 0.5, "maximum_number_trees": 400, "feature_bag_fraction": 0.3, - "training_percent": 60.3 + "training_percent": 60.3, + "randomize_seed": 42 } } } @@ -1472,7 +1473,8 @@ setup: "maximum_number_trees": 400, "feature_bag_fraction": 0.3, "prediction_field_name": "foo_prediction", - "training_percent": 60.3 + "training_percent": 60.3, + "randomize_seed": 42 } }} - is_true: create_time @@ -1796,7 +1798,8 @@ setup: "eta": 0.5, "maximum_number_trees": 400, "feature_bag_fraction": 0.3, - "training_percent": 60.3 + "training_percent": 60.3, + "randomize_seed": 24 } } } @@ -1813,6 +1816,7 @@ setup: "feature_bag_fraction": 0.3, "prediction_field_name": "foo_prediction", "training_percent": 60.3, + "randomize_seed": 24, "num_top_classes": 2 } }} @@ -1836,7 +1840,8 @@ setup: }, "analysis": { "regression": { - "dependent_variable": "foo" + "dependent_variable": "foo", + "randomize_seed": 42 } } } @@ -1848,7 +1853,8 @@ setup: "regression":{ "dependent_variable": "foo", "prediction_field_name": "foo_prediction", - "training_percent": 100.0 + "training_percent": 100.0, + "randomize_seed": 42 } }} - is_true: create_time diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java index fcc4eb0e9422..1f8e87cad1f9 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java @@ -6,6 +6,8 @@ package org.elasticsearch.xpack.watcher.actions.email; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.logging.log4j.util.Supplier; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.xpack.core.watcher.actions.Action; import org.elasticsearch.xpack.core.watcher.actions.ExecutableAction; @@ -57,6 +59,8 @@ public Action.Result execute(String actionId, WatchExecutionContext ctx, Payload Attachment attachment = parser.toAttachment(ctx, payload, emailAttachment); attachments.put(attachment.id(), attachment); } catch (ElasticsearchException | IOException e) { + logger().error( + (Supplier) () -> new ParameterizedMessage("failed to execute action [{}/{}]", ctx.watch().id(), actionId), e); return new EmailAction.Result.FailureWithException(action.type(), e); } } diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java index 0556b8535e42..75e5bc1073e6 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java +++ 
b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java @@ -64,7 +64,7 @@ public void testWatcherMetadataParsingDoesNotSwallowOtherMetaData() throws Excep builder = metaDataBuilder.build().toXContent(builder, params); builder.endObject(); // deserialize metadata again - MetaData metaData = MetaData.Builder.fromXContent(createParser(builder)); + MetaData metaData = MetaData.Builder.fromXContent(createParser(builder), randomBoolean()); // check that custom metadata still present assertThat(metaData.custom(watcherMetaData.getWriteableName()), notNullValue()); assertThat(metaData.custom(repositoriesMetaData.getWriteableName()), notNullValue()); diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml index b0cb91c4c0f5..7780691b2bbb 100644 --- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml +++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml @@ -60,6 +60,7 @@ - match: { data_frame_analytics.0.dest.index: "old_cluster_regression_job_results" } - match: { data_frame_analytics.0.analysis.regression.dependent_variable: "foo" } - match: { data_frame_analytics.0.analysis.regression.training_percent: 100.0 } + - is_true: data_frame_analytics.0.analysis.regression.randomize_seed --- "Get old regression job stats": diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml index 28ec80c6373a..14438883f0da 100644 --- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml +++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml @@ -40,6 +40,7 @@ - match: { data_frame_analytics.0.dest.index: "old_cluster_regression_job_results" } - match: { data_frame_analytics.0.analysis.regression.dependent_variable: "foo" } - match: { data_frame_analytics.0.analysis.regression.training_percent: 100.0 } + - is_true: data_frame_analytics.0.analysis.regression.randomize_seed --- "Get old cluster regression job stats":
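The integration tests above (testTwoJobsWithSameRandomizeSeedUseSameTrainingSet) rely on the train/test split being a pure function of randomize_seed: a Random seeded with the same value produces the same sequence of draws, so two jobs over the same rows mark exactly the same documents as training rows. A minimal sketch of that idea, assuming a row is a String[] and the dependent variable is blanked out for held-out rows; the class and method names here are illustrative, not the actual DatasetSplittingCustomProcessor:

import java.util.Random;

// Illustrative sketch of a seeded train/test split: with the same randomizeSeed and the
// same row order, two runs keep exactly the same rows as training rows.
class SeededTrainTestSplitSketch {

    private final Random random;              // seeded, so the sequence of draws is deterministic
    private final double trainingFraction;    // trainingPercent / 100
    private final int dependentVariableIndex;

    SeededTrainTestSplitSketch(long randomizeSeed, double trainingPercent, int dependentVariableIndex) {
        this.random = new Random(randomizeSeed);
        this.trainingFraction = trainingPercent / 100.0;
        this.dependentVariableIndex = dependentVariableIndex;
    }

    /** Returns true if the row is kept for training; otherwise its label is blanked out. */
    boolean processRow(String[] row) {
        if (row[dependentVariableIndex].isEmpty()) {
            return false;                      // unlabelled rows can never be training rows
        }
        if (random.nextDouble() <= trainingFraction) {
            return true;                       // labelled row drawn into the training set
        }
        row[dependentVariableIndex] = "";      // labelled row held out: hide the label from the process
        return false;
    }
}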
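The toXContent tests above encode a compatibility rule: randomize_seed is written only when the requesting version knows about the field, so a "version" param of 7.5.0 suppresses it while the current version (or no version param at all) includes it. A hedged sketch of such a gate, assuming the field was introduced in 7.6.0 and that a helper like the one below exists; both are assumptions for illustration, not the actual Regression/Classification implementation:

import java.io.IOException;
import org.elasticsearch.Version;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;

// Sketch only: the introduction version constant and the helper name are assumptions.
final class RandomizeSeedXContentSketch {

    static void maybeWriteRandomizeSeed(XContentBuilder builder, ToXContent.Params params, Long randomizeSeed) throws IOException {
        if (randomizeSeed == null) {
            return;
        }
        String versionParam = params.param("version");   // e.g. "7.5.0", or null when no version is given
        Version version = versionParam == null ? Version.CURRENT : Version.fromString(versionParam);
        if (version.onOrAfter(Version.V_7_6_0)) {        // assumed introduction version
            builder.field("randomize_seed", randomizeSeed);
        }
        // Versions before the field existed never see it, matching the 7.5.0 test expectations.
    }
}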