diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java new file mode 100644 index 0000000000000..f69b9ccbf9ff4 --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java @@ -0,0 +1,139 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +/** + * Analysis limits for autodetect. In particular, + * this is a collection of parameters that allow limiting + * the resources used by the job. + */ +public class AnalysisLimits implements ToXContentObject { + + /** + * Serialisation field names + */ + public static final ParseField MODEL_MEMORY_LIMIT = new ParseField("model_memory_limit"); + public static final ParseField CATEGORIZATION_EXAMPLES_LIMIT = new ParseField("categorization_examples_limit"); + + public static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>("analysis_limits", true, a -> new AnalysisLimits((Long) a[0], (Long) a[1])); + + static { + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return ByteSizeValue.parseBytesSizeValue(p.text(), MODEL_MEMORY_LIMIT.getPreferredName()).getMb(); + } else if (p.currentToken() == XContentParser.Token.VALUE_NUMBER) { + return p.longValue(); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, MODEL_MEMORY_LIMIT, ObjectParser.ValueType.VALUE); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), CATEGORIZATION_EXAMPLES_LIMIT); + } + + /** + * The model memory limit in MiBs. + * It is initialised to null, which implies that the server-side default will be used. + */ + private final Long modelMemoryLimit; + + /** + * It is initialised to null. + * A value of null will result in the server-side default being used. + */ + private final Long categorizationExamplesLimit; + + public AnalysisLimits(Long categorizationExamplesLimit) { + this(null, categorizationExamplesLimit); + } + + public AnalysisLimits(Long modelMemoryLimit, Long categorizationExamplesLimit) { + this.modelMemoryLimit = modelMemoryLimit; + this.categorizationExamplesLimit = categorizationExamplesLimit; + } + + /** + * Maximum size of the model in MB before the anomaly detector + * will drop new samples to prevent the model using any more + * memory. + * + * @return The set memory limit or null if not set + */ + @Nullable + public Long getModelMemoryLimit() { + return modelMemoryLimit; + } + + /** + * Gets the limit to the number of examples that are stored per category + * + * @return the limit or null if not set + */ + @Nullable + public Long getCategorizationExamplesLimit() { + return categorizationExamplesLimit; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (modelMemoryLimit != null) { + builder.field(MODEL_MEMORY_LIMIT.getPreferredName(), modelMemoryLimit + "mb"); + } + if (categorizationExamplesLimit != null) { + builder.field(CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), categorizationExamplesLimit); + } + builder.endObject(); + return builder; + } + + /** + * Overridden equality test + */ + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other instanceof AnalysisLimits == false) { + return false; + } + + AnalysisLimits that = (AnalysisLimits) other; + return Objects.equals(this.modelMemoryLimit, that.modelMemoryLimit) && + Objects.equals(this.categorizationExamplesLimit, that.categorizationExamplesLimit); + } + + @Override + public int hashCode() { + return Objects.hash(modelMemoryLimit, categorizationExamplesLimit); + } +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java new file mode 100644 index 0000000000000..dc7f047b80404 --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java @@ -0,0 +1,339 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.ToXContentFragment; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Configuration for the categorization analyzer. + * + * The syntax is a subset of what can be supplied to the {@linkplain RestAnalyzeAction _analyze endpoint}. + * To summarize, the first option is to specify the name of an out-of-the-box analyzer: + * + * "categorization_analyzer" : "standard" + * + * + * The second option is to specify a custom analyzer by combining the char_filters, tokenizer + * and token_filters fields. In turn, each of these can be specified as the name of an out-of-the-box + * one or as an object defining a custom one. For example: + * + * "char_filters" : [ + * "html_strip", + * { "type" : "pattern_replace", "pattern": "SQL: .*" } + * ], + * "tokenizer" : "thai", + * "token_filters" : [ + * "lowercase", + * { "type" : "pattern_replace", "pattern": "^[0-9].*" } + * ] + * + */ +public class CategorizationAnalyzerConfig implements ToXContentFragment { + + public static final ParseField CATEGORIZATION_ANALYZER = new ParseField("categorization_analyzer"); + private static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER; + private static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS; + private static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS; + + /** + * This method is only used in the unit tests - in production code this config is always parsed as a fragment. + */ + static CategorizationAnalyzerConfig buildFromXContentObject(XContentParser parser) throws IOException { + + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new IllegalArgumentException("Expected start object but got [" + parser.currentToken() + "]"); + } + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new IllegalArgumentException("Expected field name but got [" + parser.currentToken() + "]"); + } + parser.nextToken(); + CategorizationAnalyzerConfig categorizationAnalyzerConfig = buildFromXContentFragment(parser); + parser.nextToken(); + return categorizationAnalyzerConfig; + } + + /** + * Parse a categorization_analyzer configuration. A custom parser is needed due to the + * complexity of the format, with many elements able to be specified as either the name of a built-in + * element or an object containing a custom definition. + */ + static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser parser) throws IOException { + + CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder(); + + XContentParser.Token token = parser.currentToken(); + if (token == XContentParser.Token.VALUE_STRING) { + builder.setAnalyzer(parser.text()); + } else if (token != XContentParser.Token.START_OBJECT) { + throw new IllegalArgumentException("[" + CATEGORIZATION_ANALYZER + "] should be analyzer's name or settings [" + token + "]"); + } else { + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (CHAR_FILTERS.match(currentFieldName, parser.getDeprecationHandler()) + && token == XContentParser.Token.START_ARRAY) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.VALUE_STRING) { + builder.addCharFilter(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + builder.addCharFilter(parser.map()); + } else { + throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER + + "] array element should contain char_filter's name or settings [" + token + "]"); + } + } + } else if (TOKENIZER.match(currentFieldName, parser.getDeprecationHandler())) { + if (token == XContentParser.Token.VALUE_STRING) { + builder.setTokenizer(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + builder.setTokenizer(parser.map()); + } else { + throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER + + "] should be tokenizer's name or settings [" + token + "]"); + } + } else if (TOKEN_FILTERS.match(currentFieldName, parser.getDeprecationHandler()) + && token == XContentParser.Token.START_ARRAY) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.VALUE_STRING) { + builder.addTokenFilter(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + builder.addTokenFilter(parser.map()); + } else { + throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER + + "] array element should contain token_filter's name or settings [" + token + "]"); + } + } + } + } + } + + return builder.build(); + } + + /** + * Simple store of either a name of a built-in analyzer element or a custom definition. + */ + public static final class NameOrDefinition implements ToXContentFragment { + + // Exactly one of these two members is not null + public final String name; + public final Settings definition; + + NameOrDefinition(String name) { + this.name = Objects.requireNonNull(name); + this.definition = null; + } + + NameOrDefinition(ParseField field, Map definition) { + this.name = null; + Objects.requireNonNull(definition); + try { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + builder.map(definition); + this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build(); + } catch (IOException e) { + throw new IllegalArgumentException("Failed to parse [" + definition + "] in [" + field.getPreferredName() + "]", e); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (definition == null) { + builder.value(name); + } else { + builder.startObject(); + definition.toXContent(builder, params); + builder.endObject(); + } + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NameOrDefinition that = (NameOrDefinition) o; + return Objects.equals(name, that.name) && + Objects.equals(definition, that.definition); + } + + @Override + public int hashCode() { + return Objects.hash(name, definition); + } + + @Override + public String toString() { + if (definition == null) { + return name; + } else { + return definition.toDelimitedString(';'); + } + } + } + + private final String analyzer; + private final List charFilters; + private final NameOrDefinition tokenizer; + private final List tokenFilters; + + private CategorizationAnalyzerConfig(String analyzer, List charFilters, NameOrDefinition tokenizer, + List tokenFilters) { + this.analyzer = analyzer; + this.charFilters = Collections.unmodifiableList(charFilters); + this.tokenizer = tokenizer; + this.tokenFilters = Collections.unmodifiableList(tokenFilters); + } + + public String getAnalyzer() { + return analyzer; + } + + public List getCharFilters() { + return charFilters; + } + + public NameOrDefinition getTokenizer() { + return tokenizer; + } + + public List getTokenFilters() { + return tokenFilters; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (analyzer != null) { + builder.field(CATEGORIZATION_ANALYZER.getPreferredName(), analyzer); + } else { + builder.startObject(CATEGORIZATION_ANALYZER.getPreferredName()); + if (charFilters.isEmpty() == false) { + builder.startArray(CHAR_FILTERS.getPreferredName()); + for (NameOrDefinition charFilter : charFilters) { + charFilter.toXContent(builder, params); + } + builder.endArray(); + } + if (tokenizer != null) { + builder.field(TOKENIZER.getPreferredName(), tokenizer); + } + if (tokenFilters.isEmpty() == false) { + builder.startArray(TOKEN_FILTERS.getPreferredName()); + for (NameOrDefinition tokenFilter : tokenFilters) { + tokenFilter.toXContent(builder, params); + } + builder.endArray(); + } + builder.endObject(); + } + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CategorizationAnalyzerConfig that = (CategorizationAnalyzerConfig) o; + return Objects.equals(analyzer, that.analyzer) && + Objects.equals(charFilters, that.charFilters) && + Objects.equals(tokenizer, that.tokenizer) && + Objects.equals(tokenFilters, that.tokenFilters); + } + + @Override + public int hashCode() { + return Objects.hash(analyzer, charFilters, tokenizer, tokenFilters); + } + + public static class Builder { + + private String analyzer; + private List charFilters = new ArrayList<>(); + private NameOrDefinition tokenizer; + private List tokenFilters = new ArrayList<>(); + + public Builder() { + } + + public Builder(CategorizationAnalyzerConfig categorizationAnalyzerConfig) { + this.analyzer = categorizationAnalyzerConfig.analyzer; + this.charFilters = new ArrayList<>(categorizationAnalyzerConfig.charFilters); + this.tokenizer = categorizationAnalyzerConfig.tokenizer; + this.tokenFilters = new ArrayList<>(categorizationAnalyzerConfig.tokenFilters); + } + + public Builder setAnalyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + public Builder addCharFilter(String charFilter) { + this.charFilters.add(new NameOrDefinition(charFilter)); + return this; + } + + public Builder addCharFilter(Map charFilter) { + this.charFilters.add(new NameOrDefinition(CHAR_FILTERS, charFilter)); + return this; + } + + public Builder setTokenizer(String tokenizer) { + this.tokenizer = new NameOrDefinition(tokenizer); + return this; + } + + public Builder setTokenizer(Map tokenizer) { + this.tokenizer = new NameOrDefinition(TOKENIZER, tokenizer); + return this; + } + + public Builder addTokenFilter(String tokenFilter) { + this.tokenFilters.add(new NameOrDefinition(tokenFilter)); + return this; + } + + public Builder addTokenFilter(Map tokenFilter) { + this.tokenFilters.add(new NameOrDefinition(TOKEN_FILTERS, tokenFilter)); + return this; + } + + /** + * Create a config + */ + public CategorizationAnalyzerConfig build() { + return new CategorizationAnalyzerConfig(analyzer, charFilters, tokenizer, tokenFilters); + } + } +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java new file mode 100644 index 0000000000000..f469512f64973 --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java @@ -0,0 +1,282 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser.ValueType; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Locale; +import java.util.Objects; + +/** + * Describes the format of the data used in the job and how it should + * be interpreted by the ML job. + *

+ * {@link #getTimeField()} is the name of the field containing the timestamp and + * {@link #getTimeFormat()} is the format code for the date string in as described by + * {@link java.time.format.DateTimeFormatter}. + */ +public class DataDescription implements ToXContentObject { + /** + * Enum of the acceptable data formats. + */ + public enum DataFormat { + XCONTENT, + + /** + * This is deprecated + */ + DELIMITED; + + /** + * Case-insensitive from string method. + * Works with either XCONTENT, XContent, etc. + * + * @param value String representation + * @return The data format + */ + public static DataFormat forString(String value) { + return DataFormat.valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + private static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description"); + private static final ParseField FORMAT_FIELD = new ParseField("format"); + private static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field"); + private static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format"); + private static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter"); + private static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character"); + + /** + * Special time format string for epoch times (seconds) + */ + public static final String EPOCH = "epoch"; + + /** + * Special time format string for epoch times (milli-seconds) + */ + public static final String EPOCH_MS = "epoch_ms"; + + /** + * By default autodetect expects the timestamp in a field with this name + */ + public static final String DEFAULT_TIME_FIELD = "time"; + + /** + * The default field delimiter expected by the native autodetect + * program. + */ + public static final char DEFAULT_DELIMITER = '\t'; + + /** + * The default quote character used to escape text in + * delimited data formats + */ + public static final char DEFAULT_QUOTE_CHAR = '"'; + + private final DataFormat dataFormat; + private final String timeFieldName; + private final String timeFormat; + private final Character fieldDelimiter; + private final Character quoteCharacter; + + public static final ObjectParser PARSER = + new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), true, Builder::new); + + static { + PARSER.declareString(Builder::setFormat, FORMAT_FIELD); + PARSER.declareString(Builder::setTimeField, TIME_FIELD_NAME_FIELD); + PARSER.declareString(Builder::setTimeFormat, TIME_FORMAT_FIELD); + PARSER.declareField(Builder::setFieldDelimiter, DataDescription::extractChar, FIELD_DELIMITER_FIELD, ValueType.STRING); + PARSER.declareField(Builder::setQuoteCharacter, DataDescription::extractChar, QUOTE_CHARACTER_FIELD, ValueType.STRING); + } + + public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, Character fieldDelimiter, + Character quoteCharacter) { + this.dataFormat = dataFormat; + this.timeFieldName = timeFieldName; + this.timeFormat = timeFormat; + this.fieldDelimiter = fieldDelimiter; + this.quoteCharacter = quoteCharacter; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (dataFormat != DataFormat.XCONTENT) { + builder.field(FORMAT_FIELD.getPreferredName(), dataFormat); + } + builder.field(TIME_FIELD_NAME_FIELD.getPreferredName(), timeFieldName); + builder.field(TIME_FORMAT_FIELD.getPreferredName(), timeFormat); + if (fieldDelimiter != null) { + builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter)); + } + if (quoteCharacter != null) { + builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter)); + } + builder.endObject(); + return builder; + } + + /** + * The format of the data to be processed. + * Defaults to {@link DataDescription.DataFormat#XCONTENT} + * + * @return The data format + */ + public DataFormat getFormat() { + return dataFormat; + } + + /** + * The name of the field containing the timestamp + * + * @return A String if set or null + */ + public String getTimeField() { + return timeFieldName; + } + + /** + * Either {@value #EPOCH}, {@value #EPOCH_MS} or a SimpleDateTime format string. + * If not set (is null or an empty string) or set to + * {@value #EPOCH_MS} (the default) then the date is assumed to be in + * milliseconds from the epoch. + * + * @return A String if set or null + */ + public String getTimeFormat() { + return timeFormat; + } + + /** + * If the data is in a delimited format with a header e.g. csv or tsv + * this is the delimiter character used. This is only applicable if + * {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}. + * The default value for delimited format is {@value #DEFAULT_DELIMITER}. + * + * @return A char + */ + public Character getFieldDelimiter() { + return fieldDelimiter; + } + + /** + * The quote character used in delimited formats. + * The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}. + * + * @return The delimited format quote character + */ + public Character getQuoteCharacter() { + return quoteCharacter; + } + + private static Character extractChar(XContentParser parser) throws IOException { + if (parser.currentToken() == XContentParser.Token.VALUE_STRING) { + String charStr = parser.text(); + if (charStr.length() != 1) { + throw new IllegalArgumentException("String must be a single character, found [" + charStr + "]"); + } + return charStr.charAt(0); + } + throw new IllegalArgumentException("Unsupported token [" + parser.currentToken() + "]"); + } + + /** + * Overridden equality test + */ + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other instanceof DataDescription == false) { + return false; + } + + DataDescription that = (DataDescription) other; + + return this.dataFormat == that.dataFormat && + Objects.equals(this.quoteCharacter, that.quoteCharacter) && + Objects.equals(this.timeFieldName, that.timeFieldName) && + Objects.equals(this.timeFormat, that.timeFormat) && + Objects.equals(this.fieldDelimiter, that.fieldDelimiter); + } + + @Override + public int hashCode() { + return Objects.hash(dataFormat, quoteCharacter, timeFieldName, timeFormat, fieldDelimiter); + } + + public static class Builder { + + private DataFormat dataFormat = DataFormat.XCONTENT; + private String timeFieldName = DEFAULT_TIME_FIELD; + private String timeFormat = EPOCH_MS; + private Character fieldDelimiter; + private Character quoteCharacter; + + public void setFormat(DataFormat format) { + dataFormat = Objects.requireNonNull(format); + } + + private void setFormat(String format) { + setFormat(DataFormat.forString(format)); + } + + public void setTimeField(String fieldName) { + timeFieldName = Objects.requireNonNull(fieldName); + } + + public void setTimeFormat(String format) { + timeFormat = Objects.requireNonNull(format); + } + + public void setFieldDelimiter(Character delimiter) { + fieldDelimiter = delimiter; + } + + public void setQuoteCharacter(Character value) { + quoteCharacter = value; + } + + public DataDescription build() { + if (dataFormat == DataFormat.DELIMITED) { + if (fieldDelimiter == null) { + fieldDelimiter = DEFAULT_DELIMITER; + } + if (quoteCharacter == null) { + quoteCharacter = DEFAULT_QUOTE_CHAR; + } + } + return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter, quoteCharacter); + } + } +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java new file mode 100644 index 0000000000000..59b0252a7660e --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java @@ -0,0 +1,88 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +public class ModelPlotConfig implements ToXContentObject { + + private static final ParseField TYPE_FIELD = new ParseField("model_plot_config"); + private static final ParseField ENABLED_FIELD = new ParseField("enabled"); + public static final ParseField TERMS_FIELD = new ParseField("terms"); + + public static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>(TYPE_FIELD.getPreferredName(), true, a -> new ModelPlotConfig((boolean) a[0], (String) a[1])); + + static { + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED_FIELD); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TERMS_FIELD); + } + + private final boolean enabled; + private final String terms; + + public ModelPlotConfig(boolean enabled, String terms) { + this.enabled = enabled; + this.terms = terms; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(ENABLED_FIELD.getPreferredName(), enabled); + if (terms != null) { + builder.field(TERMS_FIELD.getPreferredName(), terms); + } + builder.endObject(); + return builder; + } + + public boolean isEnabled() { + return enabled; + } + + public String getTerms() { + return this.terms; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other instanceof ModelPlotConfig == false) { + return false; + } + + ModelPlotConfig that = (ModelPlotConfig) other; + return this.enabled == that.enabled && Objects.equals(this.terms, that.terms); + } + + @Override + public int hashCode() { + return Objects.hash(enabled, terms); + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java new file mode 100644 index 0000000000000..5003da10780d4 --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java @@ -0,0 +1,105 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.xcontent.DeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; + +public class AnalysisLimitsTests extends AbstractXContentTestCase { + + @Override + protected AnalysisLimits createTestInstance() { + return createRandomized(); + } + + public static AnalysisLimits createRandomized() { + return new AnalysisLimits(randomBoolean() ? (long) randomIntBetween(1, 1000000) : null, + randomBoolean() ? randomNonNegativeLong() : null); + } + + @Override + protected AnalysisLimits doParseInstance(XContentParser parser) { + return AnalysisLimits.PARSER.apply(parser, null); + } + + public void testParseModelMemoryLimitGivenPositiveNumber() throws IOException { + String json = "{\"model_memory_limit\": 2048}"; + XContentParser parser = XContentFactory.xContent(XContentType.JSON) + .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json); + + AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null); + + assertThat(limits.getModelMemoryLimit(), equalTo(2048L)); + } + + public void testParseModelMemoryLimitGivenStringMultipleOfMBs() throws IOException { + String json = "{\"model_memory_limit\":\"4g\"}"; + XContentParser parser = XContentFactory.xContent(XContentType.JSON) + .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json); + + AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null); + + assertThat(limits.getModelMemoryLimit(), equalTo(4096L)); + } + + public void testEquals_GivenEqual() { + AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L); + AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 20L); + + assertTrue(analysisLimits1.equals(analysisLimits1)); + assertTrue(analysisLimits1.equals(analysisLimits2)); + assertTrue(analysisLimits2.equals(analysisLimits1)); + } + + public void testEquals_GivenDifferentModelMemoryLimit() { + AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L); + AnalysisLimits analysisLimits2 = new AnalysisLimits(11L, 20L); + + assertFalse(analysisLimits1.equals(analysisLimits2)); + assertFalse(analysisLimits2.equals(analysisLimits1)); + } + + public void testEquals_GivenDifferentCategorizationExamplesLimit() { + AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L); + AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 21L); + + assertFalse(analysisLimits1.equals(analysisLimits2)); + assertFalse(analysisLimits2.equals(analysisLimits1)); + } + + public void testHashCode_GivenEqual() { + AnalysisLimits analysisLimits1 = new AnalysisLimits(5555L, 3L); + AnalysisLimits analysisLimits2 = new AnalysisLimits(5555L, 3L); + + assertEquals(analysisLimits1.hashCode(), analysisLimits2.hashCode()); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java new file mode 100644 index 0000000000000..36fb51ed10e72 --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java @@ -0,0 +1,87 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class CategorizationAnalyzerConfigTests extends AbstractXContentTestCase { + + @Override + protected CategorizationAnalyzerConfig createTestInstance() { + return createRandomized().build(); + } + + public static CategorizationAnalyzerConfig.Builder createRandomized() { + CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder(); + if (rarely()) { + builder.setAnalyzer(randomAlphaOfLength(10)); + } else { + if (randomBoolean()) { + for (String pattern : generateRandomStringArray(3, 40, false)) { + if (rarely()) { + builder.addCharFilter(randomAlphaOfLength(10)); + } else { + Map charFilter = new HashMap<>(); + charFilter.put("type", "pattern_replace"); + charFilter.put("pattern", pattern); + builder.addCharFilter(charFilter); + } + } + } + + if (rarely()) { + builder.setTokenizer(randomAlphaOfLength(10)); + } else { + Map tokenizer = new HashMap<>(); + tokenizer.put("type", "pattern"); + tokenizer.put("pattern", randomAlphaOfLength(10)); + builder.setTokenizer(tokenizer); + } + + if (randomBoolean()) { + for (String pattern : generateRandomStringArray(4, 40, false)) { + if (rarely()) { + builder.addTokenFilter(randomAlphaOfLength(10)); + } else { + Map tokenFilter = new HashMap<>(); + tokenFilter.put("type", "pattern_replace"); + tokenFilter.put("pattern", pattern); + builder.addTokenFilter(tokenFilter); + } + } + } + } + return builder; + } + + @Override + protected CategorizationAnalyzerConfig doParseInstance(XContentParser parser) throws IOException { + return CategorizationAnalyzerConfig.buildFromXContentObject(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java new file mode 100644 index 0000000000000..8ca2dc494f3c5 --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java @@ -0,0 +1,185 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.protocol.xpack.ml.job.config.DataDescription.DataFormat; +import org.elasticsearch.test.AbstractXContentTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.core.Is.is; + +public class DataDescriptionTests extends AbstractXContentTestCase { + + public void testDefault() { + DataDescription dataDescription = new DataDescription.Builder().build(); + assertThat(dataDescription.getFormat(), equalTo(DataFormat.XCONTENT)); + assertThat(dataDescription.getTimeField(), equalTo("time")); + assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms")); + assertThat(dataDescription.getFieldDelimiter(), is(nullValue())); + assertThat(dataDescription.getQuoteCharacter(), is(nullValue())); + } + + public void testDefaultDelimited() { + DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder(); + dataDescriptionBuilder.setFormat(DataFormat.DELIMITED); + DataDescription dataDescription = dataDescriptionBuilder.build(); + + assertThat(dataDescription.getFormat(), equalTo(DataFormat.DELIMITED)); + assertThat(dataDescription.getTimeField(), equalTo("time")); + assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms")); + assertThat(dataDescription.getFieldDelimiter(), is('\t')); + assertThat(dataDescription.getQuoteCharacter(), is('"')); + } + + public void testEquals_GivenDifferentDateFormat() { + DataDescription.Builder description1 = new DataDescription.Builder(); + description1.setFormat(DataFormat.XCONTENT); + description1.setQuoteCharacter('"'); + description1.setTimeField("timestamp"); + description1.setTimeFormat("epoch"); + description1.setFieldDelimiter(','); + + DataDescription.Builder description2 = new DataDescription.Builder(); + description2.setFormat(DataFormat.DELIMITED); + description2.setQuoteCharacter('"'); + description2.setTimeField("timestamp"); + description2.setTimeFormat("epoch"); + description2.setFieldDelimiter(','); + + assertFalse(description1.build().equals(description2.build())); + assertFalse(description2.build().equals(description1.build())); + } + + public void testEquals_GivenDifferentQuoteCharacter() { + DataDescription.Builder description1 = new DataDescription.Builder(); + description1.setFormat(DataFormat.XCONTENT); + description1.setQuoteCharacter('"'); + description1.setTimeField("timestamp"); + description1.setTimeFormat("epoch"); + description1.setFieldDelimiter(','); + + DataDescription.Builder description2 = new DataDescription.Builder(); + description2.setFormat(DataFormat.XCONTENT); + description2.setQuoteCharacter('\''); + description2.setTimeField("timestamp"); + description2.setTimeFormat("epoch"); + description2.setFieldDelimiter(','); + + assertFalse(description1.build().equals(description2.build())); + assertFalse(description2.build().equals(description1.build())); + } + + public void testEquals_GivenDifferentTimeField() { + DataDescription.Builder description1 = new DataDescription.Builder(); + description1.setFormat(DataFormat.XCONTENT); + description1.setQuoteCharacter('"'); + description1.setTimeField("timestamp"); + description1.setTimeFormat("epoch"); + description1.setFieldDelimiter(','); + + DataDescription.Builder description2 = new DataDescription.Builder(); + description2.setFormat(DataFormat.XCONTENT); + description2.setQuoteCharacter('"'); + description2.setTimeField("time"); + description2.setTimeFormat("epoch"); + description2.setFieldDelimiter(','); + + assertFalse(description1.build().equals(description2.build())); + assertFalse(description2.build().equals(description1.build())); + } + + public void testEquals_GivenDifferentTimeFormat() { + DataDescription.Builder description1 = new DataDescription.Builder(); + description1.setFormat(DataFormat.XCONTENT); + description1.setQuoteCharacter('"'); + description1.setTimeField("timestamp"); + description1.setTimeFormat("epoch"); + description1.setFieldDelimiter(','); + + DataDescription.Builder description2 = new DataDescription.Builder(); + description2.setFormat(DataFormat.XCONTENT); + description2.setQuoteCharacter('"'); + description2.setTimeField("timestamp"); + description2.setTimeFormat("epoch_ms"); + description2.setFieldDelimiter(','); + + assertFalse(description1.build().equals(description2.build())); + assertFalse(description2.build().equals(description1.build())); + } + + public void testEquals_GivenDifferentFieldDelimiter() { + DataDescription.Builder description1 = new DataDescription.Builder(); + description1.setFormat(DataFormat.XCONTENT); + description1.setQuoteCharacter('"'); + description1.setTimeField("timestamp"); + description1.setTimeFormat("epoch"); + description1.setFieldDelimiter(','); + + DataDescription.Builder description2 = new DataDescription.Builder(); + description2.setFormat(DataFormat.XCONTENT); + description2.setQuoteCharacter('"'); + description2.setTimeField("timestamp"); + description2.setTimeFormat("epoch"); + description2.setFieldDelimiter(';'); + + assertFalse(description1.build().equals(description2.build())); + assertFalse(description2.build().equals(description1.build())); + } + + @Override + protected DataDescription createTestInstance() { + DataDescription.Builder dataDescription = new DataDescription.Builder(); + if (randomBoolean()) { + dataDescription.setFormat(randomFrom(DataFormat.values())); + } + if (randomBoolean()) { + dataDescription.setTimeField(randomAlphaOfLengthBetween(1, 20)); + } + if (randomBoolean()) { + String format; + if (randomBoolean()) { + format = DataDescription.EPOCH; + } else if (randomBoolean()) { + format = DataDescription.EPOCH_MS; + } else { + format = "yyyy-MM-dd HH:mm:ss.SSS"; + } + dataDescription.setTimeFormat(format); + } + if (randomBoolean()) { + dataDescription.setFieldDelimiter(randomAlphaOfLength(1).charAt(0)); + } + if (randomBoolean()) { + dataDescription.setQuoteCharacter(randomAlphaOfLength(1).charAt(0)); + } + return dataDescription.build(); + } + + @Override + protected DataDescription doParseInstance(XContentParser parser) { + return DataDescription.PARSER.apply(parser, null).build(); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java new file mode 100644 index 0000000000000..23f13c732123a --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java @@ -0,0 +1,40 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml.job.config; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +public class ModelPlotConfigTests extends AbstractXContentTestCase { + + @Override + protected ModelPlotConfig createTestInstance() { + return new ModelPlotConfig(randomBoolean(), randomAlphaOfLengthBetween(1, 30)); + } + + @Override + protected ModelPlotConfig doParseInstance(XContentParser parser) { + return ModelPlotConfig.PARSER.apply(parser, null); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +}