From 76b7510d3d59c6e29bc25e15d0beffb2e0a580ff Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Tue, 31 Jul 2018 15:25:12 +0100
Subject: [PATCH 1/2] [ML] Add some ML config classes to protocol library

This commit adds four ML config classes to the X-Pack protocol
library used by the high level REST client.

(Other commits will add the remaining config classes, plus results
and stats classes.)

These classes:

- Are immutable
- Have little/no validation of field values beyond null checks
- Are convertible to and from X-Content, but NOT wire transportable
- Have lenient parsers to maximize compatibility across versions
- Have the same class names, member names and getter/setter names
  as the corresponding classes in X-Pack core to ease migration
  for transport client users
- Don't reproduce all the methods that do calculations or
  transformations that the corresponding classes in X-Pack core
  have
---
 .../xpack/ml/job/config/AnalysisLimits.java   | 139 +++++++
 .../config/CategorizationAnalyzerConfig.java  | 341 ++++++++++++++++++
 .../xpack/ml/job/config/DataDescription.java  | 286 +++++++++++++++
 .../xpack/ml/job/config/ModelPlotConfig.java  |  92 +++++
 .../ml/job/config/AnalysisLimitsTests.java    | 105 ++++++
 .../CategorizationAnalyzerConfigTests.java    |  87 +++++
 .../ml/job/config/DataDescriptionTests.java   | 185 ++++++++++
 .../ml/job/config/ModelPlotConfigTests.java   |  48 +++
 8 files changed, 1283 insertions(+)
 create mode 100644 x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java
 create mode 100644 x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
 create mode 100644 x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
 create mode 100644 x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
 create mode 100644 x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java
 create mode 100644 x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java
 create mode 100644 x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java
 create mode 100644 x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java

diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java
new file mode 100644
index 0000000000000..f69b9ccbf9ff4
--- /dev/null
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimits.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * Analysis limits for autodetect. In particular,
+ * this is a collection of parameters that allow limiting
+ * the resources used by the job.
+ */
+public class AnalysisLimits implements ToXContentObject {
+
+    /**
+     * Serialisation field names
+     */
+    public static final ParseField MODEL_MEMORY_LIMIT = new ParseField("model_memory_limit");
+    public static final ParseField CATEGORIZATION_EXAMPLES_LIMIT = new ParseField("categorization_examples_limit");
+
+    public static final ConstructingObjectParser<AnalysisLimits, Void> PARSER =
+        new ConstructingObjectParser<>("analysis_limits", true, a -> new AnalysisLimits((Long) a[0], (Long) a[1]));
+
+    static {
+        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return ByteSizeValue.parseBytesSizeValue(p.text(), MODEL_MEMORY_LIMIT.getPreferredName()).getMb();
+            } else if (p.currentToken() == XContentParser.Token.VALUE_NUMBER) {
+                return p.longValue();
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, MODEL_MEMORY_LIMIT, ObjectParser.ValueType.VALUE);
+        PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), CATEGORIZATION_EXAMPLES_LIMIT);
+    }
+
+    /**
+     * The model memory limit in MiBs.
+     * It is initialised to <code>null</code>, which implies that the server-side default will be used.
+     */
+    private final Long modelMemoryLimit;
+
+    /**
+     * The limit on the number of examples stored per category; may be <code>null</code>.
+     * A value of <code>null</code> will result in the server-side default being used.
+     */
+    private final Long categorizationExamplesLimit;
+
+    public AnalysisLimits(Long categorizationExamplesLimit) {
+        this(null, categorizationExamplesLimit);
+    }
+
+    public AnalysisLimits(Long modelMemoryLimit, Long categorizationExamplesLimit) {
+        this.modelMemoryLimit = modelMemoryLimit;
+        this.categorizationExamplesLimit = categorizationExamplesLimit;
+    }
+
+    /**
+     * Maximum size of the model in MB before the anomaly detector
+     * will drop new samples to prevent the model using any more
+     * memory.
+     *
+     * @return The set memory limit or <code>null</code> if not set
+     */
+    @Nullable
+    public Long getModelMemoryLimit() {
+        return modelMemoryLimit;
+    }
+
+    /**
+     * Gets the limit to the number of examples that are stored per category
+     *
+     * @return the limit or <code>null</code> if not set
+     */
+    @Nullable
+    public Long getCategorizationExamplesLimit() {
+        return categorizationExamplesLimit;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        if (modelMemoryLimit != null) {
+            builder.field(MODEL_MEMORY_LIMIT.getPreferredName(), modelMemoryLimit + "mb");
+        }
+        if (categorizationExamplesLimit != null) {
+            builder.field(CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), categorizationExamplesLimit);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    /**
+     * Overridden equality test
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+
+        if (other instanceof AnalysisLimits == false) {
+            return false;
+        }
+
+        AnalysisLimits that = (AnalysisLimits) other;
+        return Objects.equals(this.modelMemoryLimit, that.modelMemoryLimit) &&
+                Objects.equals(this.categorizationExamplesLimit, that.categorizationExamplesLimit);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(modelMemoryLimit, categorizationExamplesLimit);
+    }
+}
diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
new file mode 100644
index 0000000000000..9c6bc58ba6a55
--- /dev/null
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.ToXContentFragment;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Configuration for the categorization analyzer.
+ *
+ * The syntax is a subset of what can be supplied to the {@linkplain RestAnalyzeAction <code>_analyze</code> endpoint}.
+ * To summarize, the first option is to specify the name of an out-of-the-box analyzer:
+ * <code>
+ *     "categorization_analyzer" : "standard"
+ * </code>
+ *
+ * The second option is to specify a custom analyzer by combining the <code>char_filters</code>, <code>tokenizer</code>
+ * and <code>token_filters</code> fields.  In turn, each of these can be specified as the name of an out-of-the-box
+ * one or as an object defining a custom one.  For example:
+ * <code>
+ *     "char_filters" : [
+ *         "html_strip",
+ *         { "type" : "pattern_replace", "pattern": "SQL: .*" }
+ *     ],
+ *     "tokenizer" : "thai",
+ *     "token_filters" : [
+ *         "lowercase",
+ *         { "type" : "pattern_replace", "pattern": "^[0-9].*" }
+ *     ]
+ * </code>
+ */
+public class CategorizationAnalyzerConfig implements ToXContentFragment {
+
+    public static final ParseField CATEGORIZATION_ANALYZER = new ParseField("categorization_analyzer");
+    private static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER;
+    private static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS;
+    private static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS;
+
+    /**
+     * This method is only used in the unit tests - in production code this config is always parsed as a fragment.
+     */
+    static CategorizationAnalyzerConfig buildFromXContentObject(XContentParser parser) throws IOException {
+
+        if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
+            throw new IllegalArgumentException("Expected start object but got [" + parser.currentToken() + "]");
+        }
+        if (parser.nextToken() != XContentParser.Token.FIELD_NAME) {
+            throw new IllegalArgumentException("Expected field name but got [" + parser.currentToken() + "]");
+        }
+        parser.nextToken();
+        CategorizationAnalyzerConfig categorizationAnalyzerConfig = buildFromXContentFragment(parser);
+        parser.nextToken();
+        return categorizationAnalyzerConfig;
+    }
+
+    /**
+     * Parse a <code>categorization_analyzer</code> given as either an analyzer name or an object of custom elements.
+     * A custom parser is needed due to the complexity of the format, with many elements able to be specified as
+     * either the name of a built-in element or an object containing a custom definition.
+     * Parsing is lenient: unrecognised fields are skipped, including any object or array they contain.
+     * @throws IllegalArgumentException if the value itself, the tokenizer or a filter array element has an unsupported type
+     */
+    static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser parser) throws IOException {
+        CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder();
+        XContentParser.Token token = parser.currentToken();
+        if (token == XContentParser.Token.VALUE_STRING) {
+            builder.setAnalyzer(parser.text());
+        } else if (token != XContentParser.Token.START_OBJECT) {
+            throw new IllegalArgumentException("[" + CATEGORIZATION_ANALYZER + "] should be analyzer's name or settings [" + token + "]");
+        } else {
+            String currentFieldName = null;
+            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                if (token == XContentParser.Token.FIELD_NAME) {
+                    currentFieldName = parser.currentName();
+                } else if (CHAR_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
+                        && token == XContentParser.Token.START_ARRAY) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        if (token == XContentParser.Token.VALUE_STRING) {
+                            builder.addCharFilter(parser.text());
+                        } else if (token == XContentParser.Token.START_OBJECT) {
+                            builder.addCharFilter(parser.map());
+                        } else {
+                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
+                                    "] array element should contain char_filter's name or settings [" + token + "]");
+                        }
+                    }
+                } else if (TOKENIZER.match(currentFieldName, parser.getDeprecationHandler())) {
+                    if (token == XContentParser.Token.VALUE_STRING) {
+                        builder.setTokenizer(parser.text());
+                    } else if (token == XContentParser.Token.START_OBJECT) {
+                        builder.setTokenizer(parser.map());
+                    } else {
+                        throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
+                                "] should be tokenizer's name or settings [" + token + "]");
+                    }
+                } else if (TOKEN_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
+                        && token == XContentParser.Token.START_ARRAY) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        if (token == XContentParser.Token.VALUE_STRING) {
+                            builder.addTokenFilter(parser.text());
+                        } else if (token == XContentParser.Token.START_OBJECT) {
+                            builder.addTokenFilter(parser.map());
+                        } else {
+                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
+                                    "] array element should contain token_filter's name or settings [" + token + "]");
+                        }
+                    }
+                } else {
+                    // Unknown field: skip a nested object/array whole so its END_OBJECT cannot end this loop early
+                    parser.skipChildren();
+                }
+            }
+        }
+        return builder.build();
+    }
+
+    /**
+     * Simple store of either a name of a built-in analyzer element or a custom definition.
+     */
+    public static class NameOrDefinition implements ToXContentFragment {
+
+        // Exactly one of these two members is not null
+        public final String name;
+        public final Settings definition;
+
+        NameOrDefinition(String name) {
+            this.name = Objects.requireNonNull(name);
+            this.definition = null;
+        }
+
+        NameOrDefinition(ParseField field, Map<String, Object> definition) {
+            this.name = null;
+            Objects.requireNonNull(definition);
+            try {
+                XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
+                builder.map(definition);
+                this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build();
+            } catch (IOException e) {
+                throw new IllegalArgumentException("Failed to parse [" + definition + "] in [" + field.getPreferredName() + "]", e);
+            }
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            if (definition == null) {
+                builder.value(name);
+            } else {
+                builder.startObject();
+                definition.toXContent(builder, params);
+                builder.endObject();
+            }
+            return builder;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            NameOrDefinition that = (NameOrDefinition) o;
+            return Objects.equals(name, that.name) &&
+                    Objects.equals(definition, that.definition);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(name, definition);
+        }
+
+        @Override
+        public String toString() {
+            if (definition == null) {
+                return name;
+            } else {
+                return definition.toDelimitedString(';');
+            }
+        }
+    }
+
+    private final String analyzer;
+    private final List<NameOrDefinition> charFilters;
+    private final NameOrDefinition tokenizer;
+    private final List<NameOrDefinition> tokenFilters;
+
+    private CategorizationAnalyzerConfig(String analyzer, List<NameOrDefinition> charFilters, NameOrDefinition tokenizer,
+                                         List<NameOrDefinition> tokenFilters) {
+        this.analyzer = analyzer;
+        this.charFilters = Collections.unmodifiableList(new ArrayList<>(charFilters)); // copy: the builder keeps its list
+        this.tokenizer = tokenizer;
+        this.tokenFilters = Collections.unmodifiableList(new ArrayList<>(tokenFilters)); // copy: the builder keeps its list
+    }
+
+    public String getAnalyzer() {
+        return analyzer;
+    }
+
+    public List<NameOrDefinition> getCharFilters() {
+        return charFilters;
+    }
+
+    public NameOrDefinition getTokenizer() {
+        return tokenizer;
+    }
+
+    public List<NameOrDefinition> getTokenFilters() {
+        return tokenFilters;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        if (analyzer != null) {
+            builder.field(CATEGORIZATION_ANALYZER.getPreferredName(), analyzer);
+        } else {
+            builder.startObject(CATEGORIZATION_ANALYZER.getPreferredName());
+            if (charFilters.isEmpty() == false) {
+                builder.startArray(CHAR_FILTERS.getPreferredName());
+                for (NameOrDefinition charFilter : charFilters) {
+                    charFilter.toXContent(builder, params);
+                }
+                builder.endArray();
+            }
+            if (tokenizer != null) {
+                builder.field(TOKENIZER.getPreferredName(), tokenizer);
+            }
+            if (tokenFilters.isEmpty() == false) {
+                builder.startArray(TOKEN_FILTERS.getPreferredName());
+                for (NameOrDefinition tokenFilter : tokenFilters) {
+                    tokenFilter.toXContent(builder, params);
+                }
+                builder.endArray();
+            }
+            builder.endObject();
+        }
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        CategorizationAnalyzerConfig that = (CategorizationAnalyzerConfig) o;
+        return Objects.equals(analyzer, that.analyzer) &&
+                Objects.equals(charFilters, that.charFilters) &&
+                Objects.equals(tokenizer, that.tokenizer) &&
+                Objects.equals(tokenFilters, that.tokenFilters);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(analyzer, charFilters, tokenizer, tokenFilters);
+    }
+
+    public static class Builder {
+
+        private String analyzer;
+        private List<NameOrDefinition> charFilters = new ArrayList<>();
+        private NameOrDefinition tokenizer;
+        private List<NameOrDefinition> tokenFilters = new ArrayList<>();
+
+        public Builder() {
+        }
+
+        public Builder(CategorizationAnalyzerConfig categorizationAnalyzerConfig) {
+            this.analyzer = categorizationAnalyzerConfig.analyzer;
+            this.charFilters = new ArrayList<>(categorizationAnalyzerConfig.charFilters);
+            this.tokenizer = categorizationAnalyzerConfig.tokenizer;
+            this.tokenFilters = new ArrayList<>(categorizationAnalyzerConfig.tokenFilters);
+        }
+
+        public Builder setAnalyzer(String analyzer) {
+            this.analyzer = analyzer;
+            return this;
+        }
+
+        public Builder addCharFilter(String charFilter) {
+            this.charFilters.add(new NameOrDefinition(charFilter));
+            return this;
+        }
+
+        public Builder addCharFilter(Map<String, Object> charFilter) {
+            this.charFilters.add(new NameOrDefinition(CHAR_FILTERS, charFilter));
+            return this;
+        }
+
+        public Builder setTokenizer(String tokenizer) {
+            this.tokenizer = new NameOrDefinition(tokenizer);
+            return this;
+        }
+
+        public Builder setTokenizer(Map<String, Object> tokenizer) {
+            this.tokenizer = new NameOrDefinition(TOKENIZER, tokenizer);
+            return this;
+        }
+
+        public Builder addTokenFilter(String tokenFilter) {
+            this.tokenFilters.add(new NameOrDefinition(tokenFilter));
+            return this;
+        }
+
+        public Builder addTokenFilter(Map<String, Object> tokenFilter) {
+            this.tokenFilters.add(new NameOrDefinition(TOKEN_FILTERS, tokenFilter));
+            return this;
+        }
+
+        /**
+         * Create a config
+         */
+        public CategorizationAnalyzerConfig build() {
+            return new CategorizationAnalyzerConfig(analyzer, charFilters, tokenizer, tokenFilters);
+        }
+    }
+}
diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
new file mode 100644
index 0000000000000..15ce999d65b09
--- /dev/null
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
@@ -0,0 +1,286 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser.ValueType;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.Objects;
+
+/**
+ * Describes the format of the data used in the job and how it should
+ * be interpreted by autodetect.
+ * <p>
+ * Data must either be in a textual delimited format (e.g. csv, tsv) or JSON;
+ * the {@linkplain DataFormat} enum indicates which. {@link #getTimeField()}
+ * is the name of the field containing the timestamp and {@link #getTimeFormat()}
+ * is the format code for the date string, as described by
+ * {@link java.time.format.DateTimeFormatter}. The default quote character for
+ * delimited formats is {@value #DEFAULT_QUOTE_CHAR} but any other character can be
+ * used.
+ */
+public class DataDescription implements ToXContentObject {
+    /**
+     * Enum of the acceptable data formats.
+     */
+    public enum DataFormat {
+        XCONTENT,
+
+        /**
+         * This is deprecated
+         */
+        DELIMITED;
+
+        /**
+         * Case-insensitive from string method.
+         * Works with either XCONTENT, XContent, etc.
+         *
+         * @param value String representation
+         * @return The data format
+         */
+        public static DataFormat forString(String value) {
+            return DataFormat.valueOf(value.toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    private static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description");
+    private static final ParseField FORMAT_FIELD = new ParseField("format");
+    private static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field");
+    private static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format");
+    private static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter");
+    private static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character");
+
+    /**
+     * Special time format string for epoch times (seconds)
+     */
+    public static final String EPOCH = "epoch";
+
+    /**
+     * Special time format string for epoch times (milli-seconds)
+     */
+    public static final String EPOCH_MS = "epoch_ms";
+
+    /**
+     * By default autodetect expects the timestamp in a field with this name
+     */
+    public static final String DEFAULT_TIME_FIELD = "time";
+
+    /**
+     * The default field delimiter expected by the native autodetect
+     * program.
+     */
+    public static final char DEFAULT_DELIMITER = '\t';
+
+    /**
+     * The default quote character used to escape text in
+     * delimited data formats
+     */
+    public static final char DEFAULT_QUOTE_CHAR = '"';
+
+    private final DataFormat dataFormat;
+    private final String timeFieldName;
+    private final String timeFormat;
+    private final Character fieldDelimiter;
+    private final Character quoteCharacter;
+
+    public static final ObjectParser<Builder, Void> PARSER =
+        new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), true, Builder::new);
+
+    static {
+        PARSER.declareString(Builder::setFormat, FORMAT_FIELD);
+        PARSER.declareString(Builder::setTimeField, TIME_FIELD_NAME_FIELD);
+        PARSER.declareString(Builder::setTimeFormat, TIME_FORMAT_FIELD);
+        PARSER.declareField(Builder::setFieldDelimiter, DataDescription::extractChar, FIELD_DELIMITER_FIELD, ValueType.STRING);
+        PARSER.declareField(Builder::setQuoteCharacter, DataDescription::extractChar, QUOTE_CHARACTER_FIELD, ValueType.STRING);
+    }
+
+    public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, Character fieldDelimiter,
+                           Character quoteCharacter) {
+        this.dataFormat = dataFormat;
+        this.timeFieldName = timeFieldName;
+        this.timeFormat = timeFormat;
+        this.fieldDelimiter = fieldDelimiter;
+        this.quoteCharacter = quoteCharacter;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        if (dataFormat != DataFormat.XCONTENT) {
+            builder.field(FORMAT_FIELD.getPreferredName(), dataFormat);
+        }
+        builder.field(TIME_FIELD_NAME_FIELD.getPreferredName(), timeFieldName);
+        builder.field(TIME_FORMAT_FIELD.getPreferredName(), timeFormat);
+        if (fieldDelimiter != null) {
+            builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter));
+        }
+        if (quoteCharacter != null) {
+            builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter));
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    /**
+     * The format of the data to be processed.
+     * Defaults to {@link DataDescription.DataFormat#XCONTENT}
+     *
+     * @return The data format
+     */
+    public DataFormat getFormat() {
+        return dataFormat;
+    }
+
+    /**
+     * The name of the field containing the timestamp
+     *
+     * @return A String if set or <code>null</code>
+     */
+    public String getTimeField() {
+        return timeFieldName;
+    }
+
+    /**
+     * Either {@value #EPOCH}, {@value #EPOCH_MS} or a {@link java.time.format.DateTimeFormatter} pattern string.
+     * If not set (is <code>null</code> or an empty string) or set to
+     * {@value #EPOCH_MS} (the default) then the date is assumed to be in
+     * milliseconds from the epoch.
+     *
+     * @return A String if set or <code>null</code>
+     */
+    public String getTimeFormat() {
+        return timeFormat;
+    }
+
+    /**
+     * If the data is in a delimited format with a header, e.g. csv or tsv,
+     * this is the delimiter character used. This is only applicable if
+     * {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}.
+     * The default value for delimited format is {@value #DEFAULT_DELIMITER}.
+     *
+     * @return The delimiter character, or <code>null</code> if not set
+     */
+    public Character getFieldDelimiter() {
+        return fieldDelimiter;
+    }
+
+    /**
+     * The quote character used in delimited formats.
+     * The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}.
+     *
+     * @return The delimited format quote character, or <code>null</code> if not set
+     */
+    public Character getQuoteCharacter() {
+        return quoteCharacter;
+    }
+
+    private static Character extractChar(XContentParser parser) throws IOException {
+        if (parser.currentToken() == XContentParser.Token.VALUE_STRING) {
+            String charStr = parser.text();
+            if (charStr.length() != 1) {
+                throw new IllegalArgumentException("String must be a single character, found [" + charStr + "]");
+            }
+            return charStr.charAt(0);
+        }
+        throw new IllegalArgumentException("Unsupported token [" + parser.currentToken() + "]");
+    }
+
+    /**
+     * Overridden equality test
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+
+        if (other instanceof DataDescription == false) {
+            return false;
+        }
+
+        DataDescription that = (DataDescription) other;
+
+        return this.dataFormat == that.dataFormat &&
+                Objects.equals(this.quoteCharacter, that.quoteCharacter) &&
+                Objects.equals(this.timeFieldName, that.timeFieldName) &&
+                Objects.equals(this.timeFormat, that.timeFormat) &&
+                Objects.equals(this.fieldDelimiter, that.fieldDelimiter);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(dataFormat, quoteCharacter, timeFieldName, timeFormat, fieldDelimiter);
+    }
+
+    public static class Builder {
+
+        private DataFormat dataFormat = DataFormat.XCONTENT;
+        private String timeFieldName = DEFAULT_TIME_FIELD;
+        private String timeFormat = EPOCH_MS;
+        private Character fieldDelimiter;
+        private Character quoteCharacter;
+
+        public void setFormat(DataFormat format) {
+            dataFormat = Objects.requireNonNull(format);
+        }
+
+        private void setFormat(String format) {
+            setFormat(DataFormat.forString(format));
+        }
+
+        public void setTimeField(String fieldName) {
+            timeFieldName = Objects.requireNonNull(fieldName);
+        }
+
+        public void setTimeFormat(String format) {
+            timeFormat = Objects.requireNonNull(format);
+        }
+
+        public void setFieldDelimiter(Character delimiter) {
+            fieldDelimiter = delimiter;
+        }
+
+        public void setQuoteCharacter(Character value) {
+            quoteCharacter = value;
+        }
+
+        public DataDescription build() {
+            if (dataFormat == DataFormat.DELIMITED) {
+                if (fieldDelimiter == null) {
+                    fieldDelimiter = DEFAULT_DELIMITER;
+                }
+                if (quoteCharacter == null) {
+                    quoteCharacter = DEFAULT_QUOTE_CHAR;
+                }
+            }
+            return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter, quoteCharacter);
+        }
+    }
+}
diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
new file mode 100644
index 0000000000000..a6ec3d8b77ee3
--- /dev/null
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class ModelPlotConfig implements ToXContentObject {
+    // Immutable model plot settings: an "enabled" flag plus an optional "terms" string, convertible to/from X-Content.
+    private static final ParseField TYPE_FIELD = new ParseField("model_plot_config");
+    private static final ParseField ENABLED_FIELD = new ParseField("enabled");
+    public static final ParseField TERMS_FIELD = new ParseField("terms");
+    // "enabled" is a mandatory constructor argument and "terms" an optional one (see the static initialiser below).
+    public static final ConstructingObjectParser<ModelPlotConfig, Void> PARSER =
+        new ConstructingObjectParser<>(TYPE_FIELD.getPreferredName(), true, a -> new ModelPlotConfig((boolean) a[0], (String) a[1]));
+
+    static {
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED_FIELD);
+        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TERMS_FIELD);
+    }
+
+    private final boolean enabled;
+    private final String terms;
+
+    public ModelPlotConfig() {
+        this(true, null);
+    }
+
+    public ModelPlotConfig(boolean enabled, String terms) {
+        this.enabled = enabled;
+        this.terms = terms;
+    }
+    // Serialises "enabled" unconditionally and "terms" only when it is non-null.
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(ENABLED_FIELD.getPreferredName(), enabled);
+        if (terms != null) {
+            builder.field(TERMS_FIELD.getPreferredName(), terms);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    public boolean isEnabled() {
+        return enabled;
+    }
+
+    public String getTerms() {
+        return this.terms;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+
+        if (other instanceof ModelPlotConfig == false) {
+            return false;
+        }
+
+        ModelPlotConfig that = (ModelPlotConfig) other;
+        return this.enabled == that.enabled && Objects.equals(this.terms, that.terms);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(enabled, terms);
+    }
+}
diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java
new file mode 100644
index 0000000000000..5003da10780d4
--- /dev/null
+++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/AnalysisLimitsTests.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.xcontent.DeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class AnalysisLimitsTests extends AbstractXContentTestCase<AnalysisLimits> {
+
+    @Override
+    protected AnalysisLimits createTestInstance() {
+        return createRandomized();
+    }
+
+    /** Each limit is independently null or random: model memory limit in [1, 1000000], examples limit non-negative. */
+    public static AnalysisLimits createRandomized() {
+        return new AnalysisLimits(randomBoolean() ? (long) randomIntBetween(1, 1000000) : null,
+                randomBoolean() ? randomNonNegativeLong() : null);
+    }
+
+    @Override
+    protected AnalysisLimits doParseInstance(XContentParser parser) {
+        return AnalysisLimits.PARSER.apply(parser, null);
+    }
+
+    public void testParseModelMemoryLimitGivenPositiveNumber() throws IOException {
+        String json = "{\"model_memory_limit\": 2048}";
+        // XContentParser is Closeable: try-with-resources releases it even when parsing or the assertion fails
+        try (XContentParser parser = XContentFactory.xContent(XContentType.JSON)
+                .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
+            AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
+            assertThat(limits.getModelMemoryLimit(), equalTo(2048L));
+        }
+    }
+
+    public void testParseModelMemoryLimitGivenStringMultipleOfMBs() throws IOException {
+        String json = "{\"model_memory_limit\":\"4g\"}";
+        // "4g" should be parsed as 4096, i.e. the limit is expressed in megabytes
+        try (XContentParser parser = XContentFactory.xContent(XContentType.JSON)
+                .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
+            AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
+            assertThat(limits.getModelMemoryLimit(), equalTo(4096L));
+        }
+    }
+
+    public void testEquals_GivenEqual() {
+        AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
+        AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 20L);
+        // equals() must be reflexive and symmetric
+        assertTrue(analysisLimits1.equals(analysisLimits1));
+        assertTrue(analysisLimits1.equals(analysisLimits2));
+        assertTrue(analysisLimits2.equals(analysisLimits1));
+    }
+
+    public void testEquals_GivenDifferentModelMemoryLimit() {
+        AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
+        AnalysisLimits analysisLimits2 = new AnalysisLimits(11L, 20L);
+
+        assertFalse(analysisLimits1.equals(analysisLimits2));
+        assertFalse(analysisLimits2.equals(analysisLimits1));
+    }
+
+    public void testEquals_GivenDifferentCategorizationExamplesLimit() {
+        AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
+        AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 21L);
+
+        assertFalse(analysisLimits1.equals(analysisLimits2));
+        assertFalse(analysisLimits2.equals(analysisLimits1));
+    }
+
+    public void testHashCode_GivenEqual() {
+        AnalysisLimits analysisLimits1 = new AnalysisLimits(5555L, 3L);
+        AnalysisLimits analysisLimits2 = new AnalysisLimits(5555L, 3L);
+        assertEquals(analysisLimits1.hashCode(), analysisLimits2.hashCode());
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java
new file mode 100644
index 0000000000000..36fb51ed10e72
--- /dev/null
+++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public class CategorizationAnalyzerConfigTests extends AbstractXContentTestCase<CategorizationAnalyzerConfig> {
+
+    @Override
+    protected CategorizationAnalyzerConfig createTestInstance() {
+        return createRandomized().build();
+    }
+
+    /** Returns a builder with (rarely) just a named analyzer, or else a tokenizer plus optional char and token filters. */
+    public static CategorizationAnalyzerConfig.Builder createRandomized() {
+        CategorizationAnalyzerConfig.Builder configBuilder = new CategorizationAnalyzerConfig.Builder();
+        if (rarely()) {
+            configBuilder.setAnalyzer(randomAlphaOfLength(10));
+            return configBuilder;
+        }
+        if (randomBoolean()) {
+            for (String charFilterPattern : generateRandomStringArray(3, 40, false)) {
+                if (rarely()) {
+                    configBuilder.addCharFilter(randomAlphaOfLength(10));
+                } else {
+                    configBuilder.addCharFilter(customDefinition("pattern_replace", charFilterPattern));
+                }
+            }
+        }
+
+        if (rarely()) {
+            configBuilder.setTokenizer(randomAlphaOfLength(10));
+        } else {
+            configBuilder.setTokenizer(customDefinition("pattern", randomAlphaOfLength(10)));
+        }
+
+        if (randomBoolean()) {
+            for (String tokenFilterPattern : generateRandomStringArray(4, 40, false)) {
+                if (rarely()) {
+                    configBuilder.addTokenFilter(randomAlphaOfLength(10));
+                } else {
+                    configBuilder.addTokenFilter(customDefinition("pattern_replace", tokenFilterPattern));
+                }
+            }
+        }
+        return configBuilder;
+    }
+
+    /** Builds the map form of a custom analyzer element with the given "type" and "pattern" entries. */
+    private static Map<String, Object> customDefinition(String type, String pattern) {
+        Map<String, Object> definition = new HashMap<>();
+        definition.put("type", type);
+        definition.put("pattern", pattern);
+        return definition;
+    }
+
+    @Override
+    protected CategorizationAnalyzerConfig doParseInstance(XContentParser parser) throws IOException {
+        return CategorizationAnalyzerConfig.buildFromXContentObject(parser);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return false;
+    }
+}
diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java
new file mode 100644
index 0000000000000..8ca2dc494f3c5
--- /dev/null
+++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescriptionTests.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.protocol.xpack.ml.job.config.DataDescription.DataFormat;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.core.Is.is;
+
+public class DataDescriptionTests extends AbstractXContentTestCase<DataDescription> {
+    // Checks builder defaults, equals() sensitivity to every property, and X-Content round-tripping of DataDescription.
+    public void testDefault() {
+        DataDescription dataDescription = new DataDescription.Builder().build();
+        assertThat(dataDescription.getFormat(), equalTo(DataFormat.XCONTENT));
+        assertThat(dataDescription.getTimeField(), equalTo("time"));
+        assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
+        assertThat(dataDescription.getFieldDelimiter(), is(nullValue()));
+        assertThat(dataDescription.getQuoteCharacter(), is(nullValue()));
+    }
+
+    public void testDefaultDelimited() {
+        DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder();
+        dataDescriptionBuilder.setFormat(DataFormat.DELIMITED);
+        DataDescription dataDescription = dataDescriptionBuilder.build();
+        // Only the format was set, so the tab delimiter and double-quote character asserted below are defaults
+        assertThat(dataDescription.getFormat(), equalTo(DataFormat.DELIMITED));
+        assertThat(dataDescription.getTimeField(), equalTo("time"));
+        assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
+        assertThat(dataDescription.getFieldDelimiter(), is('\t'));
+        assertThat(dataDescription.getQuoteCharacter(), is('"'));
+    }
+
+    public void testEquals_GivenDifferentDateFormat() {
+        DataDescription.Builder description1 = new DataDescription.Builder();
+        description1.setFormat(DataFormat.XCONTENT);
+        description1.setQuoteCharacter('"');
+        description1.setTimeField("timestamp");
+        description1.setTimeFormat("epoch");
+        description1.setFieldDelimiter(',');
+        // Differs from description1 only in the data format (despite the "DateFormat" in the test name)
+        DataDescription.Builder description2 = new DataDescription.Builder();
+        description2.setFormat(DataFormat.DELIMITED);
+        description2.setQuoteCharacter('"');
+        description2.setTimeField("timestamp");
+        description2.setTimeFormat("epoch");
+        description2.setFieldDelimiter(',');
+
+        assertFalse(description1.build().equals(description2.build()));
+        assertFalse(description2.build().equals(description1.build()));
+    }
+
+    public void testEquals_GivenDifferentQuoteCharacter() {
+        DataDescription.Builder description1 = new DataDescription.Builder();
+        description1.setFormat(DataFormat.XCONTENT);
+        description1.setQuoteCharacter('"');
+        description1.setTimeField("timestamp");
+        description1.setTimeFormat("epoch");
+        description1.setFieldDelimiter(',');
+        // Differs from description1 only in the quote character
+        DataDescription.Builder description2 = new DataDescription.Builder();
+        description2.setFormat(DataFormat.XCONTENT);
+        description2.setQuoteCharacter('\'');
+        description2.setTimeField("timestamp");
+        description2.setTimeFormat("epoch");
+        description2.setFieldDelimiter(',');
+
+        assertFalse(description1.build().equals(description2.build()));
+        assertFalse(description2.build().equals(description1.build()));
+    }
+
+    public void testEquals_GivenDifferentTimeField() {
+        DataDescription.Builder description1 = new DataDescription.Builder();
+        description1.setFormat(DataFormat.XCONTENT);
+        description1.setQuoteCharacter('"');
+        description1.setTimeField("timestamp");
+        description1.setTimeFormat("epoch");
+        description1.setFieldDelimiter(',');
+        // Differs from description1 only in the time field name
+        DataDescription.Builder description2 = new DataDescription.Builder();
+        description2.setFormat(DataFormat.XCONTENT);
+        description2.setQuoteCharacter('"');
+        description2.setTimeField("time");
+        description2.setTimeFormat("epoch");
+        description2.setFieldDelimiter(',');
+
+        assertFalse(description1.build().equals(description2.build()));
+        assertFalse(description2.build().equals(description1.build()));
+    }
+
+    public void testEquals_GivenDifferentTimeFormat() {
+        DataDescription.Builder description1 = new DataDescription.Builder();
+        description1.setFormat(DataFormat.XCONTENT);
+        description1.setQuoteCharacter('"');
+        description1.setTimeField("timestamp");
+        description1.setTimeFormat("epoch");
+        description1.setFieldDelimiter(',');
+        // Differs from description1 only in the time format
+        DataDescription.Builder description2 = new DataDescription.Builder();
+        description2.setFormat(DataFormat.XCONTENT);
+        description2.setQuoteCharacter('"');
+        description2.setTimeField("timestamp");
+        description2.setTimeFormat("epoch_ms");
+        description2.setFieldDelimiter(',');
+
+        assertFalse(description1.build().equals(description2.build()));
+        assertFalse(description2.build().equals(description1.build()));
+    }
+
+    public void testEquals_GivenDifferentFieldDelimiter() {
+        DataDescription.Builder description1 = new DataDescription.Builder();
+        description1.setFormat(DataFormat.XCONTENT);
+        description1.setQuoteCharacter('"');
+        description1.setTimeField("timestamp");
+        description1.setTimeFormat("epoch");
+        description1.setFieldDelimiter(',');
+        // Differs from description1 only in the field delimiter
+        DataDescription.Builder description2 = new DataDescription.Builder();
+        description2.setFormat(DataFormat.XCONTENT);
+        description2.setQuoteCharacter('"');
+        description2.setTimeField("timestamp");
+        description2.setTimeFormat("epoch");
+        description2.setFieldDelimiter(';');
+
+        assertFalse(description1.build().equals(description2.build()));
+        assertFalse(description2.build().equals(description1.build()));
+    }
+
+    @Override
+    protected DataDescription createTestInstance() {
+        DataDescription.Builder dataDescription = new DataDescription.Builder();
+        if (randomBoolean()) { // each property is randomly either set or left at the builder default
+            dataDescription.setFormat(randomFrom(DataFormat.values()));
+        }
+        if (randomBoolean()) {
+            dataDescription.setTimeField(randomAlphaOfLengthBetween(1, 20));
+        }
+        if (randomBoolean()) {
+            String format;
+            if (randomBoolean()) {
+                format = DataDescription.EPOCH;
+            } else if (randomBoolean()) {
+                format = DataDescription.EPOCH_MS;
+            } else {
+                format = "yyyy-MM-dd HH:mm:ss.SSS";
+            }
+            dataDescription.setTimeFormat(format);
+        }
+        if (randomBoolean()) {
+            dataDescription.setFieldDelimiter(randomAlphaOfLength(1).charAt(0));
+        }
+        if (randomBoolean()) {
+            dataDescription.setQuoteCharacter(randomAlphaOfLength(1).charAt(0));
+        }
+        return dataDescription.build();
+    }
+
+    @Override
+    protected DataDescription doParseInstance(XContentParser parser) {
+        return DataDescription.PARSER.apply(parser, null).build(); // the parser yields a Builder, hence build()
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java
new file mode 100644
index 0000000000000..6172a598c357e
--- /dev/null
+++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.protocol.xpack.ml.job.config;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+public class ModelPlotConfigTests extends AbstractXContentTestCase<ModelPlotConfig> {
+
+    public void testConstructorDefaults() {
+        assertThat(new ModelPlotConfig().isEnabled(), is(true));
+        assertThat(new ModelPlotConfig().getTerms(), is(nullValue()));
+    }
+
+    @Override
+    protected ModelPlotConfig createTestInstance() {
+        return new ModelPlotConfig(randomBoolean(), randomAlphaOfLengthBetween(1, 30));
+    }
+    // NOTE(review): createTestInstance() above always supplies non-null terms, so a config without "terms" is never round-tripped.
+    @Override
+    protected ModelPlotConfig doParseInstance(XContentParser parser) {
+        return ModelPlotConfig.PARSER.apply(parser, null);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true; // presumably matches the "true" flag passed when ModelPlotConfig.PARSER is constructed - confirm
+    }
+}

From 73fd0ec2eb34f75f06f80e342eb4e31cb5cede4b Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Thu, 2 Aug 2018 15:41:13 +0100
Subject: [PATCH 2/2] Address review comments

---
 .../config/CategorizationAnalyzerConfig.java  |  8 +++-----
 .../xpack/ml/job/config/DataDescription.java  | 20 ++++++++-----------
 .../xpack/ml/job/config/ModelPlotConfig.java  |  4 ----
 .../ml/job/config/ModelPlotConfigTests.java   |  8 --------
 4 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
index 9c6bc58ba6a55..dc7f047b80404 100644
--- a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/CategorizationAnalyzerConfig.java
@@ -84,11 +84,9 @@ static CategorizationAnalyzerConfig buildFromXContentObject(XContentParser parse
     }
 
     /**
-     * Parse a <code>categorization_analyzer</code> from configuration or cluster state.  A custom parser is needed
-     * due to the complexity of the format, with many elements able to be specified as either the name of a built-in
+     * Parse a <code>categorization_analyzer</code> configuration.  A custom parser is needed due to the
+     * complexity of the format, with many elements able to be specified as either the name of a built-in
      * element or an object containing a custom definition.
-     *
-     * The parser is strict when parsing config and lenient when parsing cluster state.
      */
     static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser parser) throws IOException {
 
@@ -147,7 +145,7 @@ static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser par
     /**
      * Simple store of either a name of a built-in analyzer element or a custom definition.
      */
-    public static class NameOrDefinition implements ToXContentFragment {
+    public static final class NameOrDefinition implements ToXContentFragment {
 
         // Exactly one of these two members is not null
         public final String name;
diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
index 15ce999d65b09..f469512f64973 100644
--- a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/DataDescription.java
@@ -31,15 +31,11 @@
 
 /**
  * Describes the format of the data used in the job and how it should
- * be interpreted by autodetect.
+ * be interpreted by the ML job.
  * <p>
- * Data must either be in a textual delineated format (e.g. csv, tsv) or JSON
- * the {@linkplain DataFormat} enum indicates which. {@link #getTimeField()}
- * is the name of the field containing the timestamp and {@link #getTimeFormat()}
- * is the format code for the date string in as described by
- * {@link java.time.format.DateTimeFormatter}. The default quote character for
- * delineated formats is {@value #DEFAULT_QUOTE_CHAR} but any other character can be
- * used.
+ * {@link #getTimeField()} is the name of the field containing the timestamp and
+ * {@link #getTimeFormat()} is the format code for the date string, as described by
+ * {@link java.time.format.DateTimeFormatter}.
  */
 public class DataDescription implements ToXContentObject {
     /**
@@ -100,7 +96,7 @@ public String toString() {
 
     /**
      * The default quote character used to escape text in
-     * delineated data formats
+     * delimited data formats
      */
     public static final char DEFAULT_QUOTE_CHAR = '"';
 
@@ -180,7 +176,7 @@ public String getTimeFormat() {
     }
 
     /**
-     * If the data is in a delineated format with a header e.g. csv or tsv
+     * If the data is in a delimited format with a header e.g. csv or tsv
      * this is the delimiter character used. This is only applicable if
      * {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}.
      * The default value for delimited format is {@value #DEFAULT_DELIMITER}.
@@ -192,10 +188,10 @@ public Character getFieldDelimiter() {
     }
 
     /**
-     * The quote character used in delineated formats.
+     * The quote character used in delimited formats.
      * The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}.
      *
-     * @return The delineated format quote character
+     * @return The delimited format quote character
      */
     public Character getQuoteCharacter() {
         return quoteCharacter;
diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
index a6ec3d8b77ee3..59b0252a7660e 100644
--- a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
+++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfig.java
@@ -43,10 +43,6 @@ public class ModelPlotConfig implements ToXContentObject {
     private final boolean enabled;
     private final String terms;
 
-    public ModelPlotConfig() {
-        this(true, null);
-    }
-
     public ModelPlotConfig(boolean enabled, String terms) {
         this.enabled = enabled;
         this.terms = terms;
diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java
index 6172a598c357e..23f13c732123a 100644
--- a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java
+++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/job/config/ModelPlotConfigTests.java
@@ -21,16 +21,8 @@
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.test.AbstractXContentTestCase;
 
-import static org.hamcrest.Matchers.is;
-import static org.hamcrest.Matchers.nullValue;
-
 public class ModelPlotConfigTests extends AbstractXContentTestCase<ModelPlotConfig> {
 
-    public void testConstructorDefaults() {
-        assertThat(new ModelPlotConfig().isEnabled(), is(true));
-        assertThat(new ModelPlotConfig().getTerms(), is(nullValue()));
-    }
-
     @Override
     protected ModelPlotConfig createTestInstance() {
         return new ModelPlotConfig(randomBoolean(), randomAlphaOfLengthBetween(1, 30));