From 2aca21fa73b6805a16fde8c273c437692b81b790 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 6 Sep 2018 16:56:28 +0100 Subject: [PATCH 1/6] [ML] Add a file structure determination endpoint This endpoint accepts an arbitrary file in the request body and attempts to determine the structure. If successful it also proposes mappings that could be used when indexing the file's contents, and calculates simple statistics for each of the fields that are useful in the data preparation step prior to configuring machine learning jobs. --- .../xpack/core/XPackClientPlugin.java | 2 + .../core/ml/action/FileStructureAction.java | 182 ++++++++++++++++++ .../ml/filestructurefinder/FieldStats.java | 26 ++- .../ml/filestructurefinder/FileStructure.java | 71 ++++++- .../FileStructureActionRequestTests.java | 30 +++ .../FileStructureActionResponseTests.java | 22 +++ .../filestructurefinder/FieldStatsTests.java | 16 +- .../FileStructureTests.java | 26 ++- .../xpack/ml/MachineLearning.java | 9 +- .../action/TransportFileStructureAction.java | 55 ++++++ .../FileStructureFinderManager.java | 11 +- .../ml/rest/RestFileStructureAction.java | 55 ++++++ .../api/xpack.ml.file_structure.json | 25 +++ .../rest-api-spec/test/ml/file_structure.yml | 44 +++++ 14 files changed, 553 insertions(+), 21 deletions(-) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 03820b1f40b22..9822c4475fc1a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -55,6 +55,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; +import org.elasticsearch.xpack.core.ml.action.FileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; @@ -265,6 +266,7 @@ public List> getClientActions() { GetCalendarEventsAction.INSTANCE, PostCalendarEventsAction.INSTANCE, PersistJobAction.INSTANCE, + FileStructureAction.INSTANCE, // security ClearRealmCacheAction.INSTANCE, ClearRolesCacheAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java new file mode 100644 index 0000000000000..43267fa07ef24 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java @@ -0,0 +1,182 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.action.Action; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestBuilder; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.client.ElasticsearchClient; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.StatusToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +public class FileStructureAction extends Action { + + public static final FileStructureAction INSTANCE = new FileStructureAction(); + public static final String NAME = "cluster:monitor/xpack/ml/filestructure"; + + private FileStructureAction() { + super(NAME); + } + + @Override + public Response newResponse() { + return new Response(); + } + + static class RequestBuilder extends ActionRequestBuilder { + + RequestBuilder(ElasticsearchClient client, FileStructureAction action) { + super(client, action, new Request()); + } + } + + public static class Response extends ActionResponse implements StatusToXContentObject, Writeable { + + private FileStructure fileStructure; + + public Response(FileStructure fileStructure) { + this.fileStructure = fileStructure; + } + + public Response() { + } + + public FileStructure getFileStructure() { + return fileStructure; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + fileStructure = new FileStructure(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + fileStructure.writeTo(out); + } + + @Override + public RestStatus status() { + return RestStatus.OK; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + fileStructure.toXContent(builder, params); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(fileStructure); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + FileStructureAction.Response that = (FileStructureAction.Response) other; + return Objects.equals(fileStructure, that.fileStructure); + } + } + + public static class Request extends ActionRequest { + + public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample"); + + private Integer linesToSample; + private BytesReference sample; + + public Request() { + } + + public Integer getLinesToSample() { + return linesToSample; + } + + public void setLinesToSample(Integer linesToSample) { + this.linesToSample = linesToSample; + } + + public BytesReference getSample() { + return sample; + } + + public void setSample(BytesReference sample) { + this.sample = sample; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (linesToSample != null && linesToSample <= 0) { + validationException = addValidationError("lines_to_sample must be positive if specified", validationException); + } + if (sample == null) { + validationException = addValidationError("sample must be specified", validationException); + } + return validationException; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + linesToSample = in.readOptionalVInt(); + sample = in.readBytesReference(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeOptionalVInt(linesToSample); + out.writeBytesReference(sample); + } + + @Override + public int hashCode() { + return Objects.hash(linesToSample, sample); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + Request that = (Request) other; + return Objects.equals(this.linesToSample, that.linesToSample) && + Objects.equals(this.sample, that.sample); + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java index a09aa522f7f87..8f624d000cc38 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java @@ -6,6 +6,9 @@ package org.elasticsearch.xpack.core.ml.filestructurefinder; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -16,7 +19,7 @@ import java.util.Map; import java.util.Objects; -public class FieldStats implements ToXContentObject { +public class FieldStats implements ToXContentObject, Writeable { static final ParseField COUNT = new ParseField("count"); static final ParseField CARDINALITY = new ParseField("cardinality"); @@ -64,6 +67,27 @@ public FieldStats(long count, int cardinality, Double minValue, Double maxValue, this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits); } + public FieldStats(StreamInput in) throws IOException { + count = in.readVLong(); + cardinality = in.readVInt(); + minValue = in.readOptionalDouble(); + maxValue = in.readOptionalDouble(); + meanValue = in.readOptionalDouble(); + medianValue = in.readOptionalDouble(); + topHits = in.readList(StreamInput::readMap); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(count); + out.writeVInt(cardinality); + out.writeOptionalDouble(minValue); + out.writeOptionalDouble(maxValue); + out.writeOptionalDouble(meanValue); + out.writeOptionalDouble(medianValue); + out.writeCollection(topHits, StreamOutput::writeMap); + } + public long getCount() { return count; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java index 6993737e8547d..5484f9f9902f4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java @@ -6,6 +6,9 @@ package org.elasticsearch.xpack.core.ml.filestructurefinder; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -24,7 +27,7 @@ /** * Stores the file format determined by Machine Learning. */ -public class FileStructure implements ToXContentObject { +public class FileStructure implements ToXContentObject, Writeable { public enum Format { @@ -79,6 +82,8 @@ public String toString() { } } + public static final String EXPLAIN = "explain"; + static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed"); static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed"); static final ParseField SAMPLE_START = new ParseField("sample_start"); @@ -176,6 +181,66 @@ public FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampl this.explanation = Collections.unmodifiableList(new ArrayList<>(explanation)); } + public FileStructure(StreamInput in) throws IOException { + numLinesAnalyzed = in.readVInt(); + numMessagesAnalyzed = in.readVInt(); + sampleStart = in.readString(); + charset = in.readString(); + hasByteOrderMarker = in.readOptionalBoolean(); + format = in.readEnum(Format.class); + multilineStartPattern = in.readOptionalString(); + excludeLinesPattern = in.readOptionalString(); + inputFields = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; + hasHeaderRow = in.readOptionalBoolean(); + delimiter = in.readBoolean() ? (char) in.readVInt() : null; + shouldTrimFields = in.readOptionalBoolean(); + grokPattern = in.readOptionalString(); + timestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; + timestampField = in.readOptionalString(); + needClientTimezone = in.readBoolean(); + mappings = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap())); + fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap(StreamInput::readString, FieldStats::new))); + explanation = Collections.unmodifiableList(in.readList(StreamInput::readString)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(numLinesAnalyzed); + out.writeVInt(numMessagesAnalyzed); + out.writeString(sampleStart); + out.writeString(charset); + out.writeOptionalBoolean(hasByteOrderMarker); + out.writeEnum(format); + out.writeOptionalString(multilineStartPattern); + out.writeOptionalString(excludeLinesPattern); + if (inputFields == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeCollection(inputFields, StreamOutput::writeString); + } + out.writeOptionalBoolean(hasHeaderRow); + if (delimiter == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeVInt(delimiter); + } + out.writeOptionalBoolean(shouldTrimFields); + out.writeOptionalString(grokPattern); + if (timestampFormats == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeCollection(timestampFormats, StreamOutput::writeString); + } + out.writeOptionalString(timestampField); + out.writeBoolean(needClientTimezone); + out.writeMap(mappings); + out.writeMap(fieldStats, StreamOutput::writeString, (out1, value) -> value.writeTo(out1)); + out.writeCollection(explanation, StreamOutput::writeString); + } + public int getNumLinesAnalyzed() { return numLinesAnalyzed; } @@ -300,7 +365,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.endObject(); } - builder.field(EXPLANATION.getPreferredName(), explanation); + if (params.paramAsBoolean(EXPLAIN, false)) { + builder.field(EXPLANATION.getPreferredName(), explanation); + } builder.endObject(); return builder; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java new file mode 100644 index 0000000000000..b4c94f563bedc --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.test.AbstractStreamableTestCase; + +public class FileStructureActionRequestTests extends AbstractStreamableTestCase { + + @Override + protected FileStructureAction.Request createTestInstance() { + + FileStructureAction.Request request = new FileStructureAction.Request(); + + if (randomBoolean()) { + request.setLinesToSample(randomIntBetween(10, 2000)); + } + request.setSample(new BytesArray(randomByteArrayOfLength(randomIntBetween(1000, 20000)))); + + return request; + } + + @Override + protected FileStructureAction.Request createBlankInstance() { + return new FileStructureAction.Request(); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java new file mode 100644 index 0000000000000..a64e4550bdb4e --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.test.AbstractStreamableTestCase; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructureTests; + +public class FileStructureActionResponseTests extends AbstractStreamableTestCase { + + @Override + protected FileStructureAction.Response createTestInstance() { + return new FileStructureAction.Response(FileStructureTests.createTestFileStructure()); + } + + @Override + protected FileStructureAction.Response createBlankInstance() { + return new FileStructureAction.Response(); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java index 2041fb26a6259..30f7c8f5576d0 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java @@ -5,16 +5,18 @@ */ package org.elasticsearch.xpack.core.ml.filestructurefinder; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.test.AbstractSerializingTestCase; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -public class FieldStatsTests extends AbstractXContentTestCase { +public class FieldStatsTests extends AbstractSerializingTestCase { + @Override protected FieldStats createTestInstance() { return createTestFieldStats(); } @@ -51,11 +53,13 @@ static FieldStats createTestFieldStats() { return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits); } - protected FieldStats doParseInstance(XContentParser parser) { - return FieldStats.PARSER.apply(parser, null); + @Override + protected Writeable.Reader instanceReader() { + return FieldStats::new; } - protected boolean supportsUnknownFields() { - return false; + @Override + protected FieldStats doParseInstance(XContentParser parser) { + return FieldStats.PARSER.apply(parser, null); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java index 5e89a4840b585..6dcf675196508 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.filestructurefinder; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.test.AbstractSerializingTestCase; import java.nio.charset.Charset; import java.util.Arrays; @@ -16,9 +18,14 @@ import java.util.Map; import java.util.TreeMap; -public class FileStructureTests extends AbstractXContentTestCase { +public class FileStructureTests extends AbstractSerializingTestCase { + @Override protected FileStructure createTestInstance() { + return createTestFileStructure(); + } + + public static FileStructure createTestFileStructure() { FileStructure.Format format = randomFrom(EnumSet.allOf(FileStructure.Format.class)); @@ -66,24 +73,31 @@ protected FileStructure createTestInstance() { } builder.setMappings(mappings); - //if (randomBoolean()) { + if (randomBoolean()) { Map fieldStats = new TreeMap<>(); for (String field : generateRandomStringArray(5, 20, false, false)) { fieldStats.put(field, FieldStatsTests.createTestFieldStats()); } builder.setFieldStats(fieldStats); - //} + } builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false))); return builder.build(); } + @Override + protected Writeable.Reader instanceReader() { + return FileStructure::new; + } + + @Override protected FileStructure doParseInstance(XContentParser parser) { return FileStructure.PARSER.apply(parser, null).build(); } - protected boolean supportsUnknownFields() { - return false; + @Override + protected ToXContent.Params getToXContentParams() { + return new ToXContent.MapParams(Collections.singletonMap(FileStructure.EXPLAIN, "true")); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 343833c080618..595a3aec7ecdb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; +import org.elasticsearch.xpack.core.ml.action.FileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; @@ -118,6 +119,7 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction; import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; +import org.elasticsearch.xpack.ml.action.TransportFileStructureAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFlushJobAction; import org.elasticsearch.xpack.ml.action.TransportForecastJobAction; @@ -180,6 +182,7 @@ import org.elasticsearch.xpack.ml.job.process.normalizer.NormalizerProcessFactory; import org.elasticsearch.xpack.ml.notifications.Auditor; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; +import org.elasticsearch.xpack.ml.rest.RestFileStructureAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarEventAction; @@ -500,7 +503,8 @@ public List getRestHandlers(Settings settings, RestController restC new RestDeleteCalendarJobAction(settings, restController), new RestPutCalendarJobAction(settings, restController), new RestGetCalendarEventsAction(settings, restController), - new RestPostCalendarEventAction(settings, restController) + new RestPostCalendarEventAction(settings, restController), + new RestFileStructureAction(settings, restController) ); } @@ -557,7 +561,8 @@ public List getRestHandlers(Settings settings, RestController restC new ActionHandler<>(UpdateCalendarJobAction.INSTANCE, TransportUpdateCalendarJobAction.class), new ActionHandler<>(GetCalendarEventsAction.INSTANCE, TransportGetCalendarEventsAction.class), new ActionHandler<>(PostCalendarEventsAction.INSTANCE, TransportPostCalendarEventsAction.class), - new ActionHandler<>(PersistJobAction.INSTANCE, TransportPersistJobAction.class) + new ActionHandler<>(PersistJobAction.INSTANCE, TransportPersistJobAction.class), + new ActionHandler<>(FileStructureAction.INSTANCE, TransportFileStructureAction.class) ); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java new file mode 100644 index 0000000000000..419e516477201 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinder; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; + +public class TransportFileStructureAction extends HandledTransportAction { + + private final ThreadPool threadPool; + + @Inject + public TransportFileStructureAction(Settings settings, TransportService transportService, ActionFilters actionFilters, + ThreadPool threadPool) { + super(settings, FileStructureAction.NAME, transportService, actionFilters, FileStructureAction.Request::new); + this.threadPool = threadPool; + } + + @Override + protected void doExecute(Task task, FileStructureAction.Request request, ActionListener listener) { + + // As determining the file structure might take a while, we run + // in a different thread to avoid blocking the network thread. + threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() -> { + try { + listener.onResponse(buildFileStructureResponse(request)); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + + private FileStructureAction.Response buildFileStructureResponse(FileStructureAction.Request request) throws Exception { + + FileStructureFinderManager structureFinderManager = new FileStructureFinderManager(); + + FileStructureFinder fileStructureFinder = + structureFinderManager.findFileStructure(request.getLinesToSample(), request.getSample().streamInput()); + + return new FileStructureAction.Response(fileStructureFinder.getStructure()); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java index 983188614d0ca..d0ce68aff25c0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java @@ -35,6 +35,7 @@ public final class FileStructureFinderManager { public static final int MIN_SAMPLE_LINE_COUNT = 2; + public static final int DEFAULT_IDEAL_SAMPLE_LINE_COUNT = 1000; static final Set FILEBEAT_SUPPORTED_ENCODINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( "866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese", "cn-big5", "cp1250", "cp1251", "cp1252", @@ -82,16 +83,18 @@ public final class FileStructureFinderManager { * Given a stream of data from some file, determine its structure. * @param idealSampleLineCount Ideally, how many lines from the stream will be read to determine the structure? * If the stream has fewer lines then an attempt will still be made, providing at - * least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. + * least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. If null + * the value of {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT} will be used. * @param fromFile A stream from which the sample will be read. * @return A {@link FileStructureFinder} object from which the structure and messages can be queried. * @throws Exception A variety of problems could occur at various stages of the structure finding process. */ - public FileStructureFinder findLogStructure(int idealSampleLineCount, InputStream fromFile) throws Exception { - return findLogStructure(new ArrayList<>(), idealSampleLineCount, fromFile); + public FileStructureFinder findFileStructure(Integer idealSampleLineCount, InputStream fromFile) throws Exception { + return findFileStructure(new ArrayList<>(), (idealSampleLineCount == null) ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : idealSampleLineCount, + fromFile); } - public FileStructureFinder findLogStructure(List explanation, int idealSampleLineCount, InputStream fromFile) + public FileStructureFinder findFileStructure(List explanation, int idealSampleLineCount, InputStream fromFile) throws Exception { CharsetMatch charsetMatch = findCharset(explanation, fromFile); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java new file mode 100644 index 0000000000000..ab4144e44ab58 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.rest; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestController; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; + +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +public class RestFileStructureAction extends BaseRestHandler { + + public RestFileStructureAction(Settings settings, RestController controller) { + super(settings); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "file_structure", this); + } + + @Override + public String getName() { + return "xpack_ml_file_structure_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + + FileStructureAction.Request request = new FileStructureAction.Request(); + request.setLinesToSample(restRequest.paramAsInt(FileStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(), + FileStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT)); + if (restRequest.hasContent()) { + request.setSample(restRequest.content()); + } else { + throw new ElasticsearchParseException("request body is required"); + } + + return channel -> client.execute(FileStructureAction.INSTANCE, request, new RestToXContentListener<>(channel)); + } + + @Override + protected Set responseParams() { + return Collections.singleton(FileStructure.EXPLAIN); + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json new file mode 100644 index 0000000000000..fefd2ffcc168b --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json @@ -0,0 +1,25 @@ +{ + "xpack.ml.file_structure": { + "documentation": "http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-file-structure.html", + "methods": [ "POST" ], + "url": { + "path": "/_xpack/ml/file_structure", + "paths": [ "/_xpack/ml/file_structure" ], + "params": { + "lines_to_sample": { + "type": "int", + "description": "Optional parameter to specify how many lines of the file to include in the analysis" + }, + "explain": { + "type": "boolean", + "description": "Optional parameter to include an commentary on how the structure was derived" + } + } + }, + "body": { + "description" : "The contents of the file to be analyzed", + "required" : true, + "serialize" : "bulk" + } + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml new file mode 100644 index 0000000000000..96e7b67dfb655 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml @@ -0,0 +1,44 @@ +--- +"Test JSON file structure analysis": + - do: + headers: + # This is to stop the usual content type randomization, which + # would obviously ruin the results for this particular test + Content-Type: "application/json" + xpack.ml.file_structure: + body: + - airline: AAL + responsetime: 132.2046 + sourcetype: file-structure-test + time: 1403481600 + - airline: JZA + responsetime: 990.4628 + sourcetype: file-structure-test + time: 1403481700 + - airline: AAL + responsetime: 134.2046 + sourcetype: file-structure-test + time: 1403481800 + + - match: { num_lines_analyzed: 3 } + - match: { num_messages_analyzed: 3 } + - match: { charset: "UTF-8" } + - match: { has_byte_order_marker: false } + - match: { format: json } + - match: { timestamp_field: time } + - match: { timestamp_formats.0: UNIX } + - match: { need_client_timezone: false } + - match: { mappings.airline.type: keyword } + - match: { mappings.responsetime.type: double } + - match: { mappings.sourcetype.type: keyword } + - match: { mappings.time.type: date } + - match: { mappings.time.format: epoch_second } + - match: { field_stats.airline.count: 3 } + - match: { field_stats.airline.cardinality: 2 } + - match: { field_stats.responsetime.count: 3 } + - match: { field_stats.responsetime.cardinality: 3 } + - match: { field_stats.sourcetype.count: 3 } + - match: { field_stats.sourcetype.cardinality: 1 } + - match: { field_stats.time.count: 3 } + - match: { field_stats.time.cardinality: 3 } + - match: { field_stats.time.cardinality: 3 } From efd2ed79af5fbf405d677daaa68fd793403a17cb Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 7 Sep 2018 09:40:43 +0100 Subject: [PATCH 2/6] Improve validation of file structure request --- .../core/ml/action/FileStructureAction.java | 5 ++-- .../FileStructureActionRequestTests.java | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java index 43267fa07ef24..806a1e85d3a9c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java @@ -136,9 +136,10 @@ public void setSample(BytesReference sample) { public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; if (linesToSample != null && linesToSample <= 0) { - validationException = addValidationError("lines_to_sample must be positive if specified", validationException); + validationException = + addValidationError(LINES_TO_SAMPLE.getPreferredName() + " must be positive if specified", validationException); } - if (sample == null) { + if (sample == null || sample.length() == 0) { validationException = addValidationError("sample must be specified", validationException); } return validationException; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java index b4c94f563bedc..0652aa56cc74c 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java @@ -5,9 +5,13 @@ */ package org.elasticsearch.xpack.core.ml.action; +import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.test.AbstractStreamableTestCase; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.startsWith; + public class FileStructureActionRequestTests extends AbstractStreamableTestCase { @Override @@ -27,4 +31,29 @@ protected FileStructureAction.Request createTestInstance() { protected FileStructureAction.Request createBlankInstance() { return new FileStructureAction.Request(); } + + public void testValidateLinesToSample() { + + FileStructureAction.Request request = new FileStructureAction.Request(); + request.setLinesToSample(randomFrom(-1, 0)); + request.setSample(new BytesArray("foo\n")); + + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertThat(e.getMessage(), startsWith("Validation Failed: ")); + assertThat(e.getMessage(), containsString(" lines_to_sample must be positive if specified")); + } + + public void testValidateSample() { + + FileStructureAction.Request request = new FileStructureAction.Request(); + if (randomBoolean()) { + request.setSample(BytesArray.EMPTY); + } + + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertThat(e.getMessage(), startsWith("Validation Failed: ")); + assertThat(e.getMessage(), containsString(" sample must be specified")); + } } From 8440f0f2ed0b54e4e607f33dc25a2d1690c31102 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 7 Sep 2018 11:00:31 +0100 Subject: [PATCH 3/6] Calculate field stats for value-only extracted fields --- .../ml/filestructurefinder/GrokPatternCreator.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java index 3caa78589ba1b..292d0b8e8b305 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java @@ -445,7 +445,7 @@ public boolean matchesAll(Collection snippets) { @Override public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, Collection epilogues, Map mappings, Map fieldStats) { - String sampleValue = null; + Collection values = new ArrayList<>(); for (String snippet : snippets) { Map captures = grok.captures(snippet); // If the pattern doesn't match then captures will be null @@ -453,22 +453,24 @@ public String processCaptures(Map fieldNameCountStore, Collecti throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]"); } prefaces.add(captures.getOrDefault(PREFACE, "").toString()); - if (sampleValue == null) { - sampleValue = captures.get(VALUE).toString(); - } + values.add(captures.getOrDefault(VALUE, "").toString()); epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); } String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); if (mappings != null) { Map fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType); if ("date".equals(mappingType)) { - TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(sampleValue); + assert values.isEmpty() == false; + TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next()); if (timestampMatch != null) { fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat(); } } mappings.put(adjustedFieldName, fullMappingType); } + if (fieldStats != null) { + fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(values)); + } return "%{" + grokPatternName + ":" + adjustedFieldName + "}"; } } From c6fba6ffacf3483831f54bdc188e7ef3d76e3dec Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 7 Sep 2018 11:58:03 +0100 Subject: [PATCH 4/6] Add a verb to the action name and remove public default constructor --- .../xpack/core/XPackClientPlugin.java | 4 ++-- ...ction.java => FindFileStructureAction.java} | 14 +++++++------- ...> FindFileStructureActionRequestTests.java} | 14 +++++++------- ... FindFileStructureActionResponseTests.java} | 10 +++++----- .../xpack/ml/MachineLearning.java | 10 +++++----- ...a => TransportFindFileStructureAction.java} | 18 ++++++++++-------- ...n.java => RestFindFileStructureAction.java} | 16 ++++++++-------- ....json => xpack.ml.find_file_structure.json} | 6 +++--- ...e_structure.yml => find_file_structure.yml} | 2 +- 9 files changed, 48 insertions(+), 46 deletions(-) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/{FileStructureAction.java => FindFileStructureAction.java} (92%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/{FileStructureActionRequestTests.java => FindFileStructureActionRequestTests.java} (74%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/{FileStructureActionResponseTests.java => FindFileStructureActionResponseTests.java} (53%) rename x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/{TransportFileStructureAction.java => TransportFindFileStructureAction.java} (64%) rename x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/{RestFileStructureAction.java => RestFindFileStructureAction.java} (70%) rename x-pack/plugin/src/test/resources/rest-api-spec/api/{xpack.ml.file_structure.json => xpack.ml.find_file_structure.json} (82%) rename x-pack/plugin/src/test/resources/rest-api-spec/test/ml/{file_structure.yml => find_file_structure.yml} (97%) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 9822c4475fc1a..ba34efb948462 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -55,7 +55,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; -import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; @@ -266,7 +266,7 @@ public List> getClientActions() { GetCalendarEventsAction.INSTANCE, PostCalendarEventsAction.INSTANCE, PersistJobAction.INSTANCE, - FileStructureAction.INSTANCE, + FindFileStructureAction.INSTANCE, // security ClearRealmCacheAction.INSTANCE, ClearRolesCacheAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java similarity index 92% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java index 806a1e85d3a9c..9fda416b33bbe 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FileStructureAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java @@ -26,12 +26,12 @@ import static org.elasticsearch.action.ValidateActions.addValidationError; -public class FileStructureAction extends Action { +public class FindFileStructureAction extends Action { - public static final FileStructureAction INSTANCE = new FileStructureAction(); - public static final String NAME = "cluster:monitor/xpack/ml/filestructure"; + public static final FindFileStructureAction INSTANCE = new FindFileStructureAction(); + public static final String NAME = "cluster:monitor/xpack/ml/findfilestructure"; - private FileStructureAction() { + private FindFileStructureAction() { super(NAME); } @@ -42,7 +42,7 @@ public Response newResponse() { static class RequestBuilder extends ActionRequestBuilder { - RequestBuilder(ElasticsearchClient client, FileStructureAction action) { + RequestBuilder(ElasticsearchClient client, FindFileStructureAction action) { super(client, action, new Request()); } } @@ -55,7 +55,7 @@ public Response(FileStructure fileStructure) { this.fileStructure = fileStructure; } - public Response() { + Response() { } public FileStructure getFileStructure() { @@ -101,7 +101,7 @@ public boolean equals(Object other) { return false; } - FileStructureAction.Response that = (FileStructureAction.Response) other; + FindFileStructureAction.Response that = (FindFileStructureAction.Response) other; return Objects.equals(fileStructure, that.fileStructure); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java similarity index 74% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java index 0652aa56cc74c..05ba0e7f306f4 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionRequestTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java @@ -12,12 +12,12 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.startsWith; -public class FileStructureActionRequestTests extends AbstractStreamableTestCase { +public class FindFileStructureActionRequestTests extends AbstractStreamableTestCase { @Override - protected FileStructureAction.Request createTestInstance() { + protected FindFileStructureAction.Request createTestInstance() { - FileStructureAction.Request request = new FileStructureAction.Request(); + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); if (randomBoolean()) { request.setLinesToSample(randomIntBetween(10, 2000)); @@ -28,13 +28,13 @@ protected FileStructureAction.Request createTestInstance() { } @Override - protected FileStructureAction.Request createBlankInstance() { - return new FileStructureAction.Request(); + protected FindFileStructureAction.Request createBlankInstance() { + return new FindFileStructureAction.Request(); } public void testValidateLinesToSample() { - FileStructureAction.Request request = new FileStructureAction.Request(); + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); request.setLinesToSample(randomFrom(-1, 0)); request.setSample(new BytesArray("foo\n")); @@ -46,7 +46,7 @@ public void testValidateLinesToSample() { public void testValidateSample() { - FileStructureAction.Request request = new FileStructureAction.Request(); + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); if (randomBoolean()) { request.setSample(BytesArray.EMPTY); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java similarity index 53% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java index a64e4550bdb4e..706ee44a4fd97 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FileStructureActionResponseTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java @@ -8,15 +8,15 @@ import org.elasticsearch.test.AbstractStreamableTestCase; import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructureTests; -public class FileStructureActionResponseTests extends AbstractStreamableTestCase { +public class FindFileStructureActionResponseTests extends AbstractStreamableTestCase { @Override - protected FileStructureAction.Response createTestInstance() { - return new FileStructureAction.Response(FileStructureTests.createTestFileStructure()); + protected FindFileStructureAction.Response createTestInstance() { + return new FindFileStructureAction.Response(FileStructureTests.createTestFileStructure()); } @Override - protected FileStructureAction.Response createBlankInstance() { - return new FileStructureAction.Response(); + protected FindFileStructureAction.Response createBlankInstance() { + return new FindFileStructureAction.Response(); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 595a3aec7ecdb..2e82cd156fc99 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,7 +65,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; -import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; @@ -119,7 +119,7 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction; import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; -import org.elasticsearch.xpack.ml.action.TransportFileStructureAction; +import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFlushJobAction; import org.elasticsearch.xpack.ml.action.TransportForecastJobAction; @@ -182,7 +182,7 @@ import org.elasticsearch.xpack.ml.job.process.normalizer.NormalizerProcessFactory; import org.elasticsearch.xpack.ml.notifications.Auditor; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; -import org.elasticsearch.xpack.ml.rest.RestFileStructureAction; +import org.elasticsearch.xpack.ml.rest.RestFindFileStructureAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarEventAction; @@ -504,7 +504,7 @@ public List getRestHandlers(Settings settings, RestController restC new RestPutCalendarJobAction(settings, restController), new RestGetCalendarEventsAction(settings, restController), new RestPostCalendarEventAction(settings, restController), - new RestFileStructureAction(settings, restController) + new RestFindFileStructureAction(settings, restController) ); } @@ -562,7 +562,7 @@ public List getRestHandlers(Settings settings, RestController restC new ActionHandler<>(GetCalendarEventsAction.INSTANCE, TransportGetCalendarEventsAction.class), new ActionHandler<>(PostCalendarEventsAction.INSTANCE, TransportPostCalendarEventsAction.class), new ActionHandler<>(PersistJobAction.INSTANCE, TransportPersistJobAction.class), - new ActionHandler<>(FileStructureAction.INSTANCE, TransportFileStructureAction.class) + new ActionHandler<>(FindFileStructureAction.INSTANCE, TransportFindFileStructureAction.class) ); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java similarity index 64% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java rename to x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java index 419e516477201..66d07f5111c52 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFileStructureAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java @@ -13,24 +13,26 @@ import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinder; import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; -public class TransportFileStructureAction extends HandledTransportAction { +public class TransportFindFileStructureAction + extends HandledTransportAction { private final ThreadPool threadPool; @Inject - public TransportFileStructureAction(Settings settings, TransportService transportService, ActionFilters actionFilters, - ThreadPool threadPool) { - super(settings, FileStructureAction.NAME, transportService, actionFilters, FileStructureAction.Request::new); + public TransportFindFileStructureAction(Settings settings, TransportService transportService, ActionFilters actionFilters, + ThreadPool threadPool) { + super(settings, FindFileStructureAction.NAME, transportService, actionFilters, FindFileStructureAction.Request::new); this.threadPool = threadPool; } @Override - protected void doExecute(Task task, FileStructureAction.Request request, ActionListener listener) { + protected void doExecute(Task task, FindFileStructureAction.Request request, + ActionListener listener) { // As determining the file structure might take a while, we run // in a different thread to avoid blocking the network thread. @@ -43,13 +45,13 @@ protected void doExecute(Task task, FileStructureAction.Request request, ActionL }); } - private FileStructureAction.Response buildFileStructureResponse(FileStructureAction.Request request) throws Exception { + private FindFileStructureAction.Response buildFileStructureResponse(FindFileStructureAction.Request request) throws Exception { FileStructureFinderManager structureFinderManager = new FileStructureFinderManager(); FileStructureFinder fileStructureFinder = structureFinderManager.findFileStructure(request.getLinesToSample(), request.getSample().streamInput()); - return new FileStructureAction.Response(fileStructureFinder.getStructure()); + return new FindFileStructureAction.Response(fileStructureFinder.getStructure()); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java similarity index 70% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java rename to x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java index ab4144e44ab58..83293c7d60efa 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFileStructureAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java @@ -12,7 +12,7 @@ import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestToXContentListener; -import org.elasticsearch.xpack.core.ml.action.FileStructureAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; @@ -21,23 +21,23 @@ import java.util.Collections; import java.util.Set; -public class RestFileStructureAction extends BaseRestHandler { +public class RestFindFileStructureAction extends BaseRestHandler { - public RestFileStructureAction(Settings settings, RestController controller) { + public RestFindFileStructureAction(Settings settings, RestController controller) { super(settings); - controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "file_structure", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "find_file_structure", this); } @Override public String getName() { - return "xpack_ml_file_structure_action"; + return "xpack_ml_find_file_structure_action"; } @Override protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - FileStructureAction.Request request = new FileStructureAction.Request(); - request.setLinesToSample(restRequest.paramAsInt(FileStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(), + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); + request.setLinesToSample(restRequest.paramAsInt(FindFileStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(), FileStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT)); if (restRequest.hasContent()) { request.setSample(restRequest.content()); @@ -45,7 +45,7 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient throw new ElasticsearchParseException("request body is required"); } - return channel -> client.execute(FileStructureAction.INSTANCE, request, new RestToXContentListener<>(channel)); + return channel -> client.execute(FindFileStructureAction.INSTANCE, request, new RestToXContentListener<>(channel)); } @Override diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json similarity index 82% rename from x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json rename to x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json index fefd2ffcc168b..bd41e0c00bca8 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.file_structure.json +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json @@ -1,10 +1,10 @@ { - "xpack.ml.file_structure": { + "xpack.ml.find_file_structure": { "documentation": "http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-file-structure.html", "methods": [ "POST" ], "url": { - "path": "/_xpack/ml/file_structure", - "paths": [ "/_xpack/ml/file_structure" ], + "path": "/_xpack/ml/find_file_structure", + "paths": [ "/_xpack/ml/find_file_structure" ], "params": { "lines_to_sample": { "type": "int", diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml similarity index 97% rename from x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml rename to x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml index 96e7b67dfb655..1d164cc0c5afc 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/file_structure.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml @@ -5,7 +5,7 @@ # This is to stop the usual content type randomization, which # would obviously ruin the results for this particular test Content-Type: "application/json" - xpack.ml.file_structure: + xpack.ml.find_file_structure: body: - airline: AAL responsetime: 132.2046 From 8e840a6bc2e51280698ee2f694396572a7b5a224 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 7 Sep 2018 12:15:04 +0100 Subject: [PATCH 5/6] Put imports in alphabetical order --- .../java/org/elasticsearch/xpack/core/XPackClientPlugin.java | 2 +- .../main/java/org/elasticsearch/xpack/ml/MachineLearning.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index ba34efb948462..190a9a2215ee6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -55,8 +55,8 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; -import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; import org.elasticsearch.xpack.core.ml.action.GetBucketsAction; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 2e82cd156fc99..cd13b2c8bb657 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,8 +65,8 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; -import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; import org.elasticsearch.xpack.core.ml.action.GetBucketsAction; @@ -119,8 +119,8 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction; import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; -import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; +import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; import org.elasticsearch.xpack.ml.action.TransportFlushJobAction; import org.elasticsearch.xpack.ml.action.TransportForecastJobAction; import org.elasticsearch.xpack.ml.action.TransportGetBucketsAction; From 344b140fe71c76935297c2fb1bd9e1fef03920d3 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 7 Sep 2018 15:21:55 +0100 Subject: [PATCH 6/6] Fix ML security tests --- .../elasticsearch/smoketest/MlWithSecurityUserRoleIT.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java b/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java index b103d30f282e2..9e31ddb131c6f 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java +++ b/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java @@ -31,10 +31,13 @@ public void test() throws IOException { super.test(); // We should have got here if and only if the only ML endpoints in the test were GETs + // or the find_file_structure API, which is also available to the machine_learning_user + // role for (ExecutableSection section : testCandidate.getTestSection().getExecutableSections()) { if (section instanceof DoSection) { if (((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.") && - ((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.get_") == false) { + ((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.get_") == false && + ((DoSection) section).getApiCallSection().getApi().equals("xpack.ml.find_file_structure") == false) { fail("should have failed because of missing role"); } }