diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 4db09247b379e..a3707ba136388 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -54,6 +54,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; import org.elasticsearch.xpack.core.ml.action.GetBucketsAction; @@ -264,6 +265,7 @@ public List getClientActions() { GetCalendarEventsAction.INSTANCE, PostCalendarEventsAction.INSTANCE, PersistJobAction.INSTANCE, + FindFileStructureAction.INSTANCE, // security ClearRealmCacheAction.INSTANCE, ClearRolesCacheAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java new file mode 100644 index 0000000000000..593109381d604 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureAction.java @@ -0,0 +1,189 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.action.Action; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestBuilder; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.client.ElasticsearchClient; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.StatusToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +public class FindFileStructureAction + extends Action { + + public static final FindFileStructureAction INSTANCE = new FindFileStructureAction(); + public static final String NAME = "cluster:monitor/xpack/ml/findfilestructure"; + + private FindFileStructureAction() { + super(NAME); + } + + @Override + public RequestBuilder newRequestBuilder(ElasticsearchClient client) { + return new RequestBuilder(client, this); + } + + @Override + public Response newResponse() { + return new Response(); + } + + static class RequestBuilder extends ActionRequestBuilder { + + RequestBuilder(ElasticsearchClient client, FindFileStructureAction action) { + super(client, action, new Request()); + } + } + + public static class Response extends ActionResponse implements StatusToXContentObject, Writeable { + + private FileStructure fileStructure; + + public Response(FileStructure fileStructure) { + this.fileStructure = fileStructure; + } + + Response() { + } + + public FileStructure getFileStructure() { + return fileStructure; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + fileStructure = new FileStructure(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + fileStructure.writeTo(out); + } + + @Override + public RestStatus status() { + return RestStatus.OK; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + fileStructure.toXContent(builder, params); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(fileStructure); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + FindFileStructureAction.Response that = (FindFileStructureAction.Response) other; + return Objects.equals(fileStructure, that.fileStructure); + } + } + + public static class Request extends ActionRequest { + + public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample"); + + private Integer linesToSample; + private BytesReference sample; + + public Request() { + } + + public Integer getLinesToSample() { + return linesToSample; + } + + public void setLinesToSample(Integer linesToSample) { + this.linesToSample = linesToSample; + } + + public BytesReference getSample() { + return sample; + } + + public void setSample(BytesReference sample) { + this.sample = sample; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (linesToSample != null && linesToSample <= 0) { + validationException = + addValidationError(LINES_TO_SAMPLE.getPreferredName() + " must be positive if specified", validationException); + } + if (sample == null || sample.length() == 0) { + validationException = addValidationError("sample must be specified", validationException); + } + return validationException; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + linesToSample = in.readOptionalVInt(); + sample = in.readBytesReference(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeOptionalVInt(linesToSample); + out.writeBytesReference(sample); + } + + @Override + public int hashCode() { + return Objects.hash(linesToSample, sample); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + Request that = (Request) other; + return Objects.equals(this.linesToSample, that.linesToSample) && + Objects.equals(this.sample, that.sample); + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java index a09aa522f7f87..8f624d000cc38 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java @@ -6,6 +6,9 @@ package org.elasticsearch.xpack.core.ml.filestructurefinder; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -16,7 +19,7 @@ import java.util.Map; import java.util.Objects; -public class FieldStats implements ToXContentObject { +public class FieldStats implements ToXContentObject, Writeable { static final ParseField COUNT = new ParseField("count"); static final ParseField CARDINALITY = new ParseField("cardinality"); @@ -64,6 +67,27 @@ public FieldStats(long count, int cardinality, Double minValue, Double maxValue, this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits); } + public FieldStats(StreamInput in) throws IOException { + count = in.readVLong(); + cardinality = in.readVInt(); + minValue = in.readOptionalDouble(); + maxValue = in.readOptionalDouble(); + meanValue = in.readOptionalDouble(); + medianValue = in.readOptionalDouble(); + topHits = in.readList(StreamInput::readMap); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(count); + out.writeVInt(cardinality); + out.writeOptionalDouble(minValue); + out.writeOptionalDouble(maxValue); + out.writeOptionalDouble(meanValue); + out.writeOptionalDouble(medianValue); + out.writeCollection(topHits, StreamOutput::writeMap); + } + public long getCount() { return count; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java index 6993737e8547d..5484f9f9902f4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java @@ -6,6 +6,9 @@ package org.elasticsearch.xpack.core.ml.filestructurefinder; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -24,7 +27,7 @@ /** * Stores the file format determined by Machine Learning. */ -public class FileStructure implements ToXContentObject { +public class FileStructure implements ToXContentObject, Writeable { public enum Format { @@ -79,6 +82,8 @@ public String toString() { } } + public static final String EXPLAIN = "explain"; + static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed"); static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed"); static final ParseField SAMPLE_START = new ParseField("sample_start"); @@ -176,6 +181,66 @@ public FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampl this.explanation = Collections.unmodifiableList(new ArrayList<>(explanation)); } + public FileStructure(StreamInput in) throws IOException { + numLinesAnalyzed = in.readVInt(); + numMessagesAnalyzed = in.readVInt(); + sampleStart = in.readString(); + charset = in.readString(); + hasByteOrderMarker = in.readOptionalBoolean(); + format = in.readEnum(Format.class); + multilineStartPattern = in.readOptionalString(); + excludeLinesPattern = in.readOptionalString(); + inputFields = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; + hasHeaderRow = in.readOptionalBoolean(); + delimiter = in.readBoolean() ? (char) in.readVInt() : null; + shouldTrimFields = in.readOptionalBoolean(); + grokPattern = in.readOptionalString(); + timestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; + timestampField = in.readOptionalString(); + needClientTimezone = in.readBoolean(); + mappings = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap())); + fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap(StreamInput::readString, FieldStats::new))); + explanation = Collections.unmodifiableList(in.readList(StreamInput::readString)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(numLinesAnalyzed); + out.writeVInt(numMessagesAnalyzed); + out.writeString(sampleStart); + out.writeString(charset); + out.writeOptionalBoolean(hasByteOrderMarker); + out.writeEnum(format); + out.writeOptionalString(multilineStartPattern); + out.writeOptionalString(excludeLinesPattern); + if (inputFields == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeCollection(inputFields, StreamOutput::writeString); + } + out.writeOptionalBoolean(hasHeaderRow); + if (delimiter == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeVInt(delimiter); + } + out.writeOptionalBoolean(shouldTrimFields); + out.writeOptionalString(grokPattern); + if (timestampFormats == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeCollection(timestampFormats, StreamOutput::writeString); + } + out.writeOptionalString(timestampField); + out.writeBoolean(needClientTimezone); + out.writeMap(mappings); + out.writeMap(fieldStats, StreamOutput::writeString, (out1, value) -> value.writeTo(out1)); + out.writeCollection(explanation, StreamOutput::writeString); + } + public int getNumLinesAnalyzed() { return numLinesAnalyzed; } @@ -300,7 +365,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.endObject(); } - builder.field(EXPLANATION.getPreferredName(), explanation); + if (params.paramAsBoolean(EXPLAIN, false)) { + builder.field(EXPLANATION.getPreferredName(), explanation); + } builder.endObject(); return builder; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java new file mode 100644 index 0000000000000..05ba0e7f306f4 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionRequestTests.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.test.AbstractStreamableTestCase; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.startsWith; + +public class FindFileStructureActionRequestTests extends AbstractStreamableTestCase { + + @Override + protected FindFileStructureAction.Request createTestInstance() { + + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); + + if (randomBoolean()) { + request.setLinesToSample(randomIntBetween(10, 2000)); + } + request.setSample(new BytesArray(randomByteArrayOfLength(randomIntBetween(1000, 20000)))); + + return request; + } + + @Override + protected FindFileStructureAction.Request createBlankInstance() { + return new FindFileStructureAction.Request(); + } + + public void testValidateLinesToSample() { + + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); + request.setLinesToSample(randomFrom(-1, 0)); + request.setSample(new BytesArray("foo\n")); + + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertThat(e.getMessage(), startsWith("Validation Failed: ")); + assertThat(e.getMessage(), containsString(" lines_to_sample must be positive if specified")); + } + + public void testValidateSample() { + + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); + if (randomBoolean()) { + request.setSample(BytesArray.EMPTY); + } + + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertThat(e.getMessage(), startsWith("Validation Failed: ")); + assertThat(e.getMessage(), containsString(" sample must be specified")); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java new file mode 100644 index 0000000000000..706ee44a4fd97 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/FindFileStructureActionResponseTests.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.test.AbstractStreamableTestCase; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructureTests; + +public class FindFileStructureActionResponseTests extends AbstractStreamableTestCase { + + @Override + protected FindFileStructureAction.Response createTestInstance() { + return new FindFileStructureAction.Response(FileStructureTests.createTestFileStructure()); + } + + @Override + protected FindFileStructureAction.Response createBlankInstance() { + return new FindFileStructureAction.Response(); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java index 2041fb26a6259..30f7c8f5576d0 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java @@ -5,16 +5,18 @@ */ package org.elasticsearch.xpack.core.ml.filestructurefinder; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.test.AbstractSerializingTestCase; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -public class FieldStatsTests extends AbstractXContentTestCase { +public class FieldStatsTests extends AbstractSerializingTestCase { + @Override protected FieldStats createTestInstance() { return createTestFieldStats(); } @@ -51,11 +53,13 @@ static FieldStats createTestFieldStats() { return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits); } - protected FieldStats doParseInstance(XContentParser parser) { - return FieldStats.PARSER.apply(parser, null); + @Override + protected Writeable.Reader instanceReader() { + return FieldStats::new; } - protected boolean supportsUnknownFields() { - return false; + @Override + protected FieldStats doParseInstance(XContentParser parser) { + return FieldStats.PARSER.apply(parser, null); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java index 5e89a4840b585..6dcf675196508 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.filestructurefinder; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.test.AbstractSerializingTestCase; import java.nio.charset.Charset; import java.util.Arrays; @@ -16,9 +18,14 @@ import java.util.Map; import java.util.TreeMap; -public class FileStructureTests extends AbstractXContentTestCase { +public class FileStructureTests extends AbstractSerializingTestCase { + @Override protected FileStructure createTestInstance() { + return createTestFileStructure(); + } + + public static FileStructure createTestFileStructure() { FileStructure.Format format = randomFrom(EnumSet.allOf(FileStructure.Format.class)); @@ -66,24 +73,31 @@ protected FileStructure createTestInstance() { } builder.setMappings(mappings); - //if (randomBoolean()) { + if (randomBoolean()) { Map fieldStats = new TreeMap<>(); for (String field : generateRandomStringArray(5, 20, false, false)) { fieldStats.put(field, FieldStatsTests.createTestFieldStats()); } builder.setFieldStats(fieldStats); - //} + } builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false))); return builder.build(); } + @Override + protected Writeable.Reader instanceReader() { + return FileStructure::new; + } + + @Override protected FileStructure doParseInstance(XContentParser parser) { return FileStructure.PARSER.apply(parser, null).build(); } - protected boolean supportsUnknownFields() { - return false; + @Override + protected ToXContent.Params getToXContentParams() { + return new ToXContent.MapParams(Collections.singletonMap(FileStructure.EXPLAIN, "true")); } } diff --git a/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java b/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java index 1c545ee387925..28fb159c0e947 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java +++ b/x-pack/plugin/ml/qa/ml-with-security/src/test/java/org/elasticsearch/smoketest/MlWithSecurityUserRoleIT.java @@ -31,10 +31,13 @@ public void test() throws IOException { super.test(); // We should have got here if and only if the only ML endpoints in the test were GETs + // or the find_file_structure API, which is also available to the machine_learning_user + // role for (ExecutableSection section : testCandidate.getTestSection().getExecutableSections()) { if (section instanceof DoSection) { if (((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.") && - ((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.get_") == false) { + ((DoSection) section).getApiCallSection().getApi().startsWith("xpack.ml.get_") == false && + ((DoSection) section).getApiCallSection().getApi().equals("xpack.ml.find_file_structure") == false) { fail("should have failed because of missing role"); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 66e82727bc70a..a3b498bad4770 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -67,6 +67,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; import org.elasticsearch.xpack.core.ml.action.FlushJobAction; import org.elasticsearch.xpack.core.ml.action.ForecastJobAction; import org.elasticsearch.xpack.core.ml.action.GetBucketsAction; @@ -120,6 +121,7 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; +import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; import org.elasticsearch.xpack.ml.action.TransportFlushJobAction; import org.elasticsearch.xpack.ml.action.TransportForecastJobAction; import org.elasticsearch.xpack.ml.action.TransportGetBucketsAction; @@ -181,6 +183,7 @@ import org.elasticsearch.xpack.ml.job.process.normalizer.NormalizerProcessFactory; import org.elasticsearch.xpack.ml.notifications.Auditor; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; +import org.elasticsearch.xpack.ml.rest.RestFindFileStructureAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarAction; import org.elasticsearch.xpack.ml.rest.calendar.RestDeleteCalendarEventAction; @@ -505,7 +508,8 @@ public List getRestHandlers(Settings settings, RestController restC new RestDeleteCalendarJobAction(settings, restController), new RestPutCalendarJobAction(settings, restController), new RestGetCalendarEventsAction(settings, restController), - new RestPostCalendarEventAction(settings, restController) + new RestPostCalendarEventAction(settings, restController), + new RestFindFileStructureAction(settings, restController) ); } @@ -562,7 +566,8 @@ public List getRestHandlers(Settings settings, RestController restC new ActionHandler<>(UpdateCalendarJobAction.INSTANCE, TransportUpdateCalendarJobAction.class), new ActionHandler<>(GetCalendarEventsAction.INSTANCE, TransportGetCalendarEventsAction.class), new ActionHandler<>(PostCalendarEventsAction.INSTANCE, TransportPostCalendarEventsAction.class), - new ActionHandler<>(PersistJobAction.INSTANCE, TransportPersistJobAction.class) + new ActionHandler<>(PersistJobAction.INSTANCE, TransportPersistJobAction.class), + new ActionHandler<>(FindFileStructureAction.INSTANCE, TransportFindFileStructureAction.class) ); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java new file mode 100644 index 0000000000000..96d5ef686b82b --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFindFileStructureAction.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinder; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; + +public class TransportFindFileStructureAction + extends HandledTransportAction { + + @Inject + public TransportFindFileStructureAction(Settings settings, TransportService transportService, ThreadPool threadPool, + ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) { + super(settings, FindFileStructureAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver, + FindFileStructureAction.Request::new); + } + + @Override + protected void doExecute(FindFileStructureAction.Request request, ActionListener listener) { + + // As determining the file structure might take a while, we run + // in a different thread to avoid blocking the network thread. + threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() -> { + try { + listener.onResponse(buildFileStructureResponse(request)); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + + private FindFileStructureAction.Response buildFileStructureResponse(FindFileStructureAction.Request request) throws Exception { + + FileStructureFinderManager structureFinderManager = new FileStructureFinderManager(); + + FileStructureFinder fileStructureFinder = + structureFinderManager.findFileStructure(request.getLinesToSample(), request.getSample().streamInput()); + + return new FindFileStructureAction.Response(fileStructureFinder.getStructure()); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java index 983188614d0ca..d0ce68aff25c0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java @@ -35,6 +35,7 @@ public final class FileStructureFinderManager { public static final int MIN_SAMPLE_LINE_COUNT = 2; + public static final int DEFAULT_IDEAL_SAMPLE_LINE_COUNT = 1000; static final Set FILEBEAT_SUPPORTED_ENCODINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( "866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese", "cn-big5", "cp1250", "cp1251", "cp1252", @@ -82,16 +83,18 @@ public final class FileStructureFinderManager { * Given a stream of data from some file, determine its structure. * @param idealSampleLineCount Ideally, how many lines from the stream will be read to determine the structure? * If the stream has fewer lines then an attempt will still be made, providing at - * least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. + * least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. If null + * the value of {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT} will be used. * @param fromFile A stream from which the sample will be read. * @return A {@link FileStructureFinder} object from which the structure and messages can be queried. * @throws Exception A variety of problems could occur at various stages of the structure finding process. */ - public FileStructureFinder findLogStructure(int idealSampleLineCount, InputStream fromFile) throws Exception { - return findLogStructure(new ArrayList<>(), idealSampleLineCount, fromFile); + public FileStructureFinder findFileStructure(Integer idealSampleLineCount, InputStream fromFile) throws Exception { + return findFileStructure(new ArrayList<>(), (idealSampleLineCount == null) ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : idealSampleLineCount, + fromFile); } - public FileStructureFinder findLogStructure(List explanation, int idealSampleLineCount, InputStream fromFile) + public FileStructureFinder findFileStructure(List explanation, int idealSampleLineCount, InputStream fromFile) throws Exception { CharsetMatch charsetMatch = findCharset(explanation, fromFile); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java index 3caa78589ba1b..292d0b8e8b305 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java @@ -445,7 +445,7 @@ public boolean matchesAll(Collection snippets) { @Override public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, Collection epilogues, Map mappings, Map fieldStats) { - String sampleValue = null; + Collection values = new ArrayList<>(); for (String snippet : snippets) { Map captures = grok.captures(snippet); // If the pattern doesn't match then captures will be null @@ -453,22 +453,24 @@ public String processCaptures(Map fieldNameCountStore, Collecti throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]"); } prefaces.add(captures.getOrDefault(PREFACE, "").toString()); - if (sampleValue == null) { - sampleValue = captures.get(VALUE).toString(); - } + values.add(captures.getOrDefault(VALUE, "").toString()); epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); } String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); if (mappings != null) { Map fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType); if ("date".equals(mappingType)) { - TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(sampleValue); + assert values.isEmpty() == false; + TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next()); if (timestampMatch != null) { fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat(); } } mappings.put(adjustedFieldName, fullMappingType); } + if (fieldStats != null) { + fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(values)); + } return "%{" + grokPatternName + ":" + adjustedFieldName + "}"; } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java new file mode 100644 index 0000000000000..83293c7d60efa --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/RestFindFileStructureAction.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.rest; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestController; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; +import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderManager; + +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +public class RestFindFileStructureAction extends BaseRestHandler { + + public RestFindFileStructureAction(Settings settings, RestController controller) { + super(settings); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "find_file_structure", this); + } + + @Override + public String getName() { + return "xpack_ml_find_file_structure_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + + FindFileStructureAction.Request request = new FindFileStructureAction.Request(); + request.setLinesToSample(restRequest.paramAsInt(FindFileStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(), + FileStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT)); + if (restRequest.hasContent()) { + request.setSample(restRequest.content()); + } else { + throw new ElasticsearchParseException("request body is required"); + } + + return channel -> client.execute(FindFileStructureAction.INSTANCE, request, new RestToXContentListener<>(channel)); + } + + @Override + protected Set responseParams() { + return Collections.singleton(FileStructure.EXPLAIN); + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json new file mode 100644 index 0000000000000..bd41e0c00bca8 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json @@ -0,0 +1,25 @@ +{ + "xpack.ml.find_file_structure": { + "documentation": "http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-file-structure.html", + "methods": [ "POST" ], + "url": { + "path": "/_xpack/ml/find_file_structure", + "paths": [ "/_xpack/ml/find_file_structure" ], + "params": { + "lines_to_sample": { + "type": "int", + "description": "Optional parameter to specify how many lines of the file to include in the analysis" + }, + "explain": { + "type": "boolean", + "description": "Optional parameter to include an commentary on how the structure was derived" + } + } + }, + "body": { + "description" : "The contents of the file to be analyzed", + "required" : true, + "serialize" : "bulk" + } + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml new file mode 100644 index 0000000000000..1d164cc0c5afc --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml @@ -0,0 +1,44 @@ +--- +"Test JSON file structure analysis": + - do: + headers: + # This is to stop the usual content type randomization, which + # would obviously ruin the results for this particular test + Content-Type: "application/json" + xpack.ml.find_file_structure: + body: + - airline: AAL + responsetime: 132.2046 + sourcetype: file-structure-test + time: 1403481600 + - airline: JZA + responsetime: 990.4628 + sourcetype: file-structure-test + time: 1403481700 + - airline: AAL + responsetime: 134.2046 + sourcetype: file-structure-test + time: 1403481800 + + - match: { num_lines_analyzed: 3 } + - match: { num_messages_analyzed: 3 } + - match: { charset: "UTF-8" } + - match: { has_byte_order_marker: false } + - match: { format: json } + - match: { timestamp_field: time } + - match: { timestamp_formats.0: UNIX } + - match: { need_client_timezone: false } + - match: { mappings.airline.type: keyword } + - match: { mappings.responsetime.type: double } + - match: { mappings.sourcetype.type: keyword } + - match: { mappings.time.type: date } + - match: { mappings.time.format: epoch_second } + - match: { field_stats.airline.count: 3 } + - match: { field_stats.airline.cardinality: 2 } + - match: { field_stats.responsetime.count: 3 } + - match: { field_stats.responsetime.cardinality: 3 } + - match: { field_stats.sourcetype.count: 3 } + - match: { field_stats.sourcetype.cardinality: 1 } + - match: { field_stats.time.count: 3 } + - match: { field_stats.time.cardinality: 3 } + - match: { field_stats.time.cardinality: 3 }