From 2284725b5034d6e4be7df3c9e6a3048e1447245e Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 15 Jan 2024 11:59:41 +0100 Subject: [PATCH 01/28] Add extract match ranges functionality to Grok. --- .../java/org/elasticsearch/grok/Grok.java | 24 +++++++-- .../elasticsearch/grok/GrokCaptureConfig.java | 17 +++++++ .../grok/GrokCaptureExtracter.java | 11 +++- .../org/elasticsearch/grok/GrokTests.java | 50 ++++++++++++++++++- 4 files changed, 96 insertions(+), 6 deletions(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index 5b9a6f469896..9acaefcd9197 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -23,6 +23,7 @@ import java.util.Locale; import java.util.Map; import java.util.function.Consumer; +import java.util.function.Function; public final class Grok { @@ -86,7 +87,7 @@ private Grok( expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE, - message -> logCallBack.accept(message) + logCallBack::accept ); List grokCaptureConfigs = new ArrayList<>(); @@ -116,7 +117,7 @@ private static String groupMatch(String name, Region region, String pattern) { * * @return named regex expression */ - protected String toRegex(PatternBank patternBank, String grokPattern) { + String toRegex(PatternBank patternBank, String grokPattern) { StringBuilder res = new StringBuilder(); for (int i = 0; i < MAX_TO_REGEX_ITERATIONS; i++) { byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8); @@ -189,8 +190,25 @@ public boolean match(String text) { * @return a map containing field names and their respective coerced values that matched or null if the pattern didn't match */ public Map captures(String text) { + return innerCaptures(text, cfg -> cfg::objectExtracter); + } + + /** + * Matches and returns the ranges of any named captures. + * + * @param text the text to match and extract values from. + * @return a map containing field names and their respective ranges that matched or null if the pattern didn't match + */ + public Map captureRanges(String text) { + return innerCaptures(text, cfg -> cfg::rangeExtracter); + } + + private Map innerCaptures( + String text, + Function, GrokCaptureExtracter>> getExtracter + ) { byte[] utf8Bytes = text.getBytes(StandardCharsets.UTF_8); - GrokCaptureExtracter.MapExtracter extracter = new GrokCaptureExtracter.MapExtracter(captureConfig); + GrokCaptureExtracter.MapExtracter extracter = new GrokCaptureExtracter.MapExtracter(captureConfig, getExtracter); if (match(utf8Bytes, 0, utf8Bytes.length, extracter)) { return extracter.result(); } diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java index 703db401814d..462fe8dff4fe 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java @@ -144,4 +144,21 @@ public interface NativeExtracterMap { */ T forBoolean(Function, GrokCaptureExtracter> buildExtracter); } + + /** + * Creates a {@linkplain GrokCaptureExtracter} that will call {@code emit} with the + * extracted range (offset and length) when it extracts text. + */ + public GrokCaptureExtracter rangeExtracter(Consumer emit) { + return (utf8Bytes, offset, region) -> { + for (int number : backRefs) { + if (region.beg[number] >= 0) { + int matchOffset = offset + region.beg[number]; + int matchLength = region.end[number] - region.beg[number]; + String match = new String(utf8Bytes, matchOffset, matchLength); + emit.accept(new GrokCaptureExtracter.Range(match, matchOffset, matchLength)); + } + } + }; + } } diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java index fa7762d7d6cf..11f907745859 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java @@ -14,6 +14,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; import static java.util.Collections.emptyMap; @@ -22,6 +24,8 @@ */ public interface GrokCaptureExtracter { + record Range(Object match, int offset, int length) {} + /** * Extract {@link Map} results. This implementation of {@link GrokCaptureExtracter} * is mutable and should be discarded after collecting a single result. @@ -31,11 +35,14 @@ class MapExtracter implements GrokCaptureExtracter { private final List fieldExtracters; @SuppressWarnings("unchecked") - MapExtracter(List captureConfig) { + MapExtracter( + List captureConfig, + Function, GrokCaptureExtracter>> getExtracter + ) { result = captureConfig.isEmpty() ? emptyMap() : new HashMap<>(); fieldExtracters = new ArrayList<>(captureConfig.size()); for (GrokCaptureConfig config : captureConfig) { - fieldExtracters.add(config.objectExtracter(value -> { + fieldExtracters.add(getExtracter.apply(config).apply(value -> { var key = config.name(); // Logstash's Grok processor flattens the list of values to a single value in case there's only 1 match, diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index 6c5618f11ec9..41a4a86c4cb8 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -82,13 +82,61 @@ private void testCapturesBytes(boolean ecsCompatibility) { } private Map captureBytes(Grok grok, byte[] utf8, int offset, int length) { - GrokCaptureExtracter.MapExtracter extracter = new GrokCaptureExtracter.MapExtracter(grok.captureConfig()); + GrokCaptureExtracter.MapExtracter extracter = new GrokCaptureExtracter.MapExtracter( + grok.captureConfig(), + cfg -> cfg::objectExtracter + ); if (grok.match(utf8, offset, length, extracter)) { return extracter.result(); } return null; } + public void testCaptureRanges() { + captureRanges(false); + captureRanges(true); + } + + private void captureRanges(boolean ecsCompatibility) { + Grok grok = new Grok(GrokBuiltinPatterns.get(ecsCompatibility), "%{WORD:a} %{WORD:b} %{NUMBER:c:int}", logger::warn); + assertThat( + grok.captureRanges("xx aaaaa bbb 1234 yyy"), + equalTo( + Map.of( + "a", + new GrokCaptureExtracter.Range("aaaaa", 3, 5), + "b", + new GrokCaptureExtracter.Range("bbb", 9, 3), + "c", + new GrokCaptureExtracter.Range("1234", 13, 4) + ) + ) + ); + } + + public void testCaptureRanges_noMatch() { + captureRanges_noMatch(false); + captureRanges_noMatch(true); + } + + private void captureRanges_noMatch(boolean ecsCompatibility) { + Grok grok = new Grok(GrokBuiltinPatterns.get(ecsCompatibility), "%{WORD:a} %{WORD:b} %{NUMBER:c:int}", logger::warn); + assertNull(grok.captureRanges("xx aaaaa bbb ccc yyy")); + } + + public void testCaptureRanges_multipleNamedCapturesWithSameName() { + captureRanges_multipleNamedCapturesWithSameName(false); + captureRanges_multipleNamedCapturesWithSameName(true); + } + + private void captureRanges_multipleNamedCapturesWithSameName(boolean ecsCompatibility) { + Grok grok = new Grok(GrokBuiltinPatterns.get(ecsCompatibility), "%{WORD:parts} %{WORD:parts}", logger::warn); + assertThat( + grok.captureRanges(" aa bbb c ddd e "), + equalTo(Map.of("parts", List.of(new GrokCaptureExtracter.Range("aa", 2, 2), new GrokCaptureExtracter.Range("bbb", 5, 3)))) + ); + } + public void testNoMatchingPatternInDictionary() { Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(PatternBank.EMPTY, "%{NOTFOUND}", logger::warn)); assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary")); From 3e2057e3a9ad68b435539d5e1a6b638c5f214c50 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 16 Jan 2024 09:13:56 +0100 Subject: [PATCH 02/28] TestGrokPatternAction and Request --- .../action/TestGrokPatternAction.java | 147 ++++++++++++++++++ .../textstructure/TextStructurePlugin.java | 11 +- .../rest/RestTestGrokPatternAction.java | 44 ++++++ .../TransportTestGrokPatternAction.java | 40 +++++ 4 files changed, 239 insertions(+), 3 deletions(-) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java create mode 100644 x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java create mode 100644 x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java new file mode 100644 index 000000000000..adaca096ed7c --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -0,0 +1,147 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.textstructure.action; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +public class TestGrokPatternAction extends ActionType { + + public static final TestGrokPatternAction INSTANCE = new TestGrokPatternAction(); + public static final String NAME = "cluster:monitor/text_structure/testgrokpattern"; + + private TestGrokPatternAction() { + super(NAME, Response::new); + } + + public static class Request extends ActionRequest { + + public static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); + public static final ParseField TEXTS = new ParseField("texts"); + + private static final ObjectParser PARSER = createParser(); + + private static ObjectParser createParser() { + ObjectParser parser = new ObjectParser<>("textstructure/testgrokpattern", false, Request.Builder::new); + parser.declareString(Request.Builder::grokPattern, GROK_PATTERN); + parser.declareStringArray(Request.Builder::texts, TEXTS); + return parser; + } + + public static class Builder { + private String grokPattern; + private List texts; + + public void grokPattern(String grokPattern) { + this.grokPattern = grokPattern; + } + + public void texts(List texts) { + this.texts = texts; + } + + public Request build() { + return new Request(grokPattern, texts); + } + } + + private final String grokPattern; + private final List texts; + + private Request(String grokPattern, List texts) { + this.grokPattern = grokPattern; + this.texts = texts; + } + + public Request(StreamInput in) throws IOException { + super(in); + grokPattern = in.readString(); + texts = in.readStringCollectionAsList(); + } + + public static Request parseRequest(XContentParser parser) throws IOException { + return PARSER.parse(parser, null).build(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(grokPattern); + out.writeStringCollection(texts); + } + + public String getGrokPattern() { + return grokPattern; + } + + public List getTexts() { + return texts; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (grokPattern == null) { + validationException = addValidationError("[" + GROK_PATTERN.getPreferredName() + "] missing.", validationException); + } + if (texts == null || texts.isEmpty()) { + validationException = addValidationError("[" + TEXTS.getPreferredName() + "] missing or empty.", validationException); + } + return validationException; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Request request = (Request) o; + return Objects.equals(grokPattern, request.grokPattern) && Objects.equals(texts, request.texts); + } + + @Override + public int hashCode() { + return Objects.hash(grokPattern, texts); + } + + @Override + public String toString() { + return "Request{" + "grokPattern='" + grokPattern + '\'' + ", texts=" + texts + '}'; + } + } + + public static class Response extends ActionResponse implements ToXContentObject, Writeable { + + Response(StreamInput in) throws IOException { + super(in); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException {} + } +} diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java index c81aa45581b8..b7d9117a3f9d 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java @@ -21,8 +21,11 @@ import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction; +import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; import org.elasticsearch.xpack.textstructure.rest.RestFindStructureAction; +import org.elasticsearch.xpack.textstructure.rest.RestTestGrokPatternAction; import org.elasticsearch.xpack.textstructure.transport.TransportFindStructureAction; +import org.elasticsearch.xpack.textstructure.transport.TransportTestGrokPatternAction; import java.util.Arrays; import java.util.List; @@ -47,12 +50,14 @@ public List getRestHandlers( IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster ) { - return Arrays.asList(new RestFindStructureAction()); + return Arrays.asList(new RestFindStructureAction(), new RestTestGrokPatternAction()); } @Override public List> getActions() { - return Arrays.asList(new ActionHandler<>(FindStructureAction.INSTANCE, TransportFindStructureAction.class)); + return Arrays.asList( + new ActionHandler<>(FindStructureAction.INSTANCE, TransportFindStructureAction.class), + new ActionHandler<>(TestGrokPatternAction.INSTANCE, TransportTestGrokPatternAction.class) + ); } - } diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java new file mode 100644 index 000000000000..9e97c6c4d4a1 --- /dev/null +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.textstructure.rest; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PATH; + +public class RestTestGrokPatternAction extends BaseRestHandler { + + @Override + public String getName() { + return "text_structure_test_grok_pattern_action"; + } + + @Override + public List routes() { + return List.of(new Route(GET, BASE_PATH + "test_grok_pattern"), new Route(POST, BASE_PATH + "test_grok_pattern")); + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + TestGrokPatternAction.Request request; + try (XContentParser parser = restRequest.contentParser()) { + request = TestGrokPatternAction.Request.parseRequest(parser); + } + return channel -> client.execute(TestGrokPatternAction.INSTANCE, request, new RestToXContentListener<>(channel)); + } +} diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java new file mode 100644 index 000000000000..42f1c2bbf6d4 --- /dev/null +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.textstructure.transport; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; + +public class TransportTestGrokPatternAction extends HandledTransportAction { + + private final ThreadPool threadPool; + + @Inject + public TransportTestGrokPatternAction(TransportService transportService, ActionFilters actionFilters, ThreadPool threadPool) { + super( + TestGrokPatternAction.NAME, + transportService, + actionFilters, + TestGrokPatternAction.Request::new, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.threadPool = threadPool; + } + + @Override + protected void doExecute(Task task, TestGrokPatternAction.Request request, ActionListener listener) { + listener.onFailure(new Exception("not implemented yet! request=" + request)); + } +} From 4c925ddacc81a667160385ea90383a44a5c1f7a3 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 16 Jan 2024 11:25:18 +0100 Subject: [PATCH 03/28] TestGrokPattern response --- .../grok/GrokCaptureExtracter.java | 4 +- .../action/TestGrokPatternAction.java | 48 ++++++++++++++++++- .../TransportTestGrokPatternAction.java | 29 ++++++++++- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java index 11f907745859..415348be05a7 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java @@ -11,7 +11,7 @@ import org.joni.Region; import java.util.ArrayList; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Consumer; @@ -39,7 +39,7 @@ class MapExtracter implements GrokCaptureExtracter { List captureConfig, Function, GrokCaptureExtracter>> getExtracter ) { - result = captureConfig.isEmpty() ? emptyMap() : new HashMap<>(); + result = captureConfig.isEmpty() ? emptyMap() : new LinkedHashMap<>(); fieldExtracters = new ArrayList<>(captureConfig.size()); for (GrokCaptureConfig config : captureConfig) { fieldExtracters.add(getExtracter.apply(config).apply(value -> { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index adaca096ed7c..5e18d4d3eede 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.grok.GrokCaptureExtracter; import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; @@ -22,6 +23,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.Objects; import static org.elasticsearch.action.ValidateActions.addValidationError; @@ -132,16 +134,58 @@ public String toString() { public static class Response extends ActionResponse implements ToXContentObject, Writeable { - Response(StreamInput in) throws IOException { + private final List> ranges; + + public Response(List> ranges) { + this.ranges = ranges; + } + + public Response(StreamInput in) throws IOException { super(in); + ranges = in.readCollectionAsList(StreamInput::readGenericMap); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startArray("matches"); + for (Map ranges : ranges) { + if (ranges == null) { + builder.nullValue(); + } else { + builder.startObject(); + for (Map.Entry rangeOrList : ranges.entrySet()) { + if (rangeOrList.getValue() instanceof GrokCaptureExtracter.Range) { + GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeOrList.getValue(); + builder.startObject(rangeOrList.getKey()); + builder.field("match", range.match()); + builder.field("offset", range.offset()); + builder.field("length", range.length()); + builder.endObject(); + } else if (rangeOrList.getValue() instanceof List) { + builder.startArray(rangeOrList.getKey()); + for (Object rangeObject : (List) rangeOrList.getValue()) { + GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeObject; + builder.startObject(); + builder.field("match", range.match()); + builder.field("offset", range.offset()); + builder.field("length", range.length()); + builder.endObject(); + } + builder.endArray(); + } + } + builder.endObject(); + } + } + builder.endArray(); + builder.endObject(); return builder; } @Override - public void writeTo(StreamOutput out) throws IOException {} + public void writeTo(StreamOutput out) throws IOException { + out.writeGenericList(ranges, StreamOutput::writeGenericMap); + } } } diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java index 42f1c2bbf6d4..614f891d4d78 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -7,18 +7,28 @@ package org.elasticsearch.xpack.textstructure.transport; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.HandledTransportAction; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.grok.Grok; +import org.elasticsearch.grok.GrokBuiltinPatterns; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + public class TransportTestGrokPatternAction extends HandledTransportAction { + private static final Logger logger = LogManager.getLogger(TransportTestGrokPatternAction.class); + private final ThreadPool threadPool; @Inject @@ -35,6 +45,23 @@ public TransportTestGrokPatternAction(TransportService transportService, ActionF @Override protected void doExecute(Task task, TestGrokPatternAction.Request request, ActionListener listener) { - listener.onFailure(new Exception("not implemented yet! request=" + request)); + // As determining the text structure might take a while, we run + // in a different thread to avoid blocking the network thread. + threadPool.generic().execute(() -> { + try { + listener.onResponse(getResponse(request)); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + + private TestGrokPatternAction.Response getResponse(TestGrokPatternAction.Request request) { + Grok grok = new Grok(GrokBuiltinPatterns.get(true), request.getGrokPattern(), logger::warn); + List> ranges = new ArrayList<>(); + for (String text : request.getTexts()) { + ranges.add(grok.captureRanges(text)); + } + return new TestGrokPatternAction.Response(ranges); } } From 84d5b77e0843ba84ef50078b576e9f2863c2e3ac Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Tue, 16 Jan 2024 12:29:41 +0100 Subject: [PATCH 04/28] Update docs/changelog/104394.yaml --- docs/changelog/104394.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/104394.yaml diff --git a/docs/changelog/104394.yaml b/docs/changelog/104394.yaml new file mode 100644 index 000000000000..a9bd4f0ac375 --- /dev/null +++ b/docs/changelog/104394.yaml @@ -0,0 +1,5 @@ +pr: 104394 +summary: Endpoint to test Grok pattern +area: Machine Learning +type: enhancement +issues: [] From 6ed27d238d43c163279127eccd0784ddeb05d73b Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 16 Jan 2024 12:34:25 +0100 Subject: [PATCH 05/28] Polish validation error message --- .../core/textstructure/action/TestGrokPatternAction.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index 5e18d4d3eede..2f0ecd7734ac 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -105,10 +105,10 @@ public List getTexts() { public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; if (grokPattern == null) { - validationException = addValidationError("[" + GROK_PATTERN.getPreferredName() + "] missing.", validationException); + validationException = addValidationError("[" + GROK_PATTERN.getPreferredName() + "] missing", validationException); } - if (texts == null || texts.isEmpty()) { - validationException = addValidationError("[" + TEXTS.getPreferredName() + "] missing or empty.", validationException); + if (texts == null) { + validationException = addValidationError("[" + TEXTS.getPreferredName() + "] missing", validationException); } return validationException; } From 83db25ceb8dbbb14e413f5ed0023a62fb7a8463d Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 09:29:18 +0100 Subject: [PATCH 06/28] Improve test_grok_pattern API --- .../action/TestGrokPatternAction.java | 45 ++++++++++--------- .../TransportTestGrokPatternAction.java | 2 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index 2f0ecd7734ac..ea9f3bc9b0ca 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -40,46 +40,46 @@ private TestGrokPatternAction() { public static class Request extends ActionRequest { public static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); - public static final ParseField TEXTS = new ParseField("texts"); + public static final ParseField TEXT = new ParseField("text"); private static final ObjectParser PARSER = createParser(); private static ObjectParser createParser() { ObjectParser parser = new ObjectParser<>("textstructure/testgrokpattern", false, Request.Builder::new); parser.declareString(Request.Builder::grokPattern, GROK_PATTERN); - parser.declareStringArray(Request.Builder::texts, TEXTS); + parser.declareStringArray(Request.Builder::texts, TEXT); return parser; } public static class Builder { private String grokPattern; - private List texts; + private List text; public void grokPattern(String grokPattern) { this.grokPattern = grokPattern; } - public void texts(List texts) { - this.texts = texts; + public void texts(List text) { + this.text = text; } public Request build() { - return new Request(grokPattern, texts); + return new Request(grokPattern, text); } } private final String grokPattern; - private final List texts; + private final List text; - private Request(String grokPattern, List texts) { + private Request(String grokPattern, List text) { this.grokPattern = grokPattern; - this.texts = texts; + this.text = text; } public Request(StreamInput in) throws IOException { super(in); grokPattern = in.readString(); - texts = in.readStringCollectionAsList(); + text = in.readStringCollectionAsList(); } public static Request parseRequest(XContentParser parser) throws IOException { @@ -90,15 +90,15 @@ public static Request parseRequest(XContentParser parser) throws IOException { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeString(grokPattern); - out.writeStringCollection(texts); + out.writeStringCollection(text); } public String getGrokPattern() { return grokPattern; } - public List getTexts() { - return texts; + public List getText() { + return text; } @Override @@ -107,8 +107,8 @@ public ActionRequestValidationException validate() { if (grokPattern == null) { validationException = addValidationError("[" + GROK_PATTERN.getPreferredName() + "] missing", validationException); } - if (texts == null) { - validationException = addValidationError("[" + TEXTS.getPreferredName() + "] missing", validationException); + if (text == null) { + validationException = addValidationError("[" + TEXT.getPreferredName() + "] missing", validationException); } return validationException; } @@ -118,17 +118,17 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Request request = (Request) o; - return Objects.equals(grokPattern, request.grokPattern) && Objects.equals(texts, request.texts); + return Objects.equals(grokPattern, request.grokPattern) && Objects.equals(text, request.text); } @Override public int hashCode() { - return Objects.hash(grokPattern, texts); + return Objects.hash(grokPattern, text); } @Override public String toString() { - return "Request{" + "grokPattern='" + grokPattern + '\'' + ", texts=" + texts + '}'; + return "Request{" + "grokPattern='" + grokPattern + '\'' + ", text=" + text + '}'; } } @@ -150,10 +150,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startArray("matches"); for (Map ranges : ranges) { - if (ranges == null) { - builder.nullValue(); - } else { - builder.startObject(); + builder.startObject(); + builder.field("matched", ranges != null); + if (ranges != null) { + builder.startObject("fields"); for (Map.Entry rangeOrList : ranges.entrySet()) { if (rangeOrList.getValue() instanceof GrokCaptureExtracter.Range) { GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeOrList.getValue(); @@ -177,6 +177,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.endObject(); } + builder.endObject(); } builder.endArray(); builder.endObject(); diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java index 614f891d4d78..b94365349fd7 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -59,7 +59,7 @@ protected void doExecute(Task task, TestGrokPatternAction.Request request, Actio private TestGrokPatternAction.Response getResponse(TestGrokPatternAction.Request request) { Grok grok = new Grok(GrokBuiltinPatterns.get(true), request.getGrokPattern(), logger::warn); List> ranges = new ArrayList<>(); - for (String text : request.getTexts()) { + for (String text : request.getText()) { ranges.add(grok.captureRanges(text)); } return new TestGrokPatternAction.Response(ranges); From 05f96581cae0ebd7ac775e7f2091b4ebf5245cd7 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 09:43:19 +0100 Subject: [PATCH 07/28] Add explicit CharSet --- .../src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java index 462fe8dff4fe..3b10cffebebb 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java @@ -155,7 +155,7 @@ public GrokCaptureExtracter rangeExtracter(Consumer emit) { if (region.beg[number] >= 0) { int matchOffset = offset + region.beg[number]; int matchLength = region.end[number] - region.beg[number]; - String match = new String(utf8Bytes, matchOffset, matchLength); + String match = new String(utf8Bytes, matchOffset, matchLength, StandardCharsets.UTF_8); emit.accept(new GrokCaptureExtracter.Range(match, matchOffset, matchLength)); } } From 9daa1b615e897f6bc440be49759ad78da9ca9ef4 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 10:29:50 +0100 Subject: [PATCH 08/28] Add endpoint to operator constants --- .../textstructure/action/TestGrokPatternAction.java | 10 ++++++---- .../xpack/security/operator/Constants.java | 1 + .../transport/TransportTestGrokPatternAction.java | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index ea9f3bc9b0ca..c8141ddb265f 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -31,7 +31,7 @@ public class TestGrokPatternAction extends ActionType { public static final TestGrokPatternAction INSTANCE = new TestGrokPatternAction(); - public static final String NAME = "cluster:monitor/text_structure/testgrokpattern"; + public static final String NAME = "cluster:monitor/text_structure/test_grok_pattern"; private TestGrokPatternAction() { super(NAME, Response::new); @@ -47,7 +47,7 @@ public static class Request extends ActionRequest { private static ObjectParser createParser() { ObjectParser parser = new ObjectParser<>("textstructure/testgrokpattern", false, Request.Builder::new); parser.declareString(Request.Builder::grokPattern, GROK_PATTERN); - parser.declareStringArray(Request.Builder::texts, TEXT); + parser.declareStringArray(Request.Builder::text, TEXT); return parser; } @@ -55,12 +55,14 @@ public static class Builder { private String grokPattern; private List text; - public void grokPattern(String grokPattern) { + public Builder grokPattern(String grokPattern) { this.grokPattern = grokPattern; + return this; } - public void texts(List text) { + public Builder text(List text) { this.text = text; + return this; } public Request build() { diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java index cab0c2bff28f..eac3d8bf928c 100644 --- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java +++ b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java @@ -340,6 +340,7 @@ public class Constants { "cluster:monitor/task/get", "cluster:monitor/tasks/lists", "cluster:monitor/text_structure/findstructure", + "cluster:monitor/text_structure/test_grok_pattern", "cluster:monitor/transform/get", "cluster:monitor/transform/stats/get", "cluster:monitor/xpack/analytics/stats", diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java index b94365349fd7..65007371e927 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -45,7 +45,7 @@ public TransportTestGrokPatternAction(TransportService transportService, ActionF @Override protected void doExecute(Task task, TestGrokPatternAction.Request request, ActionListener listener) { - // As determining the text structure might take a while, we run + // As matching a regular expression might take a while, we run // in a different thread to avoid blocking the network thread. threadPool.generic().execute(() -> { try { From 5d16a041ab55665cbd9d4ae348d4adf311aeb9b7 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 10:50:52 +0100 Subject: [PATCH 09/28] Add TransportTestGrokPatternActionTests --- .../TransportTestGrokPatternActionTests.java | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java new file mode 100644 index 000000000000..663b983dc140 --- /dev/null +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.textstructure.transport; + +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.MockUtils; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; +import org.elasticsearch.xpack.core.watcher.support.xcontent.XContentSource; + +import java.util.List; + +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.mockito.Mockito.mock; + +public class TransportTestGrokPatternActionTests extends ESTestCase { + + private XContentSource executeRequest(TestGrokPatternAction.Request request) throws Exception { + TransportService service = MockUtils.setupTransportServiceWithThreadpoolExecutor(); + TransportTestGrokPatternAction action = new TransportTestGrokPatternAction( + service, + mock(ActionFilters.class), + service.getThreadPool() + ); + + PlainActionFuture future = new PlainActionFuture<>(); + action.doExecute(mock(Task.class), request, future); + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + future.get().toXContent(builder, ToXContent.EMPTY_PARAMS); + return new XContentSource(builder); + } + } + + public void test() throws Exception { + TestGrokPatternAction.Request request = new TestGrokPatternAction.Request.Builder().grokPattern( + "%{WORD}.*%{WORD:first_name} %{WORD:last_name}!" + ).text(List.of("Hello Dave Roberts!", "this does not match")).build(); + + XContentSource source = executeRequest(request); + assertThat(source.getValue(""), aMapWithSize(1)); + assertThat(source.getValue("matches"), hasSize(2)); + assertThat(source.getValue("matches.0"), aMapWithSize(2)); + assertThat(source.getValue("matches.0.matched"), equalTo(true)); + assertThat(source.getValue("matches.0.fields"), aMapWithSize(2)); + assertThat(source.getValue("matches.0.fields.first_name"), aMapWithSize(3)); + assertThat(source.getValue("matches.0.fields.first_name.match"), equalTo("Dave")); + assertThat(source.getValue("matches.0.fields.first_name.offset"), equalTo(8)); + assertThat(source.getValue("matches.0.fields.first_name.length"), equalTo(4)); + assertThat(source.getValue("matches.0.fields.last_name"), aMapWithSize(3)); + assertThat(source.getValue("matches.0.fields.last_name.match"), equalTo("Roberts")); + assertThat(source.getValue("matches.0.fields.last_name.offset"), equalTo(13)); + assertThat(source.getValue("matches.0.fields.last_name.length"), equalTo(7)); + assertThat(source.getValue("matches.1"), aMapWithSize(1)); + assertThat(source.getValue("matches.1.matched"), equalTo(false)); + } +} From e1b739453cd9b8f8ea245bc3cb22a74997f44ba0 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 17:07:39 +0100 Subject: [PATCH 10/28] REST API spec --- .../api/text_structure.test_grok_pattern.json | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json new file mode 100644 index 000000000000..a1a6fb93f11a --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json @@ -0,0 +1,29 @@ +{ + "text_structure.test_grok_pattern":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/test-grok-pattern-api.html", + "description":"Tests a Grok pattern on some text." + }, + "stability":"experimental", + "visibility":"public", + "headers":{ + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_text_structure/test_grok_pattern", + "methods":[ + "GET", + "POST" + ], + "body":{ + "description":"The Grok pattern and text.", + "required":true + } + } + ] + } + } +} From b18bd1555f9b421e7211e6b93ea7789e15fc8542 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 17:19:04 +0100 Subject: [PATCH 11/28] One more TransportTestGrokPatternActionTest --- .../TransportTestGrokPatternActionTests.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java index 663b983dc140..8a39cad8a909 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java @@ -66,4 +66,28 @@ public void test() throws Exception { assertThat(source.getValue("matches.1"), aMapWithSize(1)); assertThat(source.getValue("matches.1.matched"), equalTo(false)); } + + public void test_repeatedIdentifiers() throws Exception { + TestGrokPatternAction.Request request = new TestGrokPatternAction.Request.Builder().grokPattern( + "%{WORD}.*%{WORD:name} %{WORD:name}!" + ).text(List.of("Hello Dave Roberts!", "this does not match")).build(); + + XContentSource source = executeRequest(request); + assertThat(source.getValue(""), aMapWithSize(1)); + assertThat(source.getValue("matches"), hasSize(2)); + assertThat(source.getValue("matches.0"), aMapWithSize(2)); + assertThat(source.getValue("matches.0.matched"), equalTo(true)); + assertThat(source.getValue("matches.0.fields"), aMapWithSize(1)); + assertThat(source.getValue("matches.0.fields.name"), hasSize(2)); + assertThat(source.getValue("matches.0.fields.name.0"), aMapWithSize(3)); + assertThat(source.getValue("matches.0.fields.name.0.match"), equalTo("Dave")); + assertThat(source.getValue("matches.0.fields.name.0.offset"), equalTo(8)); + assertThat(source.getValue("matches.0.fields.name.0.length"), equalTo(4)); + assertThat(source.getValue("matches.0.fields.name.1"), aMapWithSize(3)); + assertThat(source.getValue("matches.0.fields.name.1.match"), equalTo("Roberts")); + assertThat(source.getValue("matches.0.fields.name.1.offset"), equalTo(13)); + assertThat(source.getValue("matches.0.fields.name.1.length"), equalTo(7)); + assertThat(source.getValue("matches.1"), aMapWithSize(1)); + assertThat(source.getValue("matches.1.matched"), equalTo(false)); + } } From 6c22cef6badf3ddfb564ab11afe71042a4214db7 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 17:29:11 +0100 Subject: [PATCH 12/28] Fix API spec --- .../api/text_structure.test_grok_pattern.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json index a1a6fb93f11a..1a1ff915c869 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json @@ -10,20 +10,20 @@ "accept": ["application/json"], "content_type": ["application/json"] }, - "url":{ - "paths":[ + "url": { + "paths": [ { - "path":"/_text_structure/test_grok_pattern", - "methods":[ + "path": "/_text_structure/test_grok_pattern", + "methods": [ "GET", "POST" - ], - "body":{ - "description":"The Grok pattern and text.", - "required":true - } + ] } ] + }, + "body":{ + "description":"The Grok pattern and text.", + "required":true } } } From 21004c9fd2941ffc515e9c3cc3526c30ff5cb8c9 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Wed, 17 Jan 2024 17:50:09 +0100 Subject: [PATCH 13/28] Refactor REST API spec --- .../api/text_structure.test_grok_pattern.json | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json index 1a1ff915c869..92cfae5e1b89 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json @@ -1,12 +1,12 @@ { - "text_structure.test_grok_pattern":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/test-grok-pattern-api.html", - "description":"Tests a Grok pattern on some text." + "text_structure.test_grok_pattern": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/test-grok-pattern-api.html", + "description": "Tests a Grok pattern on some text." }, - "stability":"experimental", - "visibility":"public", - "headers":{ + "stability": "experimental", + "visibility": "public", + "headers": { "accept": ["application/json"], "content_type": ["application/json"] }, @@ -14,16 +14,13 @@ "paths": [ { "path": "/_text_structure/test_grok_pattern", - "methods": [ - "GET", - "POST" - ] + "methods": ["GET", "POST"] } ] }, - "body":{ - "description":"The Grok pattern and text.", - "required":true + "body": { + "description": "The Grok pattern and text.", + "required": true } } } From 018614b4bdab76b86da21ae1f863c933c944f7db Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Thu, 18 Jan 2024 13:46:37 +0100 Subject: [PATCH 14/28] Polish code --- docs/changelog/104394.yaml | 2 +- .../api/text_structure.test_grok_pattern.json | 2 +- .../action/TestGrokPatternAction.java | 14 ++++---------- .../transport/TransportTestGrokPatternAction.java | 2 +- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/docs/changelog/104394.yaml b/docs/changelog/104394.yaml index a9bd4f0ac375..39fbfc0c4ea2 100644 --- a/docs/changelog/104394.yaml +++ b/docs/changelog/104394.yaml @@ -1,5 +1,5 @@ pr: 104394 -summary: Endpoint to test Grok pattern +summary: Endpoint to find positions of Grok pattern matches area: Machine Learning type: enhancement issues: [] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json index 92cfae5e1b89..eb14e880bb4b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json @@ -4,7 +4,7 @@ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/test-grok-pattern-api.html", "description": "Tests a Grok pattern on some text." }, - "stability": "experimental", + "stability": "stable", "visibility": "public", "headers": { "accept": ["application/json"], diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index c8141ddb265f..9c7a097dc6b6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -20,6 +20,7 @@ import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.transform.utils.ExceptionsHelper; import java.io.IOException; import java.util.List; @@ -74,8 +75,8 @@ public Request build() { private final List text; private Request(String grokPattern, List text) { - this.grokPattern = grokPattern; - this.text = text; + this.grokPattern = ExceptionsHelper.requireNonNull(grokPattern, GROK_PATTERN.getPreferredName()); + this.text = ExceptionsHelper.requireNonNull(text, TEXT.getPreferredName()); } public Request(StreamInput in) throws IOException { @@ -105,14 +106,7 @@ public List getText() { @Override public ActionRequestValidationException validate() { - ActionRequestValidationException validationException = null; - if (grokPattern == null) { - validationException = addValidationError("[" + GROK_PATTERN.getPreferredName() + "] missing", validationException); - } - if (text == null) { - validationException = addValidationError("[" + TEXT.getPreferredName() + "] missing", validationException); - } - return validationException; + return null; } @Override diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java index 65007371e927..f7250a9b213c 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -57,7 +57,7 @@ protected void doExecute(Task task, TestGrokPatternAction.Request request, Actio } private TestGrokPatternAction.Response getResponse(TestGrokPatternAction.Request request) { - Grok grok = new Grok(GrokBuiltinPatterns.get(true), request.getGrokPattern(), logger::warn); + Grok grok = new Grok(GrokBuiltinPatterns.get(true), request.getGrokPattern(), logger::debug); List> ranges = new ArrayList<>(); for (String text : request.getText()) { ranges.add(grok.captureRanges(text)); From 4ee8ffb8b63ba0a0c725ecfb77fe801d8cc0a2cb Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Thu, 18 Jan 2024 15:39:16 +0100 Subject: [PATCH 15/28] Replace TransportTestGrokPatternActionTests by a YAML REST test --- .../test/text_structure/test_grok_pattern.yml | 42 +++++++++ .../TransportTestGrokPatternActionTests.java | 93 ------------------- 2 files changed, 42 insertions(+), 93 deletions(-) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml delete mode 100644 x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml new file mode 100644 index 000000000000..7d0359d25df1 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml @@ -0,0 +1,42 @@ +"Grok pattern with two named fields": + - do: + text_structure.test_grok_pattern: + body: + grok_pattern: "%{WORD}.*%{WORD:first_name} %{WORD:last_name}!" + text: + - "Hello Dave Roberts!" + - "this does not match" + - match: + matches: + - matched: true + fields: + first_name: + match: Dave + offset: 8 + length: 4 + last_name: + match: Roberts + offset: 13 + length: 7 + - matched: false +--- +"Grok pattern with two identically named fields": + - do: + text_structure.test_grok_pattern: + body: + grok_pattern: "%{WORD}.*%{WORD:name} %{WORD:name}!" + text: + - "Hello Dave Roberts!" + - "this does not match" + - match: + matches: + - matched: true + fields: + name: + - match: Dave + offset: 8 + length: 4 + - match: Roberts + offset: 13 + length: 7 + - matched: false diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java deleted file mode 100644 index 8a39cad8a909..000000000000 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternActionTests.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.textstructure.transport; - -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.PlainActionFuture; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.MockUtils; -import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xcontent.ToXContent; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; -import org.elasticsearch.xpack.core.watcher.support.xcontent.XContentSource; - -import java.util.List; - -import static org.hamcrest.Matchers.aMapWithSize; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasSize; -import static org.mockito.Mockito.mock; - -public class TransportTestGrokPatternActionTests extends ESTestCase { - - private XContentSource executeRequest(TestGrokPatternAction.Request request) throws Exception { - TransportService service = MockUtils.setupTransportServiceWithThreadpoolExecutor(); - TransportTestGrokPatternAction action = new TransportTestGrokPatternAction( - service, - mock(ActionFilters.class), - service.getThreadPool() - ); - - PlainActionFuture future = new PlainActionFuture<>(); - action.doExecute(mock(Task.class), request, future); - try (XContentBuilder builder = XContentFactory.jsonBuilder()) { - future.get().toXContent(builder, ToXContent.EMPTY_PARAMS); - return new XContentSource(builder); - } - } - - public void test() throws Exception { - TestGrokPatternAction.Request request = new TestGrokPatternAction.Request.Builder().grokPattern( - "%{WORD}.*%{WORD:first_name} %{WORD:last_name}!" - ).text(List.of("Hello Dave Roberts!", "this does not match")).build(); - - XContentSource source = executeRequest(request); - assertThat(source.getValue(""), aMapWithSize(1)); - assertThat(source.getValue("matches"), hasSize(2)); - assertThat(source.getValue("matches.0"), aMapWithSize(2)); - assertThat(source.getValue("matches.0.matched"), equalTo(true)); - assertThat(source.getValue("matches.0.fields"), aMapWithSize(2)); - assertThat(source.getValue("matches.0.fields.first_name"), aMapWithSize(3)); - assertThat(source.getValue("matches.0.fields.first_name.match"), equalTo("Dave")); - assertThat(source.getValue("matches.0.fields.first_name.offset"), equalTo(8)); - assertThat(source.getValue("matches.0.fields.first_name.length"), equalTo(4)); - assertThat(source.getValue("matches.0.fields.last_name"), aMapWithSize(3)); - assertThat(source.getValue("matches.0.fields.last_name.match"), equalTo("Roberts")); - assertThat(source.getValue("matches.0.fields.last_name.offset"), equalTo(13)); - assertThat(source.getValue("matches.0.fields.last_name.length"), equalTo(7)); - assertThat(source.getValue("matches.1"), aMapWithSize(1)); - assertThat(source.getValue("matches.1.matched"), equalTo(false)); - } - - public void test_repeatedIdentifiers() throws Exception { - TestGrokPatternAction.Request request = new TestGrokPatternAction.Request.Builder().grokPattern( - "%{WORD}.*%{WORD:name} %{WORD:name}!" - ).text(List.of("Hello Dave Roberts!", "this does not match")).build(); - - XContentSource source = executeRequest(request); - assertThat(source.getValue(""), aMapWithSize(1)); - assertThat(source.getValue("matches"), hasSize(2)); - assertThat(source.getValue("matches.0"), aMapWithSize(2)); - assertThat(source.getValue("matches.0.matched"), equalTo(true)); - assertThat(source.getValue("matches.0.fields"), aMapWithSize(1)); - assertThat(source.getValue("matches.0.fields.name"), hasSize(2)); - assertThat(source.getValue("matches.0.fields.name.0"), aMapWithSize(3)); - assertThat(source.getValue("matches.0.fields.name.0.match"), equalTo("Dave")); - assertThat(source.getValue("matches.0.fields.name.0.offset"), equalTo(8)); - assertThat(source.getValue("matches.0.fields.name.0.length"), equalTo(4)); - assertThat(source.getValue("matches.0.fields.name.1"), aMapWithSize(3)); - assertThat(source.getValue("matches.0.fields.name.1.match"), equalTo("Roberts")); - assertThat(source.getValue("matches.0.fields.name.1.offset"), equalTo(13)); - assertThat(source.getValue("matches.0.fields.name.1.length"), equalTo(7)); - assertThat(source.getValue("matches.1"), aMapWithSize(1)); - assertThat(source.getValue("matches.1.matched"), equalTo(false)); - } -} From 2092a89e5a867bf4aeeb818ceafc16b7aed0ba94 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Thu, 18 Jan 2024 17:11:43 +0100 Subject: [PATCH 16/28] Add ecs_compatibility --- .../api/text_structure.test_grok_pattern.json | 6 +++++ .../action/TestGrokPatternAction.java | 26 ++++++++++++++----- .../rest/RestTestGrokPatternAction.java | 5 +++- .../TransportTestGrokPatternAction.java | 5 +++- 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json index eb14e880bb4b..e0361d30b5e7 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.test_grok_pattern.json @@ -18,6 +18,12 @@ } ] }, + "params": { + "ecs_compatibility": { + "type": "string", + "description": "Optional parameter to specify the compatibility mode with ECS Grok patterns - may be either 'v1' or 'disabled'" + } + }, "body": { "description": "The Grok pattern and text.", "required": true diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index 9c7a097dc6b6..7f5dd86b03d0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.ActionType; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -27,8 +28,6 @@ import java.util.Map; import java.util.Objects; -import static org.elasticsearch.action.ValidateActions.addValidationError; - public class TestGrokPatternAction extends ActionType { public static final TestGrokPatternAction INSTANCE = new TestGrokPatternAction(); @@ -42,6 +41,7 @@ public static class Request extends ActionRequest { public static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); public static final ParseField TEXT = new ParseField("text"); + public static final ParseField ECS_COMPATIBILITY = new ParseField("ecs_compatibility"); private static final ObjectParser PARSER = createParser(); @@ -55,6 +55,7 @@ private static ObjectParser createParser() { public static class Builder { private String grokPattern; private List text; + private String ecsCompatibility; public Builder grokPattern(String grokPattern) { this.grokPattern = grokPattern; @@ -66,27 +67,35 @@ public Builder text(List text) { return this; } + public Builder ecsCompatibility(String ecsCompatibility) { + this.ecsCompatibility = Strings.isNullOrEmpty(ecsCompatibility) ? null : ecsCompatibility; + return this; + } + public Request build() { - return new Request(grokPattern, text); + return new Request(grokPattern, text, ecsCompatibility); } } private final String grokPattern; private final List text; + private final String ecsCompatibility; - private Request(String grokPattern, List text) { + private Request(String grokPattern, List text, String ecsCompatibility) { this.grokPattern = ExceptionsHelper.requireNonNull(grokPattern, GROK_PATTERN.getPreferredName()); this.text = ExceptionsHelper.requireNonNull(text, TEXT.getPreferredName()); + this.ecsCompatibility = ecsCompatibility; } public Request(StreamInput in) throws IOException { super(in); grokPattern = in.readString(); text = in.readStringCollectionAsList(); + ecsCompatibility = in.readOptionalString(); } - public static Request parseRequest(XContentParser parser) throws IOException { - return PARSER.parse(parser, null).build(); + public static Request parseRequest(String ecsCompatibility, XContentParser parser) throws IOException { + return PARSER.parse(parser, null).ecsCompatibility(ecsCompatibility).build(); } @Override @@ -94,6 +103,7 @@ public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeString(grokPattern); out.writeStringCollection(text); + out.writeOptionalString(ecsCompatibility); } public String getGrokPattern() { @@ -104,6 +114,10 @@ public List getText() { return text; } + public String getEcsCompatibility() { + return ecsCompatibility; + } + @Override public ActionRequestValidationException validate() { return null; diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java index 9e97c6c4d4a1..35aa018fcb6a 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java @@ -12,6 +12,7 @@ import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction; import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction; import java.io.IOException; @@ -36,9 +37,11 @@ public List routes() { @Override protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { TestGrokPatternAction.Request request; + String ecsCompatibility = restRequest.param(FindStructureAction.Request.ECS_COMPATIBILITY.getPreferredName()); try (XContentParser parser = restRequest.contentParser()) { - request = TestGrokPatternAction.Request.parseRequest(parser); + request = TestGrokPatternAction.Request.parseRequest(ecsCompatibility, parser); } + return channel -> client.execute(TestGrokPatternAction.INSTANCE, request, new RestToXContentListener<>(channel)); } } diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java index f7250a9b213c..6712e7a5ad68 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportTestGrokPatternAction.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.grok.GrokBuiltinPatterns.ECS_COMPATIBILITY_V1; + public class TransportTestGrokPatternAction extends HandledTransportAction { private static final Logger logger = LogManager.getLogger(TransportTestGrokPatternAction.class); @@ -57,7 +59,8 @@ protected void doExecute(Task task, TestGrokPatternAction.Request request, Actio } private TestGrokPatternAction.Response getResponse(TestGrokPatternAction.Request request) { - Grok grok = new Grok(GrokBuiltinPatterns.get(true), request.getGrokPattern(), logger::debug); + boolean ecsCompatibility = ECS_COMPATIBILITY_V1.equals(request.getEcsCompatibility()); + Grok grok = new Grok(GrokBuiltinPatterns.get(ecsCompatibility), request.getGrokPattern(), logger::debug); List> ranges = new ArrayList<>(); for (String text : request.getText()) { ranges.add(grok.captureRanges(text)); From f5ca1bd33b9a6cd9c08d29bedc92b25eab580fce Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 10:36:27 +0100 Subject: [PATCH 17/28] Always return arrays in the API --- .../action/TestGrokPatternAction.java | 25 ++++++++----------- .../test/text_structure/test_grok_pattern.yml | 12 ++++----- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java index 7f5dd86b03d0..2751abedbfaf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/TestGrokPatternAction.java @@ -165,25 +165,22 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (ranges != null) { builder.startObject("fields"); for (Map.Entry rangeOrList : ranges.entrySet()) { - if (rangeOrList.getValue() instanceof GrokCaptureExtracter.Range) { - GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeOrList.getValue(); - builder.startObject(rangeOrList.getKey()); + List listOfRanges; + if (rangeOrList.getValue() instanceof List) { + listOfRanges = (List) rangeOrList.getValue(); + } else { + listOfRanges = List.of(rangeOrList.getValue()); + } + builder.startArray(rangeOrList.getKey()); + for (Object rangeObject : listOfRanges) { + GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeObject; + builder.startObject(); builder.field("match", range.match()); builder.field("offset", range.offset()); builder.field("length", range.length()); builder.endObject(); - } else if (rangeOrList.getValue() instanceof List) { - builder.startArray(rangeOrList.getKey()); - for (Object rangeObject : (List) rangeOrList.getValue()) { - GrokCaptureExtracter.Range range = (GrokCaptureExtracter.Range) rangeObject; - builder.startObject(); - builder.field("match", range.match()); - builder.field("offset", range.offset()); - builder.field("length", range.length()); - builder.endObject(); - } - builder.endArray(); } + builder.endArray(); } builder.endObject(); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml index 7d0359d25df1..f1b2183a9b6f 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml @@ -11,13 +11,13 @@ - matched: true fields: first_name: - match: Dave - offset: 8 - length: 4 + - match: Dave + offset: 8 + length: 4 last_name: - match: Roberts - offset: 13 - length: 7 + - match: Roberts + offset: 13 + length: 7 - matched: false --- "Grok pattern with two identically named fields": From 6ef2f1e39bedf49fbd89b9d190329ac0774ceb6e Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 14:08:58 +0100 Subject: [PATCH 18/28] Documentation --- docs/reference/rest-api/index.asciidoc | 2 + .../apis/test_grok_pattern.asciidoc | 97 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 docs/reference/text-structure/apis/test_grok_pattern.asciidoc diff --git a/docs/reference/rest-api/index.asciidoc b/docs/reference/rest-api/index.asciidoc index 7757e7c2f792..90012de7d74f 100644 --- a/docs/reference/rest-api/index.asciidoc +++ b/docs/reference/rest-api/index.asciidoc @@ -55,6 +55,7 @@ not be included yet. * <> * <> * <> +* <> * <> * <> -- @@ -76,6 +77,7 @@ include::{es-repo-dir}/esql/esql-apis.asciidoc[] include::{es-repo-dir}/features/apis/features-apis.asciidoc[] include::{es-repo-dir}/fleet/index.asciidoc[] include::{es-repo-dir}/text-structure/apis/find-structure.asciidoc[leveloffset=+1] +include::{es-repo-dir}/text-structure/apis/test-grok-pattern.asciidoc[] include::{es-repo-dir}/graph/explore.asciidoc[] include::{es-repo-dir}/indices.asciidoc[] include::{es-repo-dir}/ilm/apis/ilm-api.asciidoc[] diff --git a/docs/reference/text-structure/apis/test_grok_pattern.asciidoc b/docs/reference/text-structure/apis/test_grok_pattern.asciidoc new file mode 100644 index 000000000000..4a13f070ddb1 --- /dev/null +++ b/docs/reference/text-structure/apis/test_grok_pattern.asciidoc @@ -0,0 +1,97 @@ +[role="xpack"] +[[test-grok-pattern]] += Test Grok pattern API + +++++ +Test Grok pattern +++++ + +Tests a Grok pattern on lines of text, see also <>. + +[[test-grok-pattern-request]] +== {api-request-title} + +`GET _text_structure/test_grok_pattern` + + +`POST _text_structure/test_grok_pattern` + + +[[test-grok-pattern-desc]] +== {api-description-title} + +The test Grok pattern API allows you to execute a Grok pattern on one +or more lines of text. It returns whether the lines match the pattern +together with the offsets and lengths of the matched substrings. + +[[test-grok-pattern-parms]] + +[[test-grok-pattern-query-parms]] +== {api-query-parms-title} + +`ecs_compatibility`:: +(Optional, string) The mode of compatibility with ECS compliant Grok patterns. +Use this parameter to specify whether to use ECS Grok patterns instead of +legacy ones when the structure finder creates a Grok pattern. Valid values +are `disabled` and `v1`. The default value is `disabled`. This setting primarily +has an impact when a whole message Grok pattern such as `%{CATALINALOG}` +matches the input. If the structure finder identifies a common structure but +has no idea of meaning then generic field names such as `path`, `ipaddress`, +`field1` and `field2` are used in the `grok_pattern` output, with the intention +that a user who knows the meanings rename these fields before using it. + +[[test-grok-pattern-request-body]] +== {api-request-body-title} + +`grok_pattern`:: +(Required, string) +The Grok pattern to run on the lines of text. + +`text`:: +(Required, array of strings) +The lines of text to run the Grok pattern on. + +[[test-grok-pattern-example]] +== {api-examples-title} + +[source,console] +-------------------------------------------------- +GET _text_structure/test_grok_pattern +{ + "grok_pattern": "Hello %{WORD:first_name} %{WORD:last_name}", + "text": [ + "Hello John Doe", + "this does not match" + ] +} +-------------------------------------------------- + +The API returns the following response: + +[source,console-result] +---- +{ + "matches": [ + { + "matched": true, + "fields": { + "first_name": [ + { + "match": "John", + "offset": 6, + "length": 4 + } + ], + "last_name": [ + { + "match": "Doe", + "offset": 11, + "length": 3 + } + ] + } + }, + { + "matched": false + } + ] +} +---- From 71cea7f59e1e197bfe41eeee4e8f122eacb98da5 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 14:09:16 +0100 Subject: [PATCH 19/28] YAML test for ecs_compatibility --- .../rest-api-spec/test/text_structure/test_grok_pattern.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml index f1b2183a9b6f..411cebb529ab 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/test_grok_pattern.yml @@ -23,6 +23,7 @@ "Grok pattern with two identically named fields": - do: text_structure.test_grok_pattern: + ecs_compatibility: v1 body: grok_pattern: "%{WORD}.*%{WORD:name} %{WORD:name}!" text: From a9e1fd198f65be1ebec2e02d911750420dde32b1 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 14:43:13 +0100 Subject: [PATCH 20/28] =?UTF-8?q?Rename=20doc=20file=C3=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{test_grok_pattern.asciidoc => test-grok-pattern.asciidoc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/reference/text-structure/apis/{test_grok_pattern.asciidoc => test-grok-pattern.asciidoc} (100%) diff --git a/docs/reference/text-structure/apis/test_grok_pattern.asciidoc b/docs/reference/text-structure/apis/test-grok-pattern.asciidoc similarity index 100% rename from docs/reference/text-structure/apis/test_grok_pattern.asciidoc rename to docs/reference/text-structure/apis/test-grok-pattern.asciidoc From 99c6efa0ae4f64ab968c61b59e9c4e8e84179bea Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 20:31:21 +0100 Subject: [PATCH 21/28] serverless scope --- .../xpack/textstructure/rest/RestTestGrokPatternAction.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java index 35aa018fcb6a..2aa52da619b8 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestTestGrokPatternAction.java @@ -10,6 +10,8 @@ import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction; @@ -22,6 +24,7 @@ import static org.elasticsearch.rest.RestRequest.Method.POST; import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PATH; +@ServerlessScope(Scope.INTERNAL) public class RestTestGrokPatternAction extends BaseRestHandler { @Override From c7ad51c7f8faa5597f9fd0a3b70036162d6ecbf4 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Mon, 22 Jan 2024 20:38:24 +0100 Subject: [PATCH 22/28] Fix docs (hopefully) --- .../text-structure/apis/test-grok-pattern.asciidoc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/reference/text-structure/apis/test-grok-pattern.asciidoc b/docs/reference/text-structure/apis/test-grok-pattern.asciidoc index 4a13f070ddb1..4034a24cf0a1 100644 --- a/docs/reference/text-structure/apis/test-grok-pattern.asciidoc +++ b/docs/reference/text-structure/apis/test-grok-pattern.asciidoc @@ -8,6 +8,7 @@ Tests a Grok pattern on lines of text, see also <>. +[discrete] [[test-grok-pattern-request]] == {api-request-title} @@ -15,6 +16,7 @@ Tests a Grok pattern on lines of text, see also <>. `POST _text_structure/test_grok_pattern` + +[discrete] [[test-grok-pattern-desc]] == {api-description-title} @@ -22,8 +24,7 @@ The test Grok pattern API allows you to execute a Grok pattern on one or more lines of text. It returns whether the lines match the pattern together with the offsets and lengths of the matched substrings. -[[test-grok-pattern-parms]] - +[discrete] [[test-grok-pattern-query-parms]] == {api-query-parms-title} @@ -31,13 +32,9 @@ together with the offsets and lengths of the matched substrings. (Optional, string) The mode of compatibility with ECS compliant Grok patterns. Use this parameter to specify whether to use ECS Grok patterns instead of legacy ones when the structure finder creates a Grok pattern. Valid values -are `disabled` and `v1`. The default value is `disabled`. This setting primarily -has an impact when a whole message Grok pattern such as `%{CATALINALOG}` -matches the input. If the structure finder identifies a common structure but -has no idea of meaning then generic field names such as `path`, `ipaddress`, -`field1` and `field2` are used in the `grok_pattern` output, with the intention -that a user who knows the meanings rename these fields before using it. +are `disabled` and `v1`. The default value is `disabled`. +[discrete] [[test-grok-pattern-request-body]] == {api-request-body-title} @@ -49,6 +46,7 @@ The Grok pattern to run on the lines of text. (Required, array of strings) The lines of text to run the Grok pattern on. +[discrete] [[test-grok-pattern-example]] == {api-examples-title} From e906fc80ff85a37cbb2e68d7a75a190a23ce50b8 Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Tue, 23 Jan 2024 09:55:53 +0100 Subject: [PATCH 23/28] Update docs/reference/rest-api/index.asciidoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- docs/reference/rest-api/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/rest-api/index.asciidoc b/docs/reference/rest-api/index.asciidoc index 90012de7d74f..67c750bdd863 100644 --- a/docs/reference/rest-api/index.asciidoc +++ b/docs/reference/rest-api/index.asciidoc @@ -77,7 +77,7 @@ include::{es-repo-dir}/esql/esql-apis.asciidoc[] include::{es-repo-dir}/features/apis/features-apis.asciidoc[] include::{es-repo-dir}/fleet/index.asciidoc[] include::{es-repo-dir}/text-structure/apis/find-structure.asciidoc[leveloffset=+1] -include::{es-repo-dir}/text-structure/apis/test-grok-pattern.asciidoc[] +include::{es-repo-dir}/text-structure/apis/test-grok-pattern.asciidoc[leveloffset=+1] include::{es-repo-dir}/graph/explore.asciidoc[] include::{es-repo-dir}/indices.asciidoc[] include::{es-repo-dir}/ilm/apis/ilm-api.asciidoc[] From d0441e23e4ac17f1578a38e659e1165291071b95 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 23 Jan 2024 10:38:30 +0100 Subject: [PATCH 24/28] Add "text structure APIs" header in docs TOC --- docs/reference/rest-api/index.asciidoc | 5 ++--- docs/reference/text-structure/index.asciidoc | 12 ++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 docs/reference/text-structure/index.asciidoc diff --git a/docs/reference/rest-api/index.asciidoc b/docs/reference/rest-api/index.asciidoc index 67c750bdd863..99f7bd05402e 100644 --- a/docs/reference/rest-api/index.asciidoc +++ b/docs/reference/rest-api/index.asciidoc @@ -25,7 +25,6 @@ not be included yet. * <> * <> * <> -* <> * <> * <> * <> @@ -54,6 +53,7 @@ not be included yet. * <> * <> * <> +* <> * <> * <> * <> @@ -76,8 +76,6 @@ include::{es-repo-dir}/eql/eql-apis.asciidoc[] include::{es-repo-dir}/esql/esql-apis.asciidoc[] include::{es-repo-dir}/features/apis/features-apis.asciidoc[] include::{es-repo-dir}/fleet/index.asciidoc[] -include::{es-repo-dir}/text-structure/apis/find-structure.asciidoc[leveloffset=+1] -include::{es-repo-dir}/text-structure/apis/test-grok-pattern.asciidoc[leveloffset=+1] include::{es-repo-dir}/graph/explore.asciidoc[] include::{es-repo-dir}/indices.asciidoc[] include::{es-repo-dir}/ilm/apis/ilm-api.asciidoc[] @@ -105,6 +103,7 @@ include::{es-repo-dir}/snapshot-restore/apis/snapshot-restore-apis.asciidoc[] include::{es-repo-dir}/slm/apis/slm-api.asciidoc[] include::{es-repo-dir}/sql/apis/sql-apis.asciidoc[] include::{es-repo-dir}/synonyms/apis/synonyms-apis.asciidoc[] +include::{es-repo-dir}/text-structure/index.asciidoc[] include::{es-repo-dir}/transform/apis/index.asciidoc[] include::usage.asciidoc[] include::{es-repo-dir}/rest-api/watcher.asciidoc[] diff --git a/docs/reference/text-structure/index.asciidoc b/docs/reference/text-structure/index.asciidoc new file mode 100644 index 000000000000..3f67f87146d5 --- /dev/null +++ b/docs/reference/text-structure/index.asciidoc @@ -0,0 +1,12 @@ +[role="xpack"] +[[text-structure-apis]] +== Text structure APIs + +You can use the following APIs to find text structures: + +* <> +* <> + +include::find-structure.asciidoc[leveloffset=+1] +include::test-grok-pattern.asciidoc[leveloffset=+1] + From d16051d1f585c6ef340bba81aa49b922ffb29f68 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 23 Jan 2024 10:57:56 +0100 Subject: [PATCH 25/28] Move file --- docs/reference/rest-api/index.asciidoc | 2 +- docs/reference/text-structure/{ => apis}/index.asciidoc | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/reference/text-structure/{ => apis}/index.asciidoc (100%) diff --git a/docs/reference/rest-api/index.asciidoc b/docs/reference/rest-api/index.asciidoc index 99f7bd05402e..5b4883ef0220 100644 --- a/docs/reference/rest-api/index.asciidoc +++ b/docs/reference/rest-api/index.asciidoc @@ -103,7 +103,7 @@ include::{es-repo-dir}/snapshot-restore/apis/snapshot-restore-apis.asciidoc[] include::{es-repo-dir}/slm/apis/slm-api.asciidoc[] include::{es-repo-dir}/sql/apis/sql-apis.asciidoc[] include::{es-repo-dir}/synonyms/apis/synonyms-apis.asciidoc[] -include::{es-repo-dir}/text-structure/index.asciidoc[] +include::{es-repo-dir}/text-structure/apis/index.asciidoc[] include::{es-repo-dir}/transform/apis/index.asciidoc[] include::usage.asciidoc[] include::{es-repo-dir}/rest-api/watcher.asciidoc[] diff --git a/docs/reference/text-structure/index.asciidoc b/docs/reference/text-structure/apis/index.asciidoc similarity index 100% rename from docs/reference/text-structure/index.asciidoc rename to docs/reference/text-structure/apis/index.asciidoc From a06000854a0dfd31403f180f94b0e69a6aa0b5a1 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 23 Jan 2024 11:02:39 +0100 Subject: [PATCH 26/28] Remove test grok from main index --- docs/reference/rest-api/index.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/reference/rest-api/index.asciidoc b/docs/reference/rest-api/index.asciidoc index 5b4883ef0220..6395c8800bb3 100644 --- a/docs/reference/rest-api/index.asciidoc +++ b/docs/reference/rest-api/index.asciidoc @@ -55,7 +55,6 @@ not be included yet. * <> * <> * <> -* <> * <> * <> -- From 8bdd92719779c18af05b6504eaf86f9dbf3e75cd Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 23 Jan 2024 11:03:44 +0100 Subject: [PATCH 27/28] typo --- docs/reference/text-structure/apis/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/text-structure/apis/index.asciidoc b/docs/reference/text-structure/apis/index.asciidoc index 3f67f87146d5..dd745660e72d 100644 --- a/docs/reference/text-structure/apis/index.asciidoc +++ b/docs/reference/text-structure/apis/index.asciidoc @@ -5,7 +5,7 @@ You can use the following APIs to find text structures: * <> -* <> +* <> include::find-structure.asciidoc[leveloffset=+1] include::test-grok-pattern.asciidoc[leveloffset=+1] From 98953e2701e7e1b1df1ff41a4c752e566a33499c Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 23 Jan 2024 11:27:33 +0100 Subject: [PATCH 28/28] Nested APIs underneath text structure --- docs/reference/text-structure/apis/index.asciidoc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/reference/text-structure/apis/index.asciidoc b/docs/reference/text-structure/apis/index.asciidoc index dd745660e72d..8628badba7e7 100644 --- a/docs/reference/text-structure/apis/index.asciidoc +++ b/docs/reference/text-structure/apis/index.asciidoc @@ -7,6 +7,5 @@ You can use the following APIs to find text structures: * <> * <> -include::find-structure.asciidoc[leveloffset=+1] -include::test-grok-pattern.asciidoc[leveloffset=+1] - +include::find-structure.asciidoc[leveloffset=+2] +include::test-grok-pattern.asciidoc[leveloffset=+2]