diff --git a/docs/changelog/110574.yaml b/docs/changelog/110574.yaml new file mode 100644 index 000000000000..184083850015 --- /dev/null +++ b/docs/changelog/110574.yaml @@ -0,0 +1,6 @@ +pr: 110574 +summary: "ES|QL: better validation for GROK patterns" +area: ES|QL +type: bug +issues: + - 110533 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 88f6ff0c95b0..fa822b50ffcf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -111,7 +111,13 @@ public enum Cap { /** * Fix for union-types when aggregating over an inline conversion with casting operator. Done in #110476. */ - UNION_TYPES_AGG_CAST; + UNION_TYPES_AGG_CAST, + + /** + * Fix to GROK validation in case of multiple fields with same name and different types + * https://github.com/elastic/elasticsearch/issues/110533 + */ + GROK_VALIDATION; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index 9ee5931c85c3..e97323f96388 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -146,12 +146,30 @@ public PlanFactory visitEvalCommand(EsqlBaseParser.EvalCommandContext ctx) { @Override public PlanFactory visitGrokCommand(EsqlBaseParser.GrokCommandContext ctx) { return p -> { + Source source = source(ctx); String pattern = visitString(ctx.string()).fold().toString(); - Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), Grok.pattern(source(ctx), pattern)); + Grok.Parser grokParser = Grok.pattern(source, pattern); + validateGrokPattern(source, grokParser, pattern); + Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), grokParser); return result; }; } + private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern) { + Map definedAttributes = new HashMap<>(); + for (Attribute field : grokParser.extractedFields()) { + String name = field.name(); + DataType type = field.dataType(); + DataType prev = definedAttributes.put(name, type); + if (prev != null) { + throw new ParsingException( + source, + "Invalid GROK pattern [" + pattern + "]: the attribute [" + name + "] is defined multiple times with different types" + ); + } + } + } + @Override public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) { return p -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java index e084f6d3e5e3..963fd318f814 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java @@ -30,7 +30,7 @@ public class Grok extends RegexExtract { public record Parser(String pattern, org.elasticsearch.grok.Grok grok) { - private List extractedFields() { + public List extractedFields() { return grok.captureConfig() .stream() .sorted(Comparator.comparing(GrokCaptureConfig::name)) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index eee40b25176a..2f76cb204982 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -758,15 +758,27 @@ public void testDissectPattern() { public void testGrokPattern() { LogicalPlan cmd = processingCommand("grok a \"%{WORD:foo}\""); assertEquals(Grok.class, cmd.getClass()); - Grok dissect = (Grok) cmd; - assertEquals("%{WORD:foo}", dissect.parser().pattern()); - assertEquals(List.of(referenceAttribute("foo", KEYWORD)), dissect.extractedFields()); + Grok grok = (Grok) cmd; + assertEquals("%{WORD:foo}", grok.parser().pattern()); + assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields()); ParsingException pe = expectThrows(ParsingException.class, () -> statement("row a = \"foo bar\" | grok a \"%{_invalid_:x}\"")); assertThat( pe.getMessage(), containsString("Invalid pattern [%{_invalid_:x}] for grok: Unable to find pattern [_invalid_] in Grok's pattern dictionary") ); + + cmd = processingCommand("grok a \"%{WORD:foo} %{WORD:foo}\""); + assertEquals(Grok.class, cmd.getClass()); + grok = (Grok) cmd; + assertEquals("%{WORD:foo} %{WORD:foo}", grok.parser().pattern()); + assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields()); + + expectError( + "row a = \"foo bar\" | GROK a \"%{NUMBER:foo} %{WORD:foo}\"", + "line 1:22: Invalid GROK pattern [%{NUMBER:foo} %{WORD:foo}]:" + + " the attribute [foo] is defined multiple times with different types" + ); } public void testLikeRLike() { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/100_bug_fix.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/100_bug_fix.yml index b91343d03d3d..cffc161b1153 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/100_bug_fix.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/100_bug_fix.yml @@ -303,3 +303,38 @@ - match: { values.0.2: [1, 2] } - match: { values.0.3: [1, 2] } - match: { values.0.4: [1.1, 2.2] } + + +--- +"grok with duplicate names and different types #110533": + - requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [grok_validation] + reason: "fixed grok validation with patterns containing the same attribute multiple times with different types" + - do: + indices.create: + index: test_grok + body: + mappings: + properties: + first_name : + type : keyword + last_name: + type: keyword + + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "test_grok" } } + - { "first_name": "Georgi", "last_name":"Facello" } + + - do: + catch: '/Invalid GROK pattern \[%\{NUMBER:foo\} %\{WORD:foo\}\]: the attribute \[foo\] is defined multiple times with different types/' + esql.query: + body: + query: 'FROM test_grok | KEEP name | WHERE last_name == "Facello" | EVAL name = concat("1 ", last_name) | GROK name "%{NUMBER:foo} %{WORD:foo}"'