From 3bb551dfa004d48858d50e6f2c6ebb6665f99aea Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Thu, 1 Jul 2021 10:13:52 -0500 Subject: [PATCH] [7.14] Improve circular reference detection in grok processor (#74581) (#74829) --- .../java/org/elasticsearch/grok/Grok.java | 48 +++++++++++++------ .../org/elasticsearch/grok/GrokTests.java | 22 +++++++-- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index ee4e15c3810c8..1f0441da9db43 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -82,11 +82,7 @@ private Grok(Map patternBank, String grokPattern, boolean namedC this.namedCaptures = namedCaptures; this.matcherWatchdog = matcherWatchdog; - for (Map.Entry entry : patternBank.entrySet()) { - String name = entry.getKey(); - String pattern = entry.getValue(); - forbidCircularReferences(name, new ArrayList<>(), pattern); - } + forbidCircularReferences(); String expression = toRegex(grokPattern); byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8); @@ -107,8 +103,27 @@ private Grok(Map patternBank, String grokPattern, boolean namedC * a reference to another named pattern. This method will navigate to all these named patterns and * check for a circular reference. */ - private void forbidCircularReferences(String patternName, List path, String pattern) { - if (pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":")) { + private void forbidCircularReferences() { + + // first ensure that the pattern bank contains no simple circular references (i.e., any pattern + // containing an immediate reference to itself) as those can cause the remainder of this algorithm + // to recurse infinitely + for (Map.Entry entry : patternBank.entrySet()) { + if (patternReferencesItself(entry.getValue(), entry.getKey())) { + throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]"); + } + } + + // next, recursively check any other pattern names referenced in each pattern + for (Map.Entry entry : patternBank.entrySet()) { + String name = entry.getKey(); + String pattern = entry.getValue(); + innerForbidCircularReferences(name, new ArrayList<>(), pattern); + } + } + + private void innerForbidCircularReferences(String patternName, List path, String pattern) { + if (patternReferencesItself(pattern, patternName)) { String message; if (path.isEmpty()) { message = "circular reference in pattern [" + patternName + "][" + pattern + "]"; @@ -123,26 +138,31 @@ private void forbidCircularReferences(String patternName, List path, Str throw new IllegalArgumentException(message); } + // next check any other pattern names found in the pattern for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) { int begin = i + 2; - int brackedIndex = pattern.indexOf('}', begin); + int bracketIndex = pattern.indexOf('}', begin); int columnIndex = pattern.indexOf(':', begin); int end; - if (brackedIndex != -1 && columnIndex == -1) { - end = brackedIndex; - } else if (columnIndex != -1 && brackedIndex == -1) { + if (bracketIndex != -1 && columnIndex == -1) { + end = bracketIndex; + } else if (columnIndex != -1 && bracketIndex == -1) { end = columnIndex; - } else if (brackedIndex != -1 && columnIndex != -1) { - end = Math.min(brackedIndex, columnIndex); + } else if (bracketIndex != -1 && columnIndex != -1) { + end = Math.min(bracketIndex, columnIndex); } else { throw new IllegalArgumentException("pattern [" + pattern + "] has circular references to other pattern definitions"); } String otherPatternName = pattern.substring(begin, end); path.add(otherPatternName); - forbidCircularReferences(patternName, path, patternBank.get(otherPatternName)); + innerForbidCircularReferences(patternName, path, patternBank.get(otherPatternName)); } } + private static boolean patternReferencesItself(String pattern, String patternName) { + return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":"); + } + private String groupMatch(String name, Region region, String pattern) { try { int number = GROK_PATTERN_REGEX.nameToBackrefNumber(name.getBytes(StandardCharsets.UTF_8), 0, diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index db2a44b83f850..570a4c2ec0420 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -320,8 +320,7 @@ public void testCircularReference() { String pattern = "%{NAME1}"; new Grok(bank, pattern, false, logger::warn); }); - assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", - e.getMessage()); + assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> { Map bank = new TreeMap<>(); @@ -331,10 +330,23 @@ public void testCircularReference() { bank.put("NAME4", "!!!%{NAME5}!!!"); bank.put("NAME5", "!!!%{NAME1}!!!"); String pattern = "%{NAME1}"; - new Grok(bank, pattern, false, logger::warn ); + new Grok(bank, pattern, false, logger::warn); + }); + assertEquals( + "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2=>NAME3=>NAME4]", + e.getMessage() + ); + } + + public void testCircularSelfReference() { + Exception e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new HashMap<>(); + bank.put("ANOTHER", "%{INT}"); + bank.put("INT", "%{INT}"); + String pattern = "does_not_matter"; + new Grok(bank, pattern, false, logger::warn); }); - assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + - "via patterns [NAME2=>NAME3=>NAME4]", e.getMessage()); + assertEquals("circular reference in pattern [INT][%{INT}]", e.getMessage()); } public void testBooleanCaptures() {