From 690b10a4a1ad402433f12837ff9cdd31872e95f8 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 14 Dec 2018 07:18:09 +0000 Subject: [PATCH] [ML] Interrupt Grok in file structure finder timeout (#36588) The file structure finder has timeout functionality, but prior to this change it would not interrupt a single long-running Grok match attempt. This commit hooks into the ThreadWatchdog facility provided by the Grok library to interrupt individual Grok matches that may be running at the time the file structure finder timeout expires. --- .../FileStructureUtils.java | 26 +++--- .../GrokPatternCreator.java | 20 ++--- .../TextLogFileStructureFinder.java | 2 +- .../filestructurefinder/TimeoutChecker.java | 87 ++++++++++++++++++- .../TimestampFormatFinder.java | 47 ++++++---- .../TimeoutCheckerTests.java | 32 +++++++ .../TimestampFormatFinderTests.java | 70 ++++++++------- 7 files changed, 211 insertions(+), 73 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java index aacdc3b2cc610..9172de9dedaa5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java @@ -34,8 +34,9 @@ public final class FileStructureUtils { private static final int NUM_TOP_HITS = 10; // NUMBER Grok pattern doesn't support scientific notation, so we extend it - private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$"); - private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$"); + private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$", + TimeoutChecker.watchdog); + private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog); private static final int KEYWORD_MAX_LEN = 256; private static final int KEYWORD_MAX_SPACES = 5; @@ -69,7 +70,7 @@ static Tuple guessTimestampField(List explanatio } // Accept the first match from the first sample that is compatible with all the other samples - for (Tuple candidate : findCandidates(explanation, sampleRecords, overrides)) { + for (Tuple candidate : findCandidates(explanation, sampleRecords, overrides, timeoutChecker)) { boolean allGood = true; for (Map sampleRecord : sampleRecords.subList(1, sampleRecords.size())) { @@ -87,7 +88,8 @@ static Tuple guessTimestampField(List explanatio timeoutChecker.check("timestamp field determination"); - TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat()); + TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat(), + timeoutChecker); if (match == null || match.candidateIndex != candidate.v2().candidateIndex) { if (overrides.getTimestampFormat() != null) { throw new IllegalArgumentException("Specified timestamp format [" + overrides.getTimestampFormat() + @@ -111,7 +113,7 @@ static Tuple guessTimestampField(List explanatio } private static List> findCandidates(List explanation, List> sampleRecords, - FileStructureOverrides overrides) { + FileStructureOverrides overrides, TimeoutChecker timeoutChecker) { assert sampleRecords.isEmpty() == false; Map firstRecord = sampleRecords.get(0); @@ -130,7 +132,8 @@ private static List> findCandidates(List e if (onlyConsiderField == null || onlyConsiderField.equals(fieldName)) { Object value = field.getValue(); if (value != null) { - TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat()); + TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat(), + timeoutChecker); if (match != null) { Tuple candidate = new Tuple<>(fieldName, match); candidates.add(candidate); @@ -211,7 +214,7 @@ static Tuple, FieldStats> guessMappingAndCalculateFieldStats } Collection fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList()); - Map mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings); + Map mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings, timeoutChecker); timeoutChecker.check("mapping determination"); return new Tuple<>(mapping, calculateFieldStats(fieldValuesAsStrings, timeoutChecker)); } @@ -238,10 +241,12 @@ private static Stream flatten(Object value) { * @param fieldValues Values of the field for which mappings are to be guessed. The guessed * mapping will be compatible with all the provided values. Must not be * empty. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The sub-section of the index mappings most appropriate for the field, * for example { "type" : "keyword" }. */ - static Map guessScalarMapping(List explanation, String fieldName, Collection fieldValues) { + static Map guessScalarMapping(List explanation, String fieldName, Collection fieldValues, + TimeoutChecker timeoutChecker) { assert fieldValues.isEmpty() == false; @@ -251,11 +256,12 @@ static Map guessScalarMapping(List explanation, String f // This checks if a date mapping would be appropriate, and, if so, finds the correct format Iterator iter = fieldValues.iterator(); - TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next()); + TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next(), timeoutChecker); while (timestampMatch != null && iter.hasNext()) { // To be mapped as type date all the values must match the same timestamp format - it is // not acceptable for all values to be dates, but with different formats - if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex)) == false) { + if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex, + timeoutChecker)) == false) { timestampMatch = null; } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java index 4c6549ad3934c..6620afcb7145b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java @@ -453,7 +453,7 @@ static class ValueOnlyGrokPatternCandidate implements GrokPatternCandidate { this.fieldName = fieldName; // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + preBreak + - "%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}"); + "%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog); } @Override @@ -472,7 +472,7 @@ public String processCaptures(Map fieldNameCountStore, Collecti TimeoutChecker timeoutChecker) { Collection values = new ArrayList<>(); for (String snippet : snippets) { - Map captures = grok.captures(snippet); + Map captures = timeoutChecker.grokCaptures(grok, snippet, "full message Grok pattern field extraction"); // If the pattern doesn't match then captures will be null if (captures == null) { throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]"); @@ -480,14 +480,13 @@ public String processCaptures(Map fieldNameCountStore, Collecti prefaces.add(captures.getOrDefault(PREFACE, "").toString()); values.add(captures.getOrDefault(VALUE, "").toString()); epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); - timeoutChecker.check("full message Grok pattern field extraction"); } String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); if (mappings != null) { Map fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType); if ("date".equals(mappingType)) { assert values.isEmpty() == false; - TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next()); + TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next(), timeoutChecker); if (timestampMatch != null) { fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat(); } @@ -548,7 +547,7 @@ public String processCaptures(Map fieldNameCountStore, Collecti throw new IllegalStateException("Cannot process KV matches until a field name has been determined"); } Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" + - fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}"); + fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog); Collection values = new ArrayList<>(); for (String snippet : snippets) { Map captures = grok.captures(snippet); @@ -563,7 +562,8 @@ public String processCaptures(Map fieldNameCountStore, Collecti } String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); if (mappings != null) { - mappings.put(adjustedFieldName, FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values)); + mappings.put(adjustedFieldName, + FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values, timeoutChecker)); timeoutChecker.check("mapping determination"); } if (fieldStats != null) { @@ -610,7 +610,7 @@ static FullMatchGrokPatternCandidate fromGrokPattern(String grokPattern, String private FullMatchGrokPatternCandidate(String grokPattern, String timeField) { this.grokPattern = grokPattern; this.timeField = timeField; - grok = new Grok(Grok.getBuiltinPatterns(), grokPattern); + grok = new Grok(Grok.getBuiltinPatterns(), grokPattern, TimeoutChecker.watchdog); } public String getTimeField() { @@ -640,7 +640,8 @@ public Tuple processMatch(List explanation, Collection> valuesPerField = new HashMap<>(); for (String sampleMessage : sampleMessages) { - Map captures = grok.captures(sampleMessage); + Map captures = timeoutChecker.grokCaptures(grok, sampleMessage, + "full message Grok pattern field extraction"); // If the pattern doesn't match then captures will be null if (captures == null) { throw new IllegalStateException("[" + grokPattern + "] does not match snippet [" + sampleMessage + "]"); @@ -658,7 +659,6 @@ public Tuple processMatch(List explanation, Collection> valuesForField : valuesPerField.entrySet()) { @@ -667,7 +667,7 @@ public Tuple processMatch(List explanation, Collection> mostLikelyTimestamp(String[] sampleLin int remainingLines = sampleLines.length; double differenceBetweenTwoHighestWeights = 0.0; for (String sampleLine : sampleLines) { - TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat()); + TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat(), timeoutChecker); if (match != null) { TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.jodaTimestampFormats, match.javaTimestampFormats, match.simplePattern, match.grokPatternName, ""); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutChecker.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutChecker.java index 30c018827292d..f8a9368b8425e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutChecker.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutChecker.java @@ -6,14 +6,19 @@ package org.elasticsearch.xpack.ml.filestructurefinder; import org.elasticsearch.ElasticsearchTimeoutException; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.FutureUtils; import org.elasticsearch.grok.Grok; +import org.elasticsearch.grok.ThreadWatchdog; import java.io.Closeable; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; /** * This class can be used to keep track of when a long running operation started and @@ -32,9 +37,13 @@ */ public class TimeoutChecker implements Closeable { + private static final TimeoutCheckerWatchdog timeoutCheckerWatchdog = new TimeoutCheckerWatchdog(); + public static final ThreadWatchdog watchdog = timeoutCheckerWatchdog; + private final String operation; - private final ScheduledFuture future; private final TimeValue timeout; + private final Thread checkedThread; + private final ScheduledFuture future; private volatile boolean timeoutExceeded; /** @@ -48,6 +57,8 @@ public class TimeoutChecker implements Closeable { public TimeoutChecker(String operation, TimeValue timeout, ScheduledExecutorService scheduler) { this.operation = operation; this.timeout = timeout; + this.checkedThread = Thread.currentThread(); + timeoutCheckerWatchdog.add(checkedThread, timeout); this.future = (timeout != null) ? scheduler.schedule(this::setTimeoutExceeded, timeout.nanos(), TimeUnit.NANOSECONDS) : null; } @@ -57,6 +68,7 @@ public TimeoutChecker(String operation, TimeValue timeout, ScheduledExecutorServ @Override public void close() { FutureUtils.cancel(future); + timeoutCheckerWatchdog.remove(checkedThread); } /** @@ -72,7 +84,80 @@ public void check(String where) { } } + /** + * Wrapper around {@link Grok#captures} that translates any timeout exception + * to the style thrown by this class's {@link #check} method. + * @param grok The grok pattern from which captures are to be extracted. + * @param text The text to match and extract values from. + * @param where Which stage of the operation is currently in progress? + * @return A map containing field names and their respective coerced values that matched. + * @throws ElasticsearchTimeoutException If the operation is found to have taken longer than the permitted time. + */ + public Map grokCaptures(Grok grok, String text, String where) { + + try { + return grok.captures(text); + } finally { + // If a timeout has occurred then this check will overwrite any timeout exception thrown by Grok.captures() and this + // is intentional - the exception from this class makes more sense in the context of the find file structure API + check(where); + } + } + private void setTimeoutExceeded() { timeoutExceeded = true; + timeoutCheckerWatchdog.interruptLongRunningThreadIfRegistered(checkedThread); + } + + /** + * An implementation of the type of watchdog used by the {@link Grok} class to interrupt + * matching operations that take too long. Rather than have a timeout per match operation + * like the {@link ThreadWatchdog.Default} implementation, the interruption is governed by + * a {@link TimeoutChecker} associated with the thread doing the matching. + */ + static class TimeoutCheckerWatchdog implements ThreadWatchdog { + + final ConcurrentHashMap> registry = new ConcurrentHashMap<>(); + + void add(Thread thread, TimeValue timeout) { + Tuple previousValue = registry.put(thread, new Tuple<>(new AtomicBoolean(false), timeout)); + assert previousValue == null; + } + + @Override + public void register() { + Tuple value = registry.get(Thread.currentThread()); + if (value != null) { + boolean wasFalse = value.v1().compareAndSet(false, true); + assert wasFalse; + } + } + + @Override + public long maxExecutionTimeInMillis() { + Tuple value = registry.get(Thread.currentThread()); + return value != null ? value.v2().getMillis() : Long.MAX_VALUE; + } + + @Override + public void unregister() { + Tuple value = registry.get(Thread.currentThread()); + if (value != null) { + boolean wasTrue = value.v1().compareAndSet(true, false); + assert wasTrue; + } + } + + void remove(Thread thread) { + Tuple previousValue = registry.remove(thread); + assert previousValue != null; + } + + void interruptLongRunningThreadIfRegistered(Thread thread) { + Tuple value = registry.get(thread); + if (value.v1().get()) { + thread.interrupt(); + } + } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java index 7ed95f656b4ee..392e7b4e0be5e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java @@ -173,20 +173,22 @@ private TimestampFormatFinder() { /** * Find the first timestamp format that matches part of the supplied value. * @param text The value that the returned timestamp format must exist within. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstMatch(String text) { - return findFirstMatch(text, 0); + public static TimestampMatch findFirstMatch(String text, TimeoutChecker timeoutChecker) { + return findFirstMatch(text, 0, timeoutChecker); } /** * Find the first timestamp format that matches part of the supplied value. * @param text The value that the returned timestamp format must exist within. * @param requiredFormat A timestamp format that any returned match must support. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstMatch(String text, String requiredFormat) { - return findFirstMatch(text, 0, requiredFormat); + public static TimestampMatch findFirstMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) { + return findFirstMatch(text, 0, requiredFormat, timeoutChecker); } /** @@ -194,10 +196,11 @@ public static TimestampMatch findFirstMatch(String text, String requiredFormat) * excluding a specified number of candidate formats. * @param text The value that the returned timestamp format must exist within. * @param ignoreCandidates The number of candidate formats to exclude from the search. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstMatch(String text, int ignoreCandidates) { - return findFirstMatch(text, ignoreCandidates, null); + public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) { + return findFirstMatch(text, ignoreCandidates, null, timeoutChecker); } /** @@ -206,9 +209,10 @@ public static TimestampMatch findFirstMatch(String text, int ignoreCandidates) { * @param text The value that the returned timestamp format must exist within. * @param ignoreCandidates The number of candidate formats to exclude from the search. * @param requiredFormat A timestamp format that any returned match must support. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat) { + public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat, TimeoutChecker timeoutChecker) { if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) { return null; } @@ -229,7 +233,8 @@ public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, S } } if (quicklyRuledOut == false) { - Map captures = candidate.strictSearchGrok.captures(text); + Map captures = timeoutChecker.grokCaptures(candidate.strictSearchGrok, text, + "timestamp format determination"); if (captures != null) { String preface = captures.getOrDefault(PREFACE, "").toString(); String epilogue = captures.getOrDefault(EPILOGUE, "").toString(); @@ -246,20 +251,22 @@ public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, S /** * Find the best timestamp format for matching an entire field value. * @param text The value that the returned timestamp format must match in its entirety. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstFullMatch(String text) { - return findFirstFullMatch(text, 0); + public static TimestampMatch findFirstFullMatch(String text, TimeoutChecker timeoutChecker) { + return findFirstFullMatch(text, 0, timeoutChecker); } /** * Find the best timestamp format for matching an entire field value. * @param text The value that the returned timestamp format must match in its entirety. * @param requiredFormat A timestamp format that any returned match must support. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstFullMatch(String text, String requiredFormat) { - return findFirstFullMatch(text, 0, requiredFormat); + public static TimestampMatch findFirstFullMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) { + return findFirstFullMatch(text, 0, requiredFormat, timeoutChecker); } /** @@ -267,10 +274,11 @@ public static TimestampMatch findFirstFullMatch(String text, String requiredForm * excluding a specified number of candidate formats. * @param text The value that the returned timestamp format must match in its entirety. * @param ignoreCandidates The number of candidate formats to exclude from the search. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates) { - return findFirstFullMatch(text, ignoreCandidates, null); + public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) { + return findFirstFullMatch(text, ignoreCandidates, null, timeoutChecker); } /** @@ -279,9 +287,11 @@ public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidate * @param text The value that the returned timestamp format must match in its entirety. * @param ignoreCandidates The number of candidate formats to exclude from the search. * @param requiredFormat A timestamp format that any returned match must support. + * @param timeoutChecker Will abort the operation if its timeout is exceeded. * @return The timestamp format, or null if none matches. */ - public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat) { + public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat, + TimeoutChecker timeoutChecker) { if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) { return null; } @@ -290,7 +300,8 @@ public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidate for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) { if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) || candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) { - Map captures = candidate.strictFullMatchGrok.captures(text); + Map captures = timeoutChecker.grokCaptures(candidate.strictFullMatchGrok, text, + "timestamp format determination"); if (captures != null) { return makeTimestampMatch(candidate, index, "", text, ""); } @@ -540,8 +551,8 @@ static final class CandidateTimestampFormat { this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE); // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern + - "%{GREEDYDATA:" + EPILOGUE + "}"); - this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$"); + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog); + this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog); this.standardGrokPatternName = standardGrokPatternName; assert quickRuleOutIndices.stream() .noneMatch(quickRuleOutIndex -> quickRuleOutIndex < 0 || quickRuleOutIndex >= QUICK_RULE_OUT_PATTERNS.size()); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java index 125aab7e45ee7..b35f87ae97475 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java @@ -7,6 +7,7 @@ import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.grok.Grok; import org.junit.After; import org.junit.Before; @@ -57,4 +58,35 @@ public void testCheckTimeoutExceeded() throws Exception { }); } } + + public void testWatchdog() { + + assertFalse(Thread.interrupted()); + + TimeValue timeout = TimeValue.timeValueMillis(1); + try (TimeoutChecker timeoutChecker = new TimeoutChecker("watchdog test", timeout, scheduler)) { + + TimeoutChecker.watchdog.register(); + try { + expectThrows(InterruptedException.class, () -> Thread.sleep(10000)); + } finally { + TimeoutChecker.watchdog.unregister(); + } + } + } + + public void testGrokCaptures() { + + assertFalse(Thread.interrupted()); + Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog); + + TimeValue timeout = TimeValue.timeValueMillis(1); + try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) { + + ElasticsearchTimeoutException e = expectThrows(ElasticsearchTimeoutException.class, + () -> timeoutChecker.grokCaptures(grok, randomAlphaOfLength(1000000), "should timeout")); + assertEquals("Aborting grok captures test during [should timeout] as it has taken longer than the timeout of [" + + timeout + "]", e.getMessage()); + } + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java index 4b9b0b36c3426..6e256680eca55 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java @@ -17,10 +17,10 @@ public class TimestampFormatFinderTests extends FileStructureTestCase { public void testFindFirstMatchGivenNoMatch() { - assertNull(TimestampFormatFinder.findFirstMatch("")); - assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here")); - assertNull(TimestampFormatFinder.findFirstMatch(":::")); - assertNull(TimestampFormatFinder.findFirstMatch("/+")); + assertNull(TimestampFormatFinder.findFirstMatch("", NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here", NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstMatch(":::", NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstMatch("/+", NOOP_TIMEOUT_CHECKER)); } public void testFindFirstMatchGivenOnlyIso8601() { @@ -132,23 +132,23 @@ public void testFindFirstMatchGivenOnlyKnownTimestampFormat() { public void testFindFirstMatchGivenOnlySystemDate() { assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""), - TimestampFormatFinder.findFirstMatch("1526400896374")); + TimestampFormatFinder.findFirstMatch("1526400896374", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""), - TimestampFormatFinder.findFirstFullMatch("1526400896374")); + TimestampFormatFinder.findFirstFullMatch("1526400896374", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""), - TimestampFormatFinder.findFirstMatch("1526400896.736")); + TimestampFormatFinder.findFirstMatch("1526400896.736", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""), - TimestampFormatFinder.findFirstFullMatch("1526400896.736")); + TimestampFormatFinder.findFirstFullMatch("1526400896.736", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""), - TimestampFormatFinder.findFirstMatch("1526400896")); + TimestampFormatFinder.findFirstMatch("1526400896", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""), - TimestampFormatFinder.findFirstFullMatch("1526400896")); + TimestampFormatFinder.findFirstFullMatch("1526400896", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""), - TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980")); + TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""), - TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980")); + TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER)); } public void testFindFirstMatchGivenRealLogMessages() { @@ -157,45 +157,47 @@ public void testFindFirstMatchGivenRealLogMessages() { "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", "][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"), TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] " + - "heap size [3.9gb], compressed ordinary object pointers [true]")); + "heap size [3.9gb], compressed ordinary object pointers [true]", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(23, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX", "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE", "] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"), TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " + - "\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384")); + "\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a", "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP", " org.apache.tomcat.util.http.Parameters processParameters"), - TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters")); + TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters", + NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " + "[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"), TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " + - "opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed")); + "opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(10, "559550912540598297\t", "ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", "\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"), TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" + - "192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp")); + "192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"), TimestampFormatFinder.findFirstMatch("Sep 8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " + - "'www.elastic.co/A/IN': 95.110.68.206#53")); + "'www.elastic.co/A/IN': 95.110.68.206#53", NOOP_TIMEOUT_CHECKER)); assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601", "|INFO |VirtualServer |1 |client 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " + "'User1'(id:2) in channel '3er Instanz'(id:2)"), TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " + - " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)")); + " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)", + NOOP_TIMEOUT_CHECKER)); // Differs from the above as the required format is specified assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS", @@ -204,12 +206,12 @@ public void testFindFirstMatchGivenRealLogMessages() { "'User1'(id:2) in channel '3er Instanz'(id:2)"), TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " + " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)", - randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS"))); + randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS"), NOOP_TIMEOUT_CHECKER)); // Non-matching required format specified assertNull(TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " + " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)", - randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz"))); + randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz"), NOOP_TIMEOUT_CHECKER)); } public void testAdjustRequiredFormat() { @@ -246,18 +248,20 @@ public void testInterpretFractionalSeconds() { private void validateTimestampMatch(TimestampMatch expected, String text, long expectedEpochMs) { - assertEquals(expected, TimestampFormatFinder.findFirstMatch(text)); - assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text)); - assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex)); - assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex)); - assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE)); - assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE)); - assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats))); - assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats))); - assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats))); - assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats))); - assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format")); - assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format")); + assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats), NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats), + NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats), NOOP_TIMEOUT_CHECKER)); + assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats), + NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER)); + assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER)); validateJodaTimestampFormats(expected.jodaTimestampFormats, text, expectedEpochMs); validateJavaTimestampFormats(expected.javaTimestampFormats, text, expectedEpochMs);