diff --git a/data-prepper-api/src/main/java/com/amazon/dataprepper/model/configuration/PluginSetting.java b/data-prepper-api/src/main/java/com/amazon/dataprepper/model/configuration/PluginSetting.java index 29a6469748..bf06294500 100644 --- a/data-prepper-api/src/main/java/com/amazon/dataprepper/model/configuration/PluginSetting.java +++ b/data-prepper-api/src/main/java/com/amazon/dataprepper/model/configuration/PluginSetting.java @@ -254,13 +254,13 @@ public Long getLongOrDefault(final String attribute, final long defaultValue) { throw new IllegalArgumentException(String.format(UNEXPECTED_ATTRIBUTE_TYPE_MSG, object.getClass(), attribute)); } - private void checkObjectType(String attribute, Object object, Class type) { + private void checkObjectType(final String attribute, final Object object, final Class type) { if (!(type.isAssignableFrom(object.getClass()))){ throw new IllegalArgumentException(String.format(UNEXPECTED_ATTRIBUTE_TYPE_MSG, object.getClass(), attribute)); } } - private void checkObjectForListType(String attribute, Object object, Class type) { + private void checkObjectForListType(final String attribute, final Object object, final Class type) { checkObjectType(attribute, object, List.class); ((List) object).forEach(o -> { diff --git a/data-prepper-plugins/grok-prepper/README.md b/data-prepper-plugins/grok-prepper/README.md index 8b13789179..6158588ae0 100644 --- a/data-prepper-plugins/grok-prepper/README.md +++ b/data-prepper-plugins/grok-prepper/README.md @@ -1 +1,38 @@ +# Grok Prepper +This is a prepper that takes unstructured data and utilizes pattern matching +to structure and extract important fields for easier and more insightful aggregation and analysis. + +## Usages + +Example `.yaml` configuration + +``` +prepper: + - grok: + match: + message: [ "%{COMMONAPACHELOG}" ] +``` + +## Configuration + +* `match` (Optional): A `Map>` that specifies which fields of a Record to match which patterns against. Default value is `{}` + +* `keep_empty_captures` (Optional): A `boolean` that specifies whether `null` captures should be kept. Default value is `false` + +* `named_captures_only` (Optional): A `boolean` that specifies whether to only keep named captures. Default value is `true` + +## Notes on Patterns + +The Grok Prepper uses the [java-grok Library](https://github.com/thekrakken/java-grok) internally and supports all java-grok Library compatible patterns. + +[Default Patterns](https://github.com/thekrakken/java-grok/blob/master/src/main/resources/patterns/patterns) + +## Metrics + +TBD + +## Developer Guide +This plugin is compatible with Java 14. See +- [CONTRIBUTING](https://github.com/opensearch-project/data-prepper/blob/main/CONTRIBUTING.md) +- [monitoring](https://github.com/opensearch-project/data-prepper/blob/main/docs/readme/monitoring.md) diff --git a/data-prepper-plugins/grok-prepper/build.gradle b/data-prepper-plugins/grok-prepper/build.gradle index 331ed1e609..cca8964dc8 100644 --- a/data-prepper-plugins/grok-prepper/build.gradle +++ b/data-prepper-plugins/grok-prepper/build.gradle @@ -18,6 +18,7 @@ dependencies { implementation "io.krakens:java-grok:0.1.9" implementation 'com.fasterxml.jackson.core:jackson-databind' testImplementation "org.hamcrest:hamcrest:2.2" + testImplementation "org.mockito:mockito-inline:${versionMap.mockito}" } jacocoTestCoverageVerification { diff --git a/data-prepper-plugins/grok-prepper/src/main/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepper.java b/data-prepper-plugins/grok-prepper/src/main/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepper.java index 569fcc1564..029056f67d 100644 --- a/data-prepper-plugins/grok-prepper/src/main/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepper.java +++ b/data-prepper-plugins/grok-prepper/src/main/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepper.java @@ -19,22 +19,45 @@ import com.amazon.dataprepper.model.record.Record; import com.amazon.dataprepper.model.prepper.AbstractPrepper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.core.type.TypeReference; +import io.krakens.grok.api.Grok; +import io.krakens.grok.api.GrokCompiler; +import io.krakens.grok.api.Match; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; @DataPrepperPlugin(name = "grok", type = PluginType.PREPPER) public class GrokPrepper extends AbstractPrepper, Record> { private static final Logger LOG = LoggerFactory.getLogger(GrokPrepper.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference>() {}; + + private final GrokCompiler grokCompiler; + private final Map> fieldToGrok; private final GrokPrepperConfig grokPrepperConfig; public GrokPrepper(final PluginSetting pluginSetting) { super(pluginSetting); grokPrepperConfig = GrokPrepperConfig.buildConfig(pluginSetting); + grokCompiler = GrokCompiler.newInstance(); + fieldToGrok = new LinkedHashMap<>(); + + registerPatterns(); + compileMatchPatterns(); } /** @@ -43,10 +66,35 @@ public GrokPrepper(final PluginSetting pluginSetting) { * * @param records Input records that will be modified/processed * @return Record modified output records - */ + */ @Override - public Collection> doExecute(Collection> records) { - return records; + public Collection> doExecute(final Collection> records) { + final List> recordsOut = new LinkedList<>(); + + for (final Record record : records) { + try { + final Map recordMap = OBJECT_MAPPER.readValue(record.getData(), MAP_TYPE_REFERENCE); + + for (final Map.Entry> entry : fieldToGrok.entrySet()) { + for (final Grok grok : entry.getValue()) { + if (recordMap.containsKey(entry.getKey())) { + final Match match = grok.match(recordMap.get(entry.getKey()).toString()); + match.setKeepEmptyCaptures(grokPrepperConfig.isKeepEmptyCaptures()); + + mergeCaptures(recordMap, match.capture()); + } + } + } + + final Record grokkedRecord = new Record<>(OBJECT_MAPPER.writeValueAsString(recordMap), record.getMetadata()); + recordsOut.add(grokkedRecord); + + } catch (JsonProcessingException e) { + LOG.error("Failed to parse the record [{}]", record.getData()); + recordsOut.add(record); + } + } + return recordsOut; } @Override @@ -63,4 +111,42 @@ public boolean isReadyForShutdown() { public void shutdown() { } -} + + private void registerPatterns() { + grokCompiler.registerDefaultPatterns(); + } + + private void compileMatchPatterns() { + for (final Map.Entry> entry : grokPrepperConfig.getMatch().entrySet()) { + fieldToGrok.put(entry.getKey(), entry.getValue() + .stream() + .map(item -> grokCompiler.compile(item, grokPrepperConfig.isNamedCapturesOnly())) + .collect(Collectors.toList())); + } + } + + private void mergeCaptures(final Map original, final Map updates) { + for (final Map.Entry updateEntry : updates.entrySet()) { + if (!(original.containsKey(updateEntry.getKey()))) { + original.put(updateEntry.getKey(), updateEntry.getValue()); + continue; + } + + if (original.get(updateEntry.getKey()) instanceof List) { + mergeValueWithValues(updateEntry.getValue(), (List) original.get(updateEntry.getKey())); + } else { + final List values = new ArrayList<>(Collections.singletonList(original.get(updateEntry.getKey()))); + mergeValueWithValues(updateEntry.getValue(), values); + original.put(updateEntry.getKey(), values); + } + } + } + + private void mergeValueWithValues(final Object value, final List values) { + if (value instanceof List) { + values.addAll((List) value); + } else { + values.add(value); + } + } +} \ No newline at end of file diff --git a/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperIT.java b/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperIT.java new file mode 100644 index 0000000000..63866d524e --- /dev/null +++ b/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperIT.java @@ -0,0 +1,311 @@ +package com.amazon.dataprepper.plugins.prepper.grok; + +import com.amazon.dataprepper.model.configuration.PluginSetting; +import com.amazon.dataprepper.model.record.Record; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class GrokPrepperIT { + private PluginSetting pluginSetting; + private GrokPrepper grokPrepper; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference>() {}; + private final String PLUGIN_NAME = "grok"; + private String messageInput; + + @BeforeEach + public void setup() { + + pluginSetting = completePluginSettingForGrokPrepper( + GrokPrepperConfig.DEFAULT_BREAK_ON_MATCH, + GrokPrepperConfig.DEFAULT_KEEP_EMPTY_CAPTURES, + Collections.emptyMap(), + GrokPrepperConfig.DEFAULT_NAMED_CAPTURES_ONLY, + Collections.emptyList(), + Collections.emptyList(), + GrokPrepperConfig.DEFAULT_PATTERNS_FILES_GLOB, + Collections.emptyMap(), + GrokPrepperConfig.DEFAULT_TIMEOUT_MILLIS, + GrokPrepperConfig.TARGET); + + pluginSetting.setPipelineName("grokPipeline"); + + // This is a COMMONAPACHELOG pattern with the following format + // COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) + // Note that rawrequest is missing from the log below, which means that it will not be captured unless keep_empty_captures is true + messageInput = "\"127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] \\\"GET /apache_pb.gif HTTP/1.0\\\" 200 2326\""; + } + + @AfterEach + public void tearDown() { + grokPrepper.shutdown(); + } + + private PluginSetting completePluginSettingForGrokPrepper(final boolean breakOnMatch, + final boolean keepEmptyCaptures, + final Map> match, + final boolean namedCapturesOnly, + final List overwrite, + final List patternsDir, + final String patternsFilesGlob, + final Map patternDefinitions, + final int timeoutMillis, + final String target) { + final Map settings = new HashMap<>(); + settings.put(GrokPrepperConfig.BREAK_ON_MATCH, breakOnMatch); + settings.put(GrokPrepperConfig.NAMED_CAPTURES_ONLY, namedCapturesOnly); + settings.put(GrokPrepperConfig.MATCH, match); + settings.put(GrokPrepperConfig.KEEP_EMPTY_CAPTURES, keepEmptyCaptures); + settings.put(GrokPrepperConfig.OVERWRITE, overwrite); + settings.put(GrokPrepperConfig.PATTERNS_DIR, patternsDir); + settings.put(GrokPrepperConfig.PATTERN_DEFINITIONS, patternDefinitions); + settings.put(GrokPrepperConfig.PATTERNS_FILES_GLOB, patternsFilesGlob); + settings.put(GrokPrepperConfig.TIMEOUT_MILLIS, timeoutMillis); + settings.put(GrokPrepperConfig.TARGET, target); + + return new PluginSetting(PLUGIN_NAME, settings); + } + + @Test + public void testMatchNoCaptures() throws JsonProcessingException { + final String nonMatchingPattern = "%{SYSLOGBASE}"; + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList(nonMatchingPattern)); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "}"; + Record record = new Record<>(testData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), record), equalTo(true)); + } + + @Test + public void testSingleMatchSinglePatternWithDefaults() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{COMMONAPACHELOG}")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + messageInput + "," + .concat("\"clientip\":\"127.0.0.1\",") + .concat("\"ident\":\"user-identifier\",") + .concat("\"auth\":\"frank\",") + .concat("\"timestamp\":\"10/Oct/2000:13:55:36 -0700\",") + .concat("\"verb\":\"GET\",") + .concat("\"request\":\"/apache_pb.gif\",") + .concat("\"httpversion\":\"1.0\",") + .concat("\"response\":\"200\",") + .concat("\"bytes\":\"2326\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testSingleMatchMultiplePatternWithDefaults() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + final List patternsToMatchMessage = new ArrayList<>(); + patternsToMatchMessage.add("%{COMMONAPACHELOG}"); + patternsToMatchMessage.add("%{IPORHOST:custom_client_field}"); + + matchConfig.put("message", patternsToMatchMessage); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + messageInput + "," + .concat("\"clientip\":\"127.0.0.1\",") + .concat("\"ident\":\"user-identifier\",") + .concat("\"auth\":\"frank\",") + .concat("\"timestamp\":\"10/Oct/2000:13:55:36 -0700\",") + .concat("\"verb\":\"GET\",") + .concat("\"request\":\"/apache_pb.gif\",") + .concat("\"httpversion\":\"1.0\",") + .concat("\"response\":\"200\",") + .concat("\"custom_client_field\":\"127.0.0.1\",") + .concat("\"bytes\":\"2326\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testSingleMatchTypeConversionWithDefaults() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\" %{NUMBER:response:int} (?:%{NUMBER:bytes:float}|-)")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + messageInput + "," + .concat("\"verb\":\"GET\",") + .concat("\"request\":\"/apache_pb.gif\",") + .concat("\"httpversion\":\"1.0\",") + .concat("\"response\":200,") + .concat("\"bytes\":2326.0}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMultipleMatchWithDefaults() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{COMMONAPACHELOG}")); + matchConfig.put("extra_field", Collections.singletonList("%{GREEDYDATA} %{IPORHOST:host}")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "," + .concat("\"extra_field\":\"My host IP is 192.0.2.1\"}"); + + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + messageInput + "," + .concat("\"extra_field\":\"My host IP is 192.0.2.1\",") + .concat("\"clientip\":\"127.0.0.1\",") + .concat("\"ident\":\"user-identifier\",") + .concat("\"auth\":\"frank\",") + .concat("\"timestamp\":\"10/Oct/2000:13:55:36 -0700\",") + .concat("\"verb\":\"GET\",") + .concat("\"request\":\"/apache_pb.gif\",") + .concat("\"httpversion\":\"1.0\",") + .concat("\"response\":\"200\",") + .concat("\"bytes\":\"2326\",") + .concat("\"host\":\"192.0.2.1\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchWithKeepEmptyCapturesTrue() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{COMMONAPACHELOG}")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + pluginSetting.getSettings().put(GrokPrepperConfig.KEEP_EMPTY_CAPTURES, true); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":" + messageInput + "}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + messageInput + "," + .concat("\"clientip\":\"127.0.0.1\",") + .concat("\"ident\":\"user-identifier\",") + .concat("\"auth\":\"frank\",") + .concat("\"timestamp\":\"10/Oct/2000:13:55:36 -0700\",") + .concat("\"verb\":\"GET\",") + .concat("\"request\":\"/apache_pb.gif\",") + .concat("\"rawrequest\":null,") + .concat("\"httpversion\":\"1.0\",") + .concat("\"response\":\"200\",") + .concat("\"bytes\":\"2326\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchWithNamedCapturesOnlyFalse() throws JsonProcessingException { + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{GREEDYDATA} %{IPORHOST:host} %{NUMBER}")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + pluginSetting.getSettings().put(GrokPrepperConfig.NAMED_CAPTURES_ONLY, false); + grokPrepper = new GrokPrepper(pluginSetting); + + String testData = "{\"message\":\"This is my greedy data before matching 192.0.2.1 123456\"}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":\"This is my greedy data before matching 192.0.2.1 123456\"," + .concat("\"NUMBER\":\"123456\",") + .concat("\"GREEDYDATA\":\"This is my greedy data before matching\",") + .concat("\"host\":\"192.0.2.1\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testCompileNonRegisteredPattern() { + + grokPrepper = new GrokPrepper(pluginSetting); + + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{NONEXISTENTPATTERN}")); + + pluginSetting.getSettings().put(GrokPrepperConfig.MATCH, matchConfig); + + assertThrows(IllegalArgumentException.class, () -> new GrokPrepper(pluginSetting)); + } + + private boolean equalRecords(final Record first, final Record second) throws JsonProcessingException { + final Map recordMapFirst = OBJECT_MAPPER.readValue(first.getData(), MAP_TYPE_REFERENCE); + final Map recordMapSecond = OBJECT_MAPPER.readValue(second.getData(), MAP_TYPE_REFERENCE); + + return recordMapFirst.equals(recordMapSecond); + } +} diff --git a/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperTests.java b/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperTests.java index e320381278..e257e34655 100644 --- a/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperTests.java +++ b/data-prepper-plugins/grok-prepper/src/test/java/com/amazon/dataprepper/plugins/prepper/grok/GrokPrepperTests.java @@ -11,35 +11,91 @@ package com.amazon.dataprepper.plugins.prepper.grok; -import com.amazon.dataprepper.model.record.Record; import com.amazon.dataprepper.model.configuration.PluginSetting; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.krakens.grok.api.Grok; +import io.krakens.grok.api.GrokCompiler; +import io.krakens.grok.api.Match; +import com.amazon.dataprepper.model.record.Record; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.MockedStatic; import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.Mock; +import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.UUID; +import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + @ExtendWith(MockitoExtension.class) public class GrokPrepperTests { - PluginSetting pluginSetting; + private PluginSetting pluginSetting; private GrokPrepper grokPrepper; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference>() {}; + private String messageInput; + + @Mock + private GrokCompiler grokCompiler; + + @Mock + private Match match; + + @Mock + private Grok grok; + + private final String PLUGIN_NAME = "grok"; + private Map capture; @BeforeEach public void setup() { - pluginSetting = new PluginSetting( - "grok", - null - ); + final Map> matchConfig = new HashMap<>(); + matchConfig.put("message", Collections.singletonList("%{COMMONAPACHELOG}")); + + pluginSetting = completePluginSettingForGrokPrepper( + GrokPrepperConfig.DEFAULT_BREAK_ON_MATCH, + GrokPrepperConfig.DEFAULT_KEEP_EMPTY_CAPTURES, + matchConfig, + GrokPrepperConfig.DEFAULT_NAMED_CAPTURES_ONLY, + Collections.emptyList(), + Collections.emptyList(), + GrokPrepperConfig.DEFAULT_PATTERNS_FILES_GLOB, + Collections.emptyMap(), + GrokPrepperConfig.DEFAULT_TIMEOUT_MILLIS, + GrokPrepperConfig.TARGET); + pluginSetting.setPipelineName("grokPipeline"); - grokPrepper = new GrokPrepper(pluginSetting); + + try (MockedStatic grokCompilerMockedStatic = mockStatic(GrokCompiler.class)) { + grokCompilerMockedStatic.when(GrokCompiler::newInstance).thenReturn(grokCompiler); + when(grokCompiler.compile(eq(matchConfig.get("message").get(0)), anyBoolean())).thenReturn(grok); + grokPrepper = new GrokPrepper(pluginSetting); + } + + capture = new HashMap<>(); + + messageInput = UUID.randomUUID().toString(); + + when(grok.match(messageInput)).thenReturn(match); + when(match.capture()).thenReturn(capture); } @AfterEach @@ -48,13 +104,178 @@ public void tearDown() { } @Test - public void testGrokPrepper() { - String testData = "{\"message\": \"127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] BEF25A72965 \\\"GET /apache_pb.gif HTTP/1.0\\\" 200 2326\"}"; + public void testNoCaptures() throws JsonProcessingException { + String testData = "{\"message\":" + "\"" + messageInput + "\"}"; Record record = new Record<>(testData); List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); assertThat(grokkedRecords.size(), equalTo(1)); - assertThat(grokkedRecords.get(0), equalTo(record)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), record), equalTo(true)); + } + + @Test + public void testMatchMerge() throws JsonProcessingException { + capture.put("field_capture_1", "value_capture_1"); + capture.put("field_capture_2", "value_capture_2"); + capture.put("field_capture_3", "value_capture_3"); + + String testData = "{\"message\":" + "\"" + messageInput + "\"}"; + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":\"value_capture_1\",") + .concat("\"field_capture_2\":\"value_capture_2\",") + .concat("\"field_capture_3\":\"value_capture_3\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchMergeCollisionStrings() throws JsonProcessingException { + capture.put("field_capture_1", "value_capture_1"); + capture.put("field_capture_2", "value_capture_2"); + capture.put("field_capture_3", "value_capture_3"); + + String testData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":\"value_capture_collision\"}"); + + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":[\"value_capture_collision\",") + .concat("\"value_capture_1\"],") + .concat("\"field_capture_2\":\"value_capture_2\",") + .concat("\"field_capture_3\":\"value_capture_3\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchMergeCollisionInts() throws JsonProcessingException { + capture.put("field_capture_1", 20); + capture.put("field_capture_2", "value_capture_2"); + capture.put("field_capture_3", "value_capture_3"); + + String testData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\": 10}"); + + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":[ 10,") + .concat("20 ],") + .concat("\"field_capture_2\":\"value_capture_2\",") + .concat("\"field_capture_3\":\"value_capture_3\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchMergeCollisionWithListMixedTypes() throws JsonProcessingException { + List captureListValues = new ArrayList<>(); + captureListValues.add("30"); + captureListValues.add(40); + captureListValues.add(null); + + capture.put("field_capture_1", captureListValues); + capture.put("field_capture_2", "value_capture_2"); + capture.put("field_capture_3", "value_capture_3"); + + String testData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":[10,\"20\"]}"); + + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":[10,") + .concat("\"20\",\"30\",40,null],") + .concat("\"field_capture_2\":\"value_capture_2\",") + .concat("\"field_capture_3\":\"value_capture_3\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + @Test + public void testMatchMergeCollisionWithNullValue() throws JsonProcessingException { + capture.put("field_capture_1", "value_capture_1"); + capture.put("field_capture_2", "value_capture_2"); + capture.put("field_capture_3", "value_capture_3"); + + String testData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":null}"); + + Record record = new Record<>(testData); + + String resultData = "{\"message\":" + "\"" + messageInput + "\"," + .concat("\"field_capture_1\":[null,") + .concat("\"value_capture_1\"],") + .concat("\"field_capture_2\":\"value_capture_2\",") + .concat("\"field_capture_3\":\"value_capture_3\"}"); + + Record resultRecord = new Record<>(resultData); + + List> grokkedRecords = (List>) grokPrepper.doExecute(Collections.singletonList(record)); + + assertThat(grokkedRecords.size(), equalTo(1)); + assertThat(grokkedRecords.get(0), notNullValue()); + assertThat(equalRecords(grokkedRecords.get(0), resultRecord), equalTo(true)); + } + + private PluginSetting completePluginSettingForGrokPrepper(final boolean breakOnMatch, + final boolean keepEmptyCaptures, + final Map> match, + final boolean namedCapturesOnly, + final List overwrite, + final List patternsDir, + final String patternsFilesGlob, + final Map patternDefinitions, + final int timeoutMillis, + final String target) { + final Map settings = new HashMap<>(); + settings.put(GrokPrepperConfig.BREAK_ON_MATCH, breakOnMatch); + settings.put(GrokPrepperConfig.NAMED_CAPTURES_ONLY, namedCapturesOnly); + settings.put(GrokPrepperConfig.MATCH, match); + settings.put(GrokPrepperConfig.KEEP_EMPTY_CAPTURES, keepEmptyCaptures); + settings.put(GrokPrepperConfig.OVERWRITE, overwrite); + settings.put(GrokPrepperConfig.PATTERNS_DIR, patternsDir); + settings.put(GrokPrepperConfig.PATTERN_DEFINITIONS, patternDefinitions); + settings.put(GrokPrepperConfig.PATTERNS_FILES_GLOB, patternsFilesGlob); + settings.put(GrokPrepperConfig.TIMEOUT_MILLIS, timeoutMillis); + settings.put(GrokPrepperConfig.TARGET, target); + + return new PluginSetting(PLUGIN_NAME, settings); + } + + private boolean equalRecords(final Record first, final Record second) throws JsonProcessingException { + final Map recordMapFirst = OBJECT_MAPPER.readValue(first.getData(), MAP_TYPE_REFERENCE); + final Map recordMapSecond = OBJECT_MAPPER.readValue(second.getData(), MAP_TYPE_REFERENCE); + + return recordMapFirst.equals(recordMapSecond); } }