diff --git a/data-prepper-logstash-configuration/build.gradle b/data-prepper-logstash-configuration/build.gradle
index 4a56a8d682..6e7143ae1e 100644
--- a/data-prepper-logstash-configuration/build.gradle
+++ b/data-prepper-logstash-configuration/build.gradle
@@ -13,6 +13,7 @@ dependencies {
     implementation project(':data-prepper-api')
     implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml'
     implementation 'com.fasterxml.jackson.core:jackson-databind'
+    implementation "org.apache.commons:commons-lang3:3.12.0"
     testImplementation 'org.slf4j:slf4j-simple:1.7.32'
     testImplementation 'org.hamcrest:hamcrest:2.2'
     testImplementation "org.mockito:mockito-inline:${versionMap.mockito}"
diff --git a/data-prepper-logstash-configuration/src/main/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtil.java b/data-prepper-logstash-configuration/src/main/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtil.java
new file mode 100644
index 0000000000..562c54d145
--- /dev/null
+++ b/data-prepper-logstash-configuration/src/main/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtil.java
@@ -0,0 +1,144 @@
+package org.opensearch.dataprepper.logstash.mapping;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Converts regex named capture groups, {@code (?<name>pattern)}, into Grok pattern references of the form
+ * {@code %{PATTERN_NAME:name}} together with the pattern definitions those references rely on.
+ */
+class GrokNamedCapturesUtil {
+
+    private static final String NAMED_CAPTURE_OPENER = "(?<";
+    private static final int PATTERN_NAME_LENGTH = 8;
+
+    private GrokNamedCapturesUtil() {
+        // Static helpers only; never instantiated.
+    }
+
+    /**
+     * Replaces each named capture group in {@code regexPattern} with {@code %{<generated name>:<capture name>}}
+     * and stores the body of the group under the generated name.
+     *
+     * <p>A group ends at the parenthesis that balances its opening one, so bodies containing nested groups,
+     * escaped characters or character classes are kept whole. Lookbehind assertions ({@code (?<=...)} and
+     * {@code (?<!...)}), escaped parentheses, unterminated groups and groups with an empty name or body are
+     * copied to the result unchanged. Named groups nested inside another named group stay part of the outer
+     * group's definition and are not converted separately.
+     *
+     * @param regexPattern the regex to convert
+     * @return the converted regex plus the generated pattern definitions, in order of appearance
+     * @throws NullPointerException if {@code regexPattern} is null
+     */
+    static GrokNamedCapturesPair convertRegexNamedCapturesToGrokPatternDefinitions(final String regexPattern) {
+        Objects.requireNonNull(regexPattern);
+        final Map<String, String> mappedPatternDefinitions = new LinkedHashMap<>();
+        final StringBuilder mappedRegex = new StringBuilder(regexPattern.length());
+
+        int position = 0;
+        while (position < regexPattern.length()) {
+            final char current = regexPattern.charAt(position);
+            if (current == '\\' && position + 1 < regexPattern.length()) {
+                // An escaped character such as "\(" is literal text and can never open a group.
+                mappedRegex.append(regexPattern, position, position + 2);
+                position += 2;
+                continue;
+            }
+
+            final int nameStart = position + NAMED_CAPTURE_OPENER.length();
+            final int nameEnd = regexPattern.startsWith(NAMED_CAPTURE_OPENER, position)
+                    ? regexPattern.indexOf('>', nameStart) : -1;
+            final int groupEnd = nameEnd > nameStart && isCaptureName(regexPattern.substring(nameStart, nameEnd))
+                    ? findGroupEnd(regexPattern, nameEnd + 1) : -1;
+            if (groupEnd <= nameEnd + 1) {
+                // Not a convertible named capture (no match, unterminated, or empty body): keep the character.
+                mappedRegex.append(current);
+                position++;
+                continue;
+            }
+
+            final String captureName = regexPattern.substring(nameStart, nameEnd);
+            final String patternRegex = regexPattern.substring(nameEnd + 1, groupEnd);
+            final String patternName = generateUniquePatternName(mappedPatternDefinitions);
+            mappedRegex.append(String.format("%%{%s:%s}", patternName, captureName));
+            mappedPatternDefinitions.put(patternName, patternRegex);
+            position = groupEnd + 1;
+        }
+        return new GrokNamedCapturesPair(mappedRegex.toString(), mappedPatternDefinitions);
+    }
+
+    /**
+     * Finds the parenthesis that closes the group whose body starts at {@code bodyStart}. Parentheses that are
+     * escaped or sit inside a character class do not count towards the balance.
+     *
+     * @param regex the regex being scanned
+     * @param bodyStart index of the first character after the group's opening syntax
+     * @return index of the closing parenthesis, or -1 if the group is never closed
+     */
+    private static int findGroupEnd(final String regex, final int bodyStart) {
+        int openGroups = 1;
+        int openCharacterClasses = 0;
+        for (int index = bodyStart; index < regex.length(); index++) {
+            final char current = regex.charAt(index);
+            if (current == '\\') {
+                index++; // the escaped character is literal, whatever it is
+            } else if (current == '[') {
+                openCharacterClasses++;
+            } else if (current == ']') {
+                openCharacterClasses = Math.max(0, openCharacterClasses - 1);
+            } else if (openCharacterClasses == 0 && current == '(') {
+                openGroups++;
+            } else if (openCharacterClasses == 0 && current == ')') {
+                openGroups--;
+                if (openGroups == 0) {
+                    return index;
+                }
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Tells whether the text found between {@code (?<} and {@code >} is a capture name rather than a lookbehind.
+     */
+    private static boolean isCaptureName(final String candidate) {
+        return !StringUtils.startsWithAny(candidate, "=", "!")
+                && !StringUtils.containsAny(candidate, '(', ')');
+    }
+
+    /**
+     * Generates a random alphanumeric pattern name that is not yet a key of {@code existingDefinitions}.
+     */
+    private static String generateUniquePatternName(final Map<String, String> existingDefinitions) {
+        String patternName;
+        do {
+            patternName = RandomStringUtils.random(PATTERN_NAME_LENGTH, true, true);
+        } while (existingDefinitions.containsKey(patternName));
+        return patternName;
+    }
+
+    /**
+     * Holds a converted regex together with the pattern definitions generated while converting it.
+     */
+    static class GrokNamedCapturesPair {
+        private final String mappedRegex;
+        private final Map<String, String> mappedPatternDefinitions;
+
+        public GrokNamedCapturesPair(final String mappedRegex, final Map<String, String> mappedPatternDefinitions) {
+            this.mappedRegex = mappedRegex;
+            this.mappedPatternDefinitions = mappedPatternDefinitions;
+        }
+
+        public String getMappedRegex() {
+            return mappedRegex;
+        }
+
+        public Map<String, String> getMappedPatternDefinitions() {
+            return mappedPatternDefinitions;
+        }
+    }
+}
diff --git a/data-prepper-logstash-configuration/src/test/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtilTest.java b/data-prepper-logstash-configuration/src/test/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtilTest.java
new file mode 100644
index 0000000000..f29aa46dc4
--- /dev/null
+++ b/data-prepper-logstash-configuration/src/test/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtilTest.java
@@ -0,0 +1,160 @@
+package org.opensearch.dataprepper.logstash.mapping;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.opensearch.dataprepper.logstash.mapping.GrokNamedCapturesUtil.GrokNamedCapturesPair;
+
+public class GrokNamedCapturesUtilTest {
+    private final String randomPrefix = UUID.randomUUID().toString();
+    private final String randomSuffix = UUID.randomUUID().toString();
+    private final String randomMiddle = UUID.randomUUID().toString();
+
+    private final List<String> namedCapturesPatterns = new ArrayList<>();
+    private final String firstNamedCapturesPattern = UUID.randomUUID().toString();
+    private final String secondNamedCapturesPattern = UUID.randomUUID().toString();
+
+    private final List<String> namedCapturesNames = new ArrayList<>();
+    private final String firstNamedCapturesName = UUID.randomUUID().toString();
+    private final String secondNamedCapturesName = UUID.randomUUID().toString();
+
+    @BeforeEach
+    public void setup() {
+        namedCapturesNames.add(firstNamedCapturesName);
+        namedCapturesNames.add(secondNamedCapturesName);
+
+        namedCapturesPatterns.add(firstNamedCapturesPattern);
+        namedCapturesPatterns.add(secondNamedCapturesPattern);
+    }
+
+    @Test
+    public void testSingleNamedCaptures() {
+        final String namedCapturesPattern = UUID.randomUUID().toString();
+        final String namedCapturesName = UUID.randomUUID().toString();
+        final String regex = String.format("(?<%s>%s)", namedCapturesName, namedCapturesPattern);
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(1));
+
+        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
+            assertThat(patternDefinition.getValue(), equalTo(namedCapturesPattern));
+            final String expectedResult = String.format("%%{%s:%s}", patternDefinition.getKey(), namedCapturesName);
+            assertThat(result.getMappedRegex(), equalTo(expectedResult));
+        }
+    }
+
+    @Test
+    public void testConnectedNamedCaptures() {
+        final String regex = String.format("%s(?<%s>%s)(?<%s>%s)",
+                randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern,
+                secondNamedCapturesName, secondNamedCapturesPattern);
+
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(2));
+
+        int index = 0;
+        final List<String> patternDefinitionNames = new ArrayList<>();
+        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
+            assertThat(patternDefinition.getValue(), equalTo(namedCapturesPatterns.get(index)));
+            patternDefinitionNames.add(patternDefinition.getKey());
+            index++;
+        }
+        final String expectedResult = String.format("%s%%{%s:%s}%%{%s:%s}", randomPrefix,
+                patternDefinitionNames.get(0), namedCapturesNames.get(0), patternDefinitionNames.get(1), namedCapturesNames.get(1));
+        assertThat(result.getMappedRegex(), equalTo(expectedResult));
+    }
+
+    @Test
+    public void testSeparatedNamedCaptures() {
+        final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s",
+                randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern, randomMiddle,
+                secondNamedCapturesName, secondNamedCapturesPattern, randomSuffix);
+
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(2));
+
+        int index = 0;
+        final List<String> patternDefinitionNames = new ArrayList<>();
+        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
+            assertThat(patternDefinition.getValue(), equalTo(namedCapturesPatterns.get(index)));
+            patternDefinitionNames.add(patternDefinition.getKey());
+            index++;
+        }
+        final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s", randomPrefix, patternDefinitionNames.get(0), namedCapturesNames.get(0), randomMiddle,
+                patternDefinitionNames.get(1), namedCapturesNames.get(1), randomSuffix);
+        assertThat(result.getMappedRegex(), equalTo(expectedResult));
+    }
+
+    @Test
+    public void testNoNamedCapturesKeepsSameRegex() {
+        final String regex = String.format("%s %s", UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(0));
+        assertThat(result.getMappedRegex(), equalTo(regex));
+    }
+
+    @Test
+    public void testDuplicateNamedCaptures() {
+        final String namedCapturesName = UUID.randomUUID().toString();
+        final String namedCapturesPattern = UUID.randomUUID().toString();
+
+        final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s",
+                randomPrefix, namedCapturesName, namedCapturesPattern, randomMiddle,
+                namedCapturesName, namedCapturesPattern, randomSuffix);
+
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(2));
+
+        final List<String> patternDefinitionNames = new ArrayList<>();
+        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
+            assertThat(patternDefinition.getValue(), equalTo(namedCapturesPattern));
+            patternDefinitionNames.add(patternDefinition.getKey());
+        }
+        final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s", randomPrefix, patternDefinitionNames.get(0), namedCapturesName, randomMiddle,
+                patternDefinitionNames.get(1), namedCapturesName, randomSuffix);
+        assertThat(result.getMappedRegex(), equalTo(expectedResult));
+    }
+
+    @Test
+    public void testNestedGroupStaysInsidePatternDefinition() {
+        final String regex = "(?<status>(ok|fail)ed)";
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(1));
+        final Map.Entry<String, String> patternDefinition =
+                result.getMappedPatternDefinitions().entrySet().iterator().next();
+        assertThat(patternDefinition.getValue(), equalTo("(ok|fail)ed"));
+        assertThat(result.getMappedRegex(), equalTo(String.format("%%{%s:status}", patternDefinition.getKey())));
+    }
+
+    @Test
+    public void testParenthesisInCharacterClassDoesNotEndGroup() {
+        final String regex = "(?<value>[^)]+)\\)";
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(1));
+        final Map.Entry<String, String> patternDefinition =
+                result.getMappedPatternDefinitions().entrySet().iterator().next();
+        assertThat(patternDefinition.getValue(), equalTo("[^)]+"));
+        assertThat(result.getMappedRegex(), equalTo(String.format("%%{%s:value}\\)", patternDefinition.getKey())));
+    }
+
+    @Test
+    public void testLookbehindIsNotConvertedToGrokPattern() {
+        final String regex = "(?<=id=)(?<id>\\d+)(?<!0)";
+        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);
+
+        assertThat(result.getMappedPatternDefinitions().size(), equalTo(1));
+        final Map.Entry<String, String> patternDefinition =
+                result.getMappedPatternDefinitions().entrySet().iterator().next();
+        assertThat(patternDefinition.getValue(), equalTo("\\d+"));
+        assertThat(result.getMappedRegex(), equalTo(String.format("(?<=id=)%%{%s:id}(?<!0)", patternDefinition.getKey())));
+    }
+}