-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Named captures conversion to grok pattern_definitions format (#586)
Added GrokNamedCapturesUtil Converter to change to pattern_definitions format Signed-off-by: Taylor Gray <[email protected]>
- Loading branch information
1 parent
42c5954
commit 816a8bd
Showing
3 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
...tion/src/main/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package org.opensearch.dataprepper.logstash.mapping; | ||
|
||
|
||
import org.apache.commons.lang3.RandomStringUtils; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
||
import java.util.LinkedHashMap; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/**
 * Rewrites regex named capture groups of the form {@code (?<name>regex)} into grok
 * references of the form {@code %{PATTERN:name}}, and collects the extracted
 * {@code PATTERN -> regex} pairs so they can be supplied as grok pattern definitions.
 */
class GrokNamedCapturesUtil {

    /**
     * Matches only the opening of a named capture group, {@code (?<name>}. The negative
     * lookahead rejects the look-behind constructs {@code (?<=} and {@code (?<!}, which
     * share the same first three characters but are not named captures.
     */
    private static final Pattern NAMED_CAPTURE_OPENER = Pattern.compile("\\(\\?<(?![=!])(.+?)>");
    private static final int PATTERN_NAME_LENGTH = 8;
    private static final String PATTERN_NAME_ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    private GrokNamedCapturesUtil() {
        // static utility class, not meant to be instantiated
    }

    /**
     * Replaces every named capture group in {@code regexPattern} with a grok reference to a
     * freshly generated pattern name.
     *
     * <p>The body of a group extends to its matching closing parenthesis: nested groups,
     * backslash-escaped characters and parentheses inside a character class are kept as part
     * of the body. Named captures nested inside another named capture are left verbatim in the
     * outer definition. An opener that has no balanced closing parenthesis, or whose body is
     * empty, is copied to the output unchanged.
     *
     * @param regexPattern the regular expression to convert
     * @return the rewritten expression together with the generated pattern definitions, in the
     *         order the groups appear in {@code regexPattern}
     * @throws NullPointerException if {@code regexPattern} is null
     */
    static GrokNamedCapturesPair convertRegexNamedCapturesToGrokPatternDefinitions(String regexPattern) {
        Objects.requireNonNull(regexPattern, "regexPattern must not be null");
        final Matcher opener = NAMED_CAPTURE_OPENER.matcher(regexPattern);
        final Map<String, String> mappedPatternDefinitions = new LinkedHashMap<>();
        final StringBuilder mappedRegex = new StringBuilder(regexPattern.length());

        int copiedUpTo = 0;
        int searchFrom = 0;
        while (opener.find(searchFrom)) {
            final int bodyStart = opener.end();
            final int closeIndex = findClosingParenthesis(regexPattern, bodyStart);
            if (closeIndex <= bodyStart) {
                // Unbalanced or empty group: leave the text as it is and keep scanning after the opener.
                searchFrom = bodyStart;
                continue;
            }

            final String captureName = opener.group(1);
            final String patternName = generateUniquePatternName(mappedPatternDefinitions);

            mappedRegex.append(regexPattern, copiedUpTo, opener.start());
            mappedRegex.append("%{").append(patternName).append(':').append(captureName).append('}');
            mappedPatternDefinitions.put(patternName, regexPattern.substring(bodyStart, closeIndex));

            copiedUpTo = closeIndex + 1;
            searchFrom = copiedUpTo;
        }
        mappedRegex.append(regexPattern, copiedUpTo, regexPattern.length());

        return new GrokNamedCapturesPair(mappedRegex.toString(), mappedPatternDefinitions);
    }

    /**
     * Finds the parenthesis that closes the group whose body starts at {@code from}.
     *
     * @return the index of the closing parenthesis, or -1 when the group is never closed
     */
    private static int findClosingParenthesis(final String regex, final int from) {
        final int length = regex.length();
        int depth = 0;
        boolean inCharacterClass = false;

        for (int i = from; i < length; i++) {
            final char current = regex.charAt(i);
            if (current == '\\') {
                // The escaped character is a literal, whatever it is.
                i++;
            } else if (inCharacterClass) {
                if (current == ']') {
                    inCharacterClass = false;
                }
            } else if (current == '[') {
                inCharacterClass = true;
                // A ']' directly after '[' or '[^' is a literal member of the class, not its end.
                int peek = i + 1;
                if (peek < length && regex.charAt(peek) == '^') {
                    peek++;
                }
                if (peek < length && regex.charAt(peek) == ']') {
                    i = peek;
                }
            } else if (current == '(') {
                depth++;
            } else if (current == ')') {
                if (depth == 0) {
                    return i;
                }
                depth--;
            }
        }
        return -1;
    }

    /** Generates a pattern name that is not yet a key of {@code existingDefinitions}. */
    private static String generateUniquePatternName(final Map<String, String> existingDefinitions) {
        String candidate;
        do {
            candidate = generateRandomPatternName();
        } while (existingDefinitions.containsKey(candidate));
        return candidate;
    }

    /** Generates a random alphanumeric name of {@link #PATTERN_NAME_LENGTH} characters. */
    private static String generateRandomPatternName() {
        final java.util.concurrent.ThreadLocalRandom random = java.util.concurrent.ThreadLocalRandom.current();
        final StringBuilder name = new StringBuilder(PATTERN_NAME_LENGTH);
        for (int i = 0; i < PATTERN_NAME_LENGTH; i++) {
            name.append(PATTERN_NAME_ALPHABET.charAt(random.nextInt(PATTERN_NAME_ALPHABET.length())));
        }
        return name.toString();
    }

    /**
     * Result of a conversion: the rewritten expression and the pattern definitions it refers to.
     */
    static class GrokNamedCapturesPair {
        private final String mappedRegex;
        private final Map<String, String> mappedPatternDefinitions;

        /**
         * @param mappedRegex the expression with named captures replaced by grok references
         * @param mappedPatternDefinitions generated pattern name mapped to the regex it stands for
         */
        public GrokNamedCapturesPair(final String mappedRegex, final Map<String, String> mappedPatternDefinitions) {
            this.mappedRegex = mappedRegex;
            this.mappedPatternDefinitions = mappedPatternDefinitions;
        }

        /** @return the expression with named captures replaced by grok references */
        public String getMappedRegex() {
            return mappedRegex;
        }

        /** @return the map passed to the constructor: generated pattern name to regex */
        public Map<String, String> getMappedPatternDefinitions() {
            return mappedPatternDefinitions;
        }
    }
}
125 changes: 125 additions & 0 deletions
125
.../src/test/java/org/opensearch/dataprepper/logstash/mapping/GrokNamedCapturesUtilTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
package org.opensearch.dataprepper.logstash.mapping; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.UUID; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.CoreMatchers.equalTo; | ||
import static org.opensearch.dataprepper.logstash.mapping.GrokNamedCapturesUtil.GrokNamedCapturesPair; | ||
|
||
public class GrokNamedCapturesUtilTest { | ||
private final String randomPrefix = UUID.randomUUID().toString(); | ||
private final String randomSuffix = UUID.randomUUID().toString(); | ||
private final String randomMiddle = UUID.randomUUID().toString(); | ||
|
||
private final List<String> namedCapturesPatterns = new ArrayList<>(); | ||
private final String firstNamedCapturesPattern = UUID.randomUUID().toString(); | ||
private final String secondNamedCapturesPattern = UUID.randomUUID().toString(); | ||
|
||
private final List<String> namedCapturesNames = new ArrayList<>(); | ||
private final String firstNamedCapturesName = UUID.randomUUID().toString(); | ||
private final String secondNamedCapturesName = UUID.randomUUID().toString(); | ||
|
||
|
||
@BeforeEach | ||
public void setup() { | ||
namedCapturesNames.add(firstNamedCapturesName); | ||
namedCapturesNames.add(secondNamedCapturesName); | ||
|
||
namedCapturesPatterns.add(firstNamedCapturesPattern); | ||
namedCapturesPatterns.add(secondNamedCapturesPattern); | ||
} | ||
|
||
@Test | ||
public void testSingleNamedCaptures() { | ||
final String namedCapturesPattern = UUID.randomUUID().toString(); | ||
final String namedCapturesName = UUID.randomUUID().toString(); | ||
final String regex = String.format("(?<%s>%s)", namedCapturesName, namedCapturesPattern); | ||
final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex); | ||
|
||
|
||
assertThat(result.getMappedPatternDefinitions().size(), equalTo(1)); | ||
|
||
for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) { | ||
assertThat(patternDefinition.getValue().equals(namedCapturesPattern), equalTo(true)); | ||
final String expectedResult = String.format("%%{%s:%s}", patternDefinition.getKey(), namedCapturesName); | ||
assertThat(result.getMappedRegex().equals(expectedResult), equalTo(true)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testConnectedNamedCaptures() { | ||
final String regex = String.format("%s(?<%s>%s)(?<%s>%s)", | ||
randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern, | ||
secondNamedCapturesName, secondNamedCapturesPattern); | ||
|
||
final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex); | ||
assertThat(result.getMappedPatternDefinitions().size(), equalTo(2)); | ||
|
||
int index = 0; | ||
final List<String> patternDefinitionNames = new ArrayList<>(); | ||
for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) { | ||
assertThat(patternDefinition.getValue().equals(namedCapturesPatterns.get(index)), equalTo(true)); | ||
patternDefinitionNames.add(patternDefinition.getKey()); | ||
index++; | ||
} | ||
final String expectedResult = String.format("%s%%{%s:%s}%%{%s:%s}", randomPrefix, patternDefinitionNames.get(0), namedCapturesNames.get(0), patternDefinitionNames.get(1), namedCapturesNames.get(1)); | ||
assertThat(result.getMappedRegex().equals(expectedResult), equalTo(true)); | ||
} | ||
|
||
@Test | ||
public void testSeparatedNamedCaptures() { | ||
final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s", | ||
randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern, randomMiddle, | ||
secondNamedCapturesName, secondNamedCapturesPattern, randomSuffix); | ||
|
||
final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex); | ||
assertThat(result.getMappedPatternDefinitions().size(), equalTo(2)); | ||
|
||
int index = 0; | ||
final List<String> patternDefinitionNames = new ArrayList<>(); | ||
for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) { | ||
assertThat(patternDefinition.getValue().equals(namedCapturesPatterns.get(index)), equalTo(true)); | ||
patternDefinitionNames.add(patternDefinition.getKey()); | ||
index++; | ||
} | ||
final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s", randomPrefix, patternDefinitionNames.get(0), namedCapturesNames.get(0), randomMiddle, | ||
patternDefinitionNames.get(1), namedCapturesNames.get(1), randomSuffix); | ||
assertThat(result.getMappedRegex().equals(expectedResult), equalTo(true)); | ||
} | ||
|
||
@Test | ||
public void testNoNamedCapturesKeepsSameRegex() { | ||
final String regex = String.format("%s %s", UUID.randomUUID().toString(), UUID.randomUUID().toString()); | ||
final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex); | ||
assertThat(result.getMappedPatternDefinitions().size(), equalTo(0)); | ||
assertThat(result.getMappedRegex().equals(regex), equalTo(true)); | ||
} | ||
|
||
@Test | ||
public void testDuplicateNamedCaptures() { | ||
final String namedCapturesName = UUID.randomUUID().toString(); | ||
final String namedCapturesPattern = UUID.randomUUID().toString(); | ||
|
||
final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s", | ||
randomPrefix, namedCapturesName, namedCapturesPattern, randomMiddle, | ||
namedCapturesName, namedCapturesPattern, randomSuffix); | ||
|
||
final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex); | ||
assertThat(result.getMappedPatternDefinitions().size(), equalTo(2)); | ||
|
||
final List<String> patternDefinitionNames = new ArrayList<>(); | ||
for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) { | ||
assertThat(patternDefinition.getValue().equals(namedCapturesPattern), equalTo(true)); | ||
patternDefinitionNames.add(patternDefinition.getKey()); | ||
} | ||
final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s", randomPrefix, patternDefinitionNames.get(0), namedCapturesName, randomMiddle, | ||
patternDefinitionNames.get(1), namedCapturesName, randomSuffix); | ||
assertThat(result.getMappedRegex().equals(expectedResult), equalTo(true)); | ||
} | ||
} |