Skip to content

Commit

Permalink
Named captures conversion to grok pattern_definitions format (#586)
Browse files Browse the repository at this point in the history
Added GrokNamedCapturesUtil, a converter that rewrites regex named captures into the grok pattern_definitions format

Signed-off-by: Taylor Gray <[email protected]>
  • Loading branch information
graytaylor0 authored Nov 13, 2021
1 parent 42c5954 commit 816a8bd
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 0 deletions.
1 change: 1 addition & 0 deletions data-prepper-logstash-configuration/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies {
implementation project(':data-prepper-api')
implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml'
implementation 'com.fasterxml.jackson.core:jackson-databind'
implementation "org.apache.commons:commons-lang3:3.12.0"
testImplementation 'org.slf4j:slf4j-simple:1.7.32'
testImplementation 'org.hamcrest:hamcrest:2.2'
testImplementation "org.mockito:mockito-inline:${versionMap.mockito}"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.opensearch.dataprepper.logstash.mapping;


import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Rewrites regex named capture groups of the form {@code (?<name>body)} into grok
 * references of the form {@code %{GENERATED:name}}, collecting each {@code body}
 * under its generated pattern name so it can be emitted as a grok
 * {@code pattern_definitions} entry.
 */
class GrokNamedCapturesUtil {

    /**
     * Matches the opening {@code (?<name>} of a named capture group. The first name
     * character may not be {@code =} or {@code !}, so the lookbehind constructs
     * {@code (?<=...)} and {@code (?<!...)} are never mistaken for named captures.
     */
    private static final Pattern NAMED_CAPTURE_OPENER = Pattern.compile("\\(\\?<([^>=!][^>]*)>");
    private static final String PATTERN_NAME_ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    private static final int PATTERN_NAME_LENGTH = 8;

    private GrokNamedCapturesUtil() {
        // static utility; not meant to be instantiated
    }

    /**
     * Replaces every top-level named capture group in {@code regexPattern} with a grok
     * reference to a freshly generated pattern name.
     *
     * <p>The body of a capture runs to its <em>matching</em> closing parenthesis, so
     * bodies containing nested groups, escaped parentheses or parentheses inside a
     * character class are kept intact. Named captures nested inside another named
     * capture are left verbatim inside the outer definition. Groups with an empty body
     * or without a matching closing parenthesis are left untouched.
     *
     * @param regexPattern the regex that may contain named capture groups
     * @return the rewritten regex together with the generated pattern definitions, in
     *         the order the captures appear in {@code regexPattern}
     * @throws NullPointerException if {@code regexPattern} is {@code null}
     */
    static GrokNamedCapturesPair convertRegexNamedCapturesToGrokPatternDefinitions(String regexPattern) {
        Objects.requireNonNull(regexPattern, "regexPattern");
        final int length = regexPattern.length();
        final Matcher opener = NAMED_CAPTURE_OPENER.matcher(regexPattern);
        final Map<String, String> mappedPatternDefinitions = new LinkedHashMap<>();
        final StringBuilder mappedRegex = new StringBuilder(length);

        int copiedUpTo = 0;
        int index = 0;
        while (index < length) {
            final char current = regexPattern.charAt(index);
            if (current == '\\') {
                // an escaped character can never open a group or a character class
                index += 2;
            } else if (current == '[') {
                index = skipCharacterClass(regexPattern, index);
            } else if (current == '(' && opener.region(index, length).lookingAt()) {
                final int bodyStart = opener.end();
                final int bodyEnd = findClosingParenthesis(regexPattern, bodyStart);
                if (bodyEnd > bodyStart) {
                    final String patternName = generateUniquePatternName(mappedPatternDefinitions);
                    mappedRegex.append(regexPattern, copiedUpTo, index)
                            .append("%{").append(patternName).append(':').append(opener.group(1)).append('}');
                    mappedPatternDefinitions.put(patternName, regexPattern.substring(bodyStart, bodyEnd));
                    index = bodyEnd + 1;
                    copiedUpTo = index;
                } else {
                    // unbalanced or empty group: keep the text as is and keep scanning inside it
                    index = bodyStart;
                }
            } else {
                index++;
            }
        }
        mappedRegex.append(regexPattern, copiedUpTo, length);
        return new GrokNamedCapturesPair(mappedRegex.toString(), mappedPatternDefinitions);
    }

    /**
     * Finds the parenthesis closing a group whose body starts at {@code from}.
     *
     * @return the index of the matching {@code )}, or {@code -1} if the group is never closed
     */
    private static int findClosingParenthesis(final String regex, final int from) {
        int depth = 1;
        int index = from;
        while (index < regex.length()) {
            final char current = regex.charAt(index);
            if (current == '\\') {
                index += 2;
                continue;
            }
            if (current == '[') {
                index = skipCharacterClass(regex, index);
                continue;
            }
            if (current == '(') {
                depth++;
            } else if (current == ')' && --depth == 0) {
                return index;
            }
            index++;
        }
        return -1;
    }

    /**
     * Skips the character class opened by the {@code [} at {@code openIndex}; parentheses
     * inside a class are literals and must not take part in group balancing.
     *
     * @return the index just past the closing {@code ]}, or the regex length if the class is unterminated
     */
    private static int skipCharacterClass(final String regex, final int openIndex) {
        final int length = regex.length();
        int index = openIndex + 1;
        if (index < length && regex.charAt(index) == '^') {
            index++;
        }
        if (index < length && regex.charAt(index) == ']') {
            // a ']' directly after the opening bracket is a literal member of the class
            index++;
        }
        while (index < length) {
            final char current = regex.charAt(index);
            if (current == '\\') {
                index += 2;
            } else if (current == ']') {
                return index + 1;
            } else {
                index++;
            }
        }
        return length;
    }

    /** Generates a random pattern name that is not yet a key of {@code existingDefinitions}. */
    private static String generateUniquePatternName(final Map<String, String> existingDefinitions) {
        String candidate;
        do {
            candidate = generateRandomPatternName();
        } while (existingDefinitions.containsKey(candidate));
        return candidate;
    }

    /** Generates a random ASCII alphanumeric name of {@link #PATTERN_NAME_LENGTH} characters. */
    private static String generateRandomPatternName() {
        final ThreadLocalRandom random = ThreadLocalRandom.current();
        final StringBuilder name = new StringBuilder(PATTERN_NAME_LENGTH);
        for (int i = 0; i < PATTERN_NAME_LENGTH; i++) {
            name.append(PATTERN_NAME_ALPHABET.charAt(random.nextInt(PATTERN_NAME_ALPHABET.length())));
        }
        return name.toString();
    }

    /**
     * Result of a conversion: the rewritten regex plus the generated pattern definitions
     * (generated pattern name mapped to the regex body it stands for).
     */
    static class GrokNamedCapturesPair {
        private final String mappedRegex;
        private final Map<String, String> mappedPatternDefinitions;

        public GrokNamedCapturesPair(final String mappedRegex, final Map<String, String> mappedPatternDefinitions) {
            this.mappedRegex = mappedRegex;
            this.mappedPatternDefinitions = mappedPatternDefinitions;
        }

        /** @return the regex with named captures replaced by grok references */
        public String getMappedRegex() {
            return mappedRegex;
        }

        /** @return the map passed to the constructor (not copied): generated pattern name to regex body */
        public Map<String, String> getMappedPatternDefinitions() {
            return mappedPatternDefinitions;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package org.opensearch.dataprepper.logstash.mapping;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.opensearch.dataprepper.logstash.mapping.GrokNamedCapturesUtil.GrokNamedCapturesPair;

/**
 * Tests for {@link GrokNamedCapturesUtil#convertRegexNamedCapturesToGrokPatternDefinitions(String)}.
 * Inputs are built from random UUIDs, and the generated pattern names are read back from
 * the result because they are random.
 */
public class GrokNamedCapturesUtilTest {
    private final String randomPrefix = UUID.randomUUID().toString();
    private final String randomSuffix = UUID.randomUUID().toString();
    private final String randomMiddle = UUID.randomUUID().toString();

    private final List<String> namedCapturesPatterns = new ArrayList<>();
    private final String firstNamedCapturesPattern = UUID.randomUUID().toString();
    private final String secondNamedCapturesPattern = UUID.randomUUID().toString();

    private final List<String> namedCapturesNames = new ArrayList<>();
    private final String firstNamedCapturesName = UUID.randomUUID().toString();
    private final String secondNamedCapturesName = UUID.randomUUID().toString();

    @BeforeEach
    public void setup() {
        namedCapturesNames.add(firstNamedCapturesName);
        namedCapturesNames.add(secondNamedCapturesName);

        namedCapturesPatterns.add(firstNamedCapturesPattern);
        namedCapturesPatterns.add(secondNamedCapturesPattern);
    }

    @Test
    public void testSingleNamedCaptures() {
        final String namedCapturesPattern = UUID.randomUUID().toString();
        final String namedCapturesName = UUID.randomUUID().toString();
        final String regex = String.format("(?<%s>%s)", namedCapturesName, namedCapturesPattern);

        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);

        assertThat(result.getMappedPatternDefinitions().size(), equalTo(1));
        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
            assertThat(patternDefinition.getValue(), equalTo(namedCapturesPattern));
            final String expectedResult = String.format("%%{%s:%s}", patternDefinition.getKey(), namedCapturesName);
            assertThat(result.getMappedRegex(), equalTo(expectedResult));
        }
    }

    @Test
    public void testConnectedNamedCaptures() {
        final String regex = String.format("%s(?<%s>%s)(?<%s>%s)",
                randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern,
                secondNamedCapturesName, secondNamedCapturesPattern);

        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);

        final List<String> patternDefinitionNames = assertDefinitionsInOrder(result, namedCapturesPatterns);
        final String expectedResult = String.format("%s%%{%s:%s}%%{%s:%s}",
                randomPrefix, patternDefinitionNames.get(0), namedCapturesNames.get(0),
                patternDefinitionNames.get(1), namedCapturesNames.get(1));
        assertThat(result.getMappedRegex(), equalTo(expectedResult));
    }

    @Test
    public void testSeparatedNamedCaptures() {
        final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s",
                randomPrefix, firstNamedCapturesName, firstNamedCapturesPattern, randomMiddle,
                secondNamedCapturesName, secondNamedCapturesPattern, randomSuffix);

        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);

        final List<String> patternDefinitionNames = assertDefinitionsInOrder(result, namedCapturesPatterns);
        final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s",
                randomPrefix, patternDefinitionNames.get(0), namedCapturesNames.get(0), randomMiddle,
                patternDefinitionNames.get(1), namedCapturesNames.get(1), randomSuffix);
        assertThat(result.getMappedRegex(), equalTo(expectedResult));
    }

    @Test
    public void testNoNamedCapturesKeepsSameRegex() {
        final String regex = String.format("%s %s", UUID.randomUUID().toString(), UUID.randomUUID().toString());

        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);

        assertThat(result.getMappedPatternDefinitions().size(), equalTo(0));
        assertThat(result.getMappedRegex(), equalTo(regex));
    }

    @Test
    public void testDuplicateNamedCaptures() {
        final String namedCapturesName = UUID.randomUUID().toString();
        final String namedCapturesPattern = UUID.randomUUID().toString();
        final List<String> expectedPatterns = new ArrayList<>();
        expectedPatterns.add(namedCapturesPattern);
        expectedPatterns.add(namedCapturesPattern);

        final String regex = String.format("%s(?<%s>%s) %s (?<%s>%s)%s",
                randomPrefix, namedCapturesName, namedCapturesPattern, randomMiddle,
                namedCapturesName, namedCapturesPattern, randomSuffix);

        final GrokNamedCapturesPair result = GrokNamedCapturesUtil.convertRegexNamedCapturesToGrokPatternDefinitions(regex);

        // the same capture appearing twice must still yield two distinct definitions
        final List<String> patternDefinitionNames = assertDefinitionsInOrder(result, expectedPatterns);
        final String expectedResult = String.format("%s%%{%s:%s} %s %%{%s:%s}%s",
                randomPrefix, patternDefinitionNames.get(0), namedCapturesName, randomMiddle,
                patternDefinitionNames.get(1), namedCapturesName, randomSuffix);
        assertThat(result.getMappedRegex(), equalTo(expectedResult));
    }

    /**
     * Asserts that the result holds exactly {@code expectedPatterns} as definition values,
     * in iteration order, and returns the generated definition names in that same order.
     */
    private static List<String> assertDefinitionsInOrder(final GrokNamedCapturesPair result,
                                                         final List<String> expectedPatterns) {
        assertThat(result.getMappedPatternDefinitions().size(), equalTo(expectedPatterns.size()));

        final List<String> patternDefinitionNames = new ArrayList<>();
        int index = 0;
        for (final Map.Entry<String, String> patternDefinition : result.getMappedPatternDefinitions().entrySet()) {
            assertThat(patternDefinition.getValue(), equalTo(expectedPatterns.get(index)));
            patternDefinitionNames.add(patternDefinition.getKey());
            index++;
        }
        return patternDefinitionNames;
    }
}

0 comments on commit 816a8bd

Please sign in to comment.