Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Pattern.programSize() and Matcher.programSize() #180

Merged
merged 2 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions java/com/google/re2j/Matcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ public final class Matcher {
// The number of submatches (groups) in the pattern.
private final int groupCount;

// The number of instructions in the pattern.
private final int numberOfInstructions;

private MatcherInput matcherInput;

// The input length in UTF16 codes.
Expand Down Expand Up @@ -77,6 +80,7 @@ private Matcher(Pattern pattern) {
groupCount = re2.numberOfCapturingGroups();
groups = new int[2 + 2 * groupCount];
namedGroups = re2.namedGroups;
numberOfInstructions = re2.numberOfInstructions();
}

/** Creates a new {@code Matcher} with the given pattern and input. */
Expand Down Expand Up @@ -209,6 +213,20 @@ public int end(String group) {
return end(g);
}

/**
* Returns the program size of this matcher's pattern.
*
* <p>
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
* "cost". Larger numbers are more expensive than smaller numbers.
* </p>
*
* <p>
* The value is captured from the compiled regexp when this matcher is constructed, so it is
* expected to equal {@link Pattern#programSize()} of the pattern this matcher was created from.
* </p>
*
* @return the program size of this matcher's pattern
*/
public int programSize() {
return numberOfInstructions;
}

/**
* Returns the most recent match.
*
Expand Down
14 changes: 14 additions & 0 deletions java/com/google/re2j/Pattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,20 @@ public String toString() {
return pattern;
}

/**
* Returns the program size of this pattern, i.e. the number of instructions in its compiled
* regular expression program.
*
* <p>
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
* "cost". Larger numbers are more expensive than smaller numbers.
* </p>
*
* @return the program size of this pattern
*/
public int programSize() {
return re2.numberOfInstructions();
}

/**
* Returns the number of capturing groups in this pattern. Group zero denotes the entire
* pattern and is excluded from this count.
Expand Down
7 changes: 7 additions & 0 deletions java/com/google/re2j/RE2.java
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ int numberOfCapturingGroups() {
return numSubexp;
}

/**
* Returns the number of instructions in this compiled regular expression program.
*
* @return the instruction count reported by {@code prog.numInst()}
*/
int numberOfInstructions() {
return prog.numInst();
}

// get() returns a machine to use for matching |this|. It uses |this|'s
// machine cache if possible, to avoid unnecessary allocation.
Machine get() {
Expand Down
21 changes: 21 additions & 0 deletions javatests/com/google/re2j/ApiTestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,27 @@ public static void testGroupCount(String pattern, int count) {
assertEquals(count, mj.groupCount());
}

// Tests that an RE2 Pattern and the Matchers created from it (one over a String input, one over
// the byte[] returned by getUtf8Bytes) all report the expected program size.
public static void testProgramSize(String pattern, int expectedSize) {
  final Pattern compiled = Pattern.compile(pattern);

  // The input text is arbitrary: only the program size is checked, never a match result.
  final String text = "foo";
  final byte[] utf8Text = getUtf8Bytes(text);
  final Matcher stringMatcher = compiled.matcher(text);
  final Matcher bytesMatcher = compiled.matcher(utf8Text);

  // The pattern itself.
  Truth.assertWithMessage("Pattern(\"%s\") program size", compiled)
      .that(compiled.programSize())
      .isEqualTo(expectedSize);

  // A matcher over String input.
  Truth.assertWithMessage(
          "Matcher(\"%s\", \"%s\") program size", stringMatcher.pattern(), text)
      .that(stringMatcher.programSize())
      .isEqualTo(expectedSize);

  // A matcher over byte[] input.
  Truth.assertWithMessage(
          "Matcher(\"%s\", %s) program size", bytesMatcher.pattern(), Arrays.toString(utf8Text))
      .that(bytesMatcher.programSize())
      .isEqualTo(expectedSize);
}

public static void testGroup(String text, String regexp, String[] output) {
// RE2
Pattern p = Pattern.compile(regexp);
Expand Down
17 changes: 17 additions & 0 deletions javatests/com/google/re2j/MatcherTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,25 @@ public void testReplaceFirst() {
ApiTestUtils.testReplaceFirst("aab", "a*?", "<$0>", "<>aab");
}

@Test
public void testProgramSize() {
  // Matcher.programSize() merely forwards the pattern's value; the exhaustive cases live in
  // PatternTest#testProgramSize. Here we only check that the forwarding works.
  final Pattern compiled = Pattern.compile("go+d");
  final int expectedSize = compiled.programSize();

  // Sanity check: the pattern reports a size greater than 1.
  Truth.assertWithMessage("Pattern program size").that(expectedSize).isGreaterThan(1);

  // The size must not depend on whether the input matches.
  Truth.assertWithMessage("Positive matcher program size")
      .that(compiled.matcher("good").programSize())
      .isEqualTo(expectedSize);
  Truth.assertWithMessage("Negative matcher program size")
      .that(compiled.matcher("bad").programSize())
      .isEqualTo(expectedSize);
}

@Test
public void testGroupCount() {
// It is a simple delegation, but still test it.
// More test cases are covered in PatternTest#testGroupCount.
// The pattern below contains four capturing groups: (a), (b(c)), (c) and (e).
ApiTestUtils.testGroupCount("(a)(b(c))d?(e)", 4);
}

Expand Down
14 changes: 13 additions & 1 deletion javatests/com/google/re2j/PatternTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,21 @@ public void testSplit() {
ApiTestUtils.testSplit(":", ":a::b", new String[] {"", "a", "", "b"});
}

@Test
public void testProgramSize() {
  // Table of cases: patterns[i] is expected to compile to a program of expectedSizes[i].
  final String[] patterns = {"", "a", "^", "^$", "a+b", "a+b?", "(a+b)", "a+b.*", "(a+b?)"};
  final int[] expectedSizes = {3, 3, 3, 4, 5, 6, 7, 7, 8};

  // Cases run in the listed order; the first failing case aborts the test.
  for (int i = 0; i < patterns.length; i++) {
    ApiTestUtils.testProgramSize(patterns[i], expectedSizes[i]);
  }
}

@Test
public void testGroupCount() {
// It is a simple delegation, but still test it.
ApiTestUtils.testGroupCount("(.*)ab(.*)a", 2);
ApiTestUtils.testGroupCount("(.*)(ab)(.*)a", 3);
ApiTestUtils.testGroupCount("(.*)((a)b)(.*)a", 4);
Expand Down
Loading