Skip to content

Commit

Permalink
lib/regex: use string version of regexp methods to reduce allocs (#1614)
Browse files Browse the repository at this point in the history
Both `(*Regexp).Match` and `(*Regexp).FindAllSubmatchIndex` have
string-based equivalents: `(*Regexp).MatchString` and
`(*Regexp).FindAllStringSubmatchIndex`. We should use the string version
to avoid unnecessary `[]byte` conversions.

Benchmark:

// regex is the compiled pattern shared by the benchmarks below. It is built
// once at package scope, so compilation happens outside the benchmark loops.
var regex = regexp.MustCompile("foo.*")

// BenchmarkMatch times (*Regexp).Match, converting the string literal to a
// []byte on every iteration. The benchmark is marked failed if the pattern
// does not match.
func BenchmarkMatch(b *testing.B) {
	for n := 0; n < b.N; n++ {
		matched := regex.Match([]byte("foo bar baz"))
		if matched {
			continue
		}
		b.Fail()
	}
}

// BenchmarkMatchString times (*Regexp).MatchString, passing the string
// literal directly. The benchmark is marked failed if the pattern does not
// match.
func BenchmarkMatchString(b *testing.B) {
	for n := 0; n < b.N; n++ {
		matched := regex.MatchString("foo bar baz")
		if matched {
			continue
		}
		b.Fail()
	}
}

// BenchmarkFindAllSubmatchIndex times (*Regexp).FindAllSubmatchIndex with no
// limit on the number of matches (-1), converting the string literal to a
// []byte on every iteration. The benchmark is marked failed if no match is
// returned.
func BenchmarkFindAllSubmatchIndex(b *testing.B) {
	for n := 0; n < b.N; n++ {
		matches := regex.FindAllSubmatchIndex([]byte("foo bar baz"), -1)
		if len(matches) != 0 {
			continue
		}
		b.Fail()
	}
}

// BenchmarkFindAllStringSubmatchIndex times
// (*Regexp).FindAllStringSubmatchIndex with no limit on the number of matches
// (-1), passing the string literal directly. The benchmark is marked failed
// if no match is returned.
func BenchmarkFindAllStringSubmatchIndex(b *testing.B) {
	for n := 0; n < b.N; n++ {
		matches := regex.FindAllStringSubmatchIndex("foo bar baz", -1)
		if len(matches) != 0 {
			continue
		}
		b.Fail()
	}
}

goos: linux
goarch: amd64
pkg: github.com/johnkerl/miller/pkg/lib
cpu: AMD Ryzen 7 PRO 4750U with Radeon Graphics
BenchmarkMatch-16                         	 2198350	       517.5 ns/op	      16 B/op	       1 allocs/op
BenchmarkMatchString-16                   	 3143605	       371.5 ns/op	       0 B/op	       0 allocs/op
BenchmarkFindAllSubmatchIndex-16          	  921711	      1199 ns/op	     273 B/op	       3 allocs/op
BenchmarkFindAllStringSubmatchIndex-16    	 1212321	       981.0 ns/op	     257 B/op	       2 allocs/op
PASS
coverage: 0.0% of statements
ok  	github.com/johnkerl/miller/pkg/lib	6.576s

Signed-off-by: Eng Zer Jun <[email protected]>
  • Loading branch information
Juneezee authored Aug 9, 2024
1 parent dfe1ca1 commit 3966a6a
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions pkg/lib/regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func regexCompiledSubOrGsub(
replacementCaptureMatrix [][]int,
breakOnFirst bool,
) string {
matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
matrix := regex.FindAllStringSubmatchIndex(input, -1)
if matrix == nil || len(matrix) == 0 {
return input
}
Expand Down Expand Up @@ -290,7 +290,7 @@ func RegexCompiledMatchSimple(
input string,
regex *regexp.Regexp,
) bool {
return regex.Match([]byte(input))
return regex.MatchString(input)
}

// RegexStringMatchWithMapResults implements much of the `strmatchx` DSL function. This returns
Expand Down Expand Up @@ -320,7 +320,7 @@ func RegexCompiledMatchWithMapResults(
starts := make([]int, 0, 10)
ends := make([]int, 0, 10)

matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
matrix := regex.FindAllStringSubmatchIndex(input, -1)
if matrix == nil || len(matrix) == 0 {
return false, captures, starts, ends
}
Expand Down Expand Up @@ -406,7 +406,7 @@ func RegexCompiledMatchWithCaptures(
input string,
regex *regexp.Regexp,
) (bool, []string) {
matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
matrix := regex.FindAllStringSubmatchIndex(input, -1)
if matrix == nil || len(matrix) == 0 {
// Set all captures to ""
return false, make([]string, 10)
Expand Down Expand Up @@ -474,7 +474,7 @@ func ReplacementHasCaptures(
matrix [][]int,
) {
if captureDetector.MatchString(replacement) {
return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
return true, captureSplitter.FindAllStringSubmatchIndex(replacement, -1)
} else {
return false, nil
}
Expand Down

0 comments on commit 3966a6a

Please sign in to comment.