From 3966a6a0a18cfe9d0437e33618a103ae940df8e8 Mon Sep 17 00:00:00 2001
From: Eng Zer Jun
Date: Sat, 10 Aug 2024 01:09:53 +0800
Subject: [PATCH] lib/regex: use string version of regexp methods to reduce allocs (#1614)

Both `(*Regexp).Match` and `(*Regexp).FindAllSubmatchIndex` have
string-based equivalents: `(*Regexp).MatchString` and
`(*Regexp).FindAllStringSubmatchIndex`. We should use the string version
to avoid unnecessary `[]byte` conversions.

Benchmark:

var regex = regexp.MustCompile("foo.*")

func BenchmarkMatch(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if match := regex.Match([]byte("foo bar baz")); !match {
			b.Fail()
		}
	}
}

func BenchmarkMatchString(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if match := regex.MatchString("foo bar baz"); !match {
			b.Fail()
		}
	}
}

func BenchmarkFindAllSubmatchIndex(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if match := regex.FindAllSubmatchIndex([]byte("foo bar baz"), -1); len(match) == 0 {
			b.Fail()
		}
	}
}

func BenchmarkFindAllStringSubmatchIndex(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if match := regex.FindAllStringSubmatchIndex("foo bar baz", -1); len(match) == 0 {
			b.Fail()
		}
	}
}

goos: linux
goarch: amd64
pkg: github.com/johnkerl/miller/pkg/lib
cpu: AMD Ryzen 7 PRO 4750U with Radeon Graphics
BenchmarkMatch-16                          2198350    517.5 ns/op     16 B/op    1 allocs/op
BenchmarkMatchString-16                    3143605    371.5 ns/op      0 B/op    0 allocs/op
BenchmarkFindAllSubmatchIndex-16            921711     1199 ns/op    273 B/op    3 allocs/op
BenchmarkFindAllStringSubmatchIndex-16     1212321    981.0 ns/op    257 B/op    2 allocs/op
PASS
coverage: 0.0% of statements
ok  	github.com/johnkerl/miller/pkg/lib	6.576s

Signed-off-by: Eng Zer Jun
---
 pkg/lib/regex.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go
index af0a188059..d8d5511674 100644
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@@ -202,7 +202,7 @@ func regexCompiledSubOrGsub(
 	replacementCaptureMatrix [][]int,
 	breakOnFirst bool,
 ) string {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	matrix := regex.FindAllStringSubmatchIndex(input, -1)
 	if matrix == nil || len(matrix) == 0 {
 		return input
 	}
@@ -290,7 +290,7 @@ func RegexCompiledMatchSimple(
 	input string,
 	regex *regexp.Regexp,
 ) bool {
-	return regex.Match([]byte(input))
+	return regex.MatchString(input)
 }
 
 // RegexStringMatchWithMapResults implements much of the `strmatchx` DSL function. This returns
@@ -320,7 +320,7 @@ func RegexCompiledMatchWithMapResults(
 	starts := make([]int, 0, 10)
 	ends := make([]int, 0, 10)
 
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	matrix := regex.FindAllStringSubmatchIndex(input, -1)
 	if matrix == nil || len(matrix) == 0 {
 		return false, captures, starts, ends
 	}
@@ -406,7 +406,7 @@ func RegexCompiledMatchWithCaptures(
 	input string,
 	regex *regexp.Regexp,
 ) (bool, []string) {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	matrix := regex.FindAllStringSubmatchIndex(input, -1)
 	if matrix == nil || len(matrix) == 0 {
 		// Set all captures to ""
 		return false, make([]string, 10)
@@ -474,7 +474,7 @@ func ReplacementHasCaptures(
 	matrix [][]int,
 ) {
 	if captureDetector.MatchString(replacement) {
-		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+		return true, captureSplitter.FindAllStringSubmatchIndex(replacement, -1)
 	} else {
 		return false, nil
 	}