Skip to content

Commit

Permalink
FastRegexMatcher: use stack memory for lowercase copy of string
Browse files Browse the repository at this point in the history
Saves garbage, runs faster.
For prefixes, only `toLower` the part we need for the map lookup.

Signed-off-by: Bryan Boreham <[email protected]>
  • Loading branch information
bboreham committed Oct 24, 2024
1 parent 2182b83 commit b9fab24
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
32 changes: 23 additions & 9 deletions model/labels/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,8 @@ type equalMultiStringMapMatcher struct {

func (m *equalMultiStringMapMatcher) add(s string) {
if !m.caseSensitive {
s = toNormalisedLower(s)
var a [256]byte
s = toNormalisedLower(s, a[:])
}

m.values[s] = struct{}{}
Expand Down Expand Up @@ -840,15 +841,23 @@ func (m *equalMultiStringMapMatcher) setMatches() []string {
}

func (m *equalMultiStringMapMatcher) Matches(s string) bool {
if !m.caseSensitive {
s = toNormalisedLower(s)
var a [256]byte
if len(m.values) > 0 {
sNorm := s
if !m.caseSensitive {
sNorm = toNormalisedLower(s, a[:])
}
if _, ok := m.values[sNorm]; ok {
return true
}
}

if _, ok := m.values[s]; ok {
return true
}
if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen {
for _, matcher := range m.prefixes[s[:m.minPrefixLen]] {
prefix := s[:m.minPrefixLen]
if !m.caseSensitive {
prefix = toNormalisedLower(s[:m.minPrefixLen], a[:])
}
for _, matcher := range m.prefixes[prefix] {
if matcher.Matches(s) {
return true
}
Expand All @@ -859,7 +868,7 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {

// toNormalisedLower normalises the input string using "Unicode Normalization Form D" and then converts
// it to lower case.
func toNormalisedLower(s string) string {
func toNormalisedLower(s string, a []byte) string {
var buf []byte
for i := 0; i < len(s); i++ {
c := s[i]
Expand All @@ -868,7 +877,12 @@ func toNormalisedLower(s string) string {
}
if 'A' <= c && c <= 'Z' {
if buf == nil {
buf = []byte(s)
if cap(a) > len(s) {
buf = a[:len(s)]
} else {
buf = make([]byte, len(s))
}
copy(buf, s)
}
buf[i] = c + 'a' - 'A'
}
Expand Down
5 changes: 3 additions & 2 deletions model/labels/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ func BenchmarkToNormalizedLower(b *testing.B) {
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
toNormalisedLower(inputs[n%len(inputs)])
var a [256]byte
toNormalisedLower(inputs[n%len(inputs)], a[:])
}
})
}
Expand Down Expand Up @@ -1390,6 +1391,6 @@ func TestToNormalisedLower(t *testing.T) {
"ſſAſſa": "ssassa",
}
for input, expectedOutput := range testCases {
require.Equal(t, expectedOutput, toNormalisedLower(input))
require.Equal(t, expectedOutput, toNormalisedLower(input, nil))
}
}

0 comments on commit b9fab24

Please sign in to comment.