-
-
Notifications
You must be signed in to change notification settings - Fork 219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: adds memoize implementation for regexes and ahocorasick #836
Changes from 5 commits
18a78cb
23382ac
4dae331
be2fc24
b8df0c2
73be18c
d43a91f
62043be
3ed990b
8d52562
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
"github.com/corazawaf/coraza/v3/experimental/plugins/macro" | ||
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" | ||
"github.com/corazawaf/coraza/v3/internal/corazarules" | ||
"github.com/corazawaf/coraza/v3/internal/memoize" | ||
"github.com/corazawaf/coraza/v3/types" | ||
"github.com/corazawaf/coraza/v3/types/variables" | ||
) | ||
|
@@ -456,7 +457,12 @@ | |
var re *regexp.Regexp | ||
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { | ||
key = key[1 : len(key)-1] | ||
re = regexp.MustCompile(key) | ||
|
||
if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { | ||
panic(err) | ||
} else { | ||
re = vare.(*regexp.Regexp) | ||
} | ||
} | ||
|
||
if multiphaseEvaluation { | ||
|
@@ -521,7 +527,11 @@ | |
var re *regexp.Regexp | ||
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { | ||
key = key[1 : len(key)-1] | ||
re = regexp.MustCompile(key) | ||
if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { | ||
panic(err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should never panic, you can return error here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, however this should be fixed in |
||
} else { | ||
re = vare.(*regexp.Regexp) | ||
} | ||
} | ||
// Prevent sigsev | ||
if r == nil { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Memoize | ||
|
||
Memoize caches the results of certain expensive function | ||
calls. The main advantage in Coraza is to memoize the regexes | ||
when a connector spins up more than one WAF in the same | ||
process and hence the same regexes are compiled over and | ||
over. | ||
|
||
Currently it is opt-in under the `memoize_regex` build tag | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: what about also adding a one-line description in the main readme, under https://github.com/corazawaf/coraza#build-tags? |
||
because, if misused, it could lead to a memory leak: the cache | ||
is currently global. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build !tinygo && memoize_regex | ||
|
||
// https://github.com/kofalt/go-memoize/blob/master/memoize.go | ||
|
||
package memoize | ||
|
||
import ( | ||
"sync" | ||
|
||
"golang.org/x/sync/singleflight" | ||
) | ||
|
||
var doer = makeDoer(new(sync.Map), new(singleflight.Group)) | ||
|
||
// Do executes and returns the results of the given function, unless there was a cached | ||
// value of the same key. Only one execution is in-flight for a given key at a time. | ||
// It delegates to the package-level doer and discards the doer's "was cached" flag, | ||
// so callers only receive the value and the error. | ||
func Do(key string, fn func() (interface{}, error)) (interface{}, error) { | ||
value, err, _ := doer(key, fn) | ||
return value, err | ||
} | ||
|
||
// makeDoer returns a function that executes and returns the results of the given function, | ||
// consulting cache first. The returned bool is true only when the value was loaded from | ||
// cache; it is false whenever the call went through group, even if fn succeeded. | ||
// A result is stored in cache only when fn returns a nil error, so a failed call is not | ||
// remembered and a later call with the same key goes through group again. | ||
func makeDoer(cache *sync.Map, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) { | ||
return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { | ||
// Check cache: a hit returns the stored value without calling fn | ||
value, found := cache.Load(key) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit, can combine two lines |
||
if found { | ||
return value, nil, true | ||
} | ||
|
||
// Cache miss: run fn through the singleflight group and store its | ||
// result in the cache only when it succeeded | ||
value, err, _ := group.Do(key, func() (interface{}, error) { | ||
data, innerErr := fn() | ||
if innerErr == nil { | ||
cache.Store(key, data) | ||
} | ||
|
||
return data, innerErr | ||
}) | ||
|
||
// false: the value was not served from the cache on this call | ||
return value, err, false | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build !tinygo && memoize_regex | ||
|
||
// https://github.com/kofalt/go-memoize/blob/master/memoize.go | ||
|
||
package memoize | ||
|
||
import ( | ||
"errors" | ||
"sync" | ||
"testing" | ||
|
||
"golang.org/x/sync/singleflight" | ||
) | ||
|
||
// TestDo exercises the package-level Do with a counting function: the first call for a | ||
// key runs the function, a repeated call with the same key returns the first result | ||
// again, and a call with a new key runs the function once more. | ||
func TestDo(t *testing.T) { | ||
expensiveCalls := 0 | ||
|
||
// Function tracks how many times its been called | ||
expensive := func() (interface{}, error) { | ||
expensiveCalls++ | ||
return expensiveCalls, nil | ||
} | ||
|
||
// First call SHOULD NOT be cached | ||
result, err := Do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
// Second call on same key SHOULD be cached | ||
result, err = Do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
// First call on a new key SHOULD NOT be cached | ||
result, err = Do("key2", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
} | ||
|
||
// TestSuccessCall builds a doer on a fresh cache and singleflight group and checks, for | ||
// a function that always succeeds, both the returned value and the cached flag on a | ||
// first call, a repeated call with the same key, and a first call with a new key. | ||
func TestSuccessCall(t *testing.T) { | ||
do := makeDoer(new(sync.Map), &singleflight.Group{}) | ||
|
||
expensiveCalls := 0 | ||
|
||
// Function tracks how many times its been called | ||
expensive := func() (interface{}, error) { | ||
expensiveCalls++ | ||
return expensiveCalls, nil | ||
} | ||
|
||
// First call SHOULD NOT be cached | ||
result, err, cached := do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Second call on same key SHOULD be cached | ||
result, err, cached = do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := true, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// First call on a new key SHOULD NOT be cached | ||
result, err, cached = do("key2", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
} | ||
|
||
// TestFailedCall checks that a call which returns an error is not cached: the function | ||
// fails on its first invocation, runs again (and succeeds) on the second call with the | ||
// same key, and only the third call is answered from the cache. | ||
func TestFailedCall(t *testing.T) { | ||
do := makeDoer(new(sync.Map), &singleflight.Group{}) | ||
|
||
calls := 0 | ||
|
||
// This function will fail IFF it has not been called before. | ||
twoForTheMoney := func() (interface{}, error) { | ||
calls++ | ||
|
||
if calls == 1 { | ||
return calls, errors.New("Try again") | ||
} else { | ||
return calls, nil | ||
} | ||
} | ||
|
||
// First call should fail, and not be cached | ||
result, err, cached := do("key1", twoForTheMoney) | ||
if err == nil { | ||
t.Fatalf("expected error") | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Second call should succeed, and not be cached | ||
result, err, cached = do("key1", twoForTheMoney) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Third call should succeed, and be cached | ||
result, err, cached = do("key1", twoForTheMoney) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := true, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build tinygo || !memoize_regex | ||
|
||
package memoize | ||
|
||
// Do is the pass-through variant built under the `tinygo || !memoize_regex` constraint: | ||
// it ignores the key, calls fn on every invocation and returns its result unchanged, | ||
// so nothing is cached. | ||
func Do(_ string, fn func() (interface{}, error)) (interface{}, error) { | ||
return fn() | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be worth extracting a function for the two usages
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You mean for the regex and binaryregex?