-
-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: adds memoize implementation for regexes and ahocorasick (#836)
* chore: adds memoize implementation for regexes. Currently we create and allocate memory for every regex we compile, however there are cases where you compile the same regex over and over e.g. corazawaf/coraza-caddy#76. Here we implement the memoize pattern to be able to reuse the regex and reduce the memory consumption. * docs: adds comments to code. * chore: simplify the memoize package by using sync.Map. * feat: extends memoize to ahocorasick and allow impl for tinygo but not synced as no concurrency. * tests: covers memoize_builders in tinygo. * chore: fixes nosync for tinygo. * docs: updates docs. --------- Co-authored-by: Juan Pablo Tosso <jptosso@gmail.com>
- Loading branch information
Showing
21 changed files
with
518 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,6 @@ jobs: | |
- name: Tests | ||
run: tinygo test ./... | ||
|
||
- name: Tests memoize | ||
run: tinygo test -tags=memoize_builders ./... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Memoize | ||
|
||
Memoize caches the results of certain expensive function calls so | ||
that they can be reused. The main advantage in Coraza is to memoize | ||
the regexes and aho-corasick dictionaries when a connector | ||
spins up more than one WAF in the same process and hence the same | ||
regexes are being compiled over and over. | ||
|
||
Currently it is opt-in under the `memoize_builders` build tag | ||
because, if misused (e.g. used after build time), it could lead | ||
to a memory leak, as the cache is currently global. | ||
|
||
**Important:** Connectors with *live reload* functionality (e.g. Caddy) | ||
could leak memory: objects cached for an old configuration stay alive | ||
in memory until the program stops. The leak might or might not be | ||
negligible; in most cases config changes in a WAF are about a few | ||
rules only. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build !memoize_builders | ||
|
||
package memoize | ||
|
||
// Do is the variant built when the memoize_builders build tag is not set. | ||
// It ignores the key and simply calls fn, returning fn's results unchanged; | ||
// nothing is cached. | ||
func Do(_ string, fn func() (interface{}, error)) (interface{}, error) { | ||
return fn() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build tinygo && memoize_builders | ||
|
||
package memoize | ||
|
||
import "sync" | ||
|
||
// doer is the package-level memoizing function, backed by a single sync.Map | ||
// that is shared by every call to Do. | ||
var doer = makeDoer(new(sync.Map)) | ||
|
||
// Do returns the value cached under key, if there is one; otherwise it calls | ||
// fn and returns its results. The flag reported by doer (whether the value | ||
// came from the cache) is discarded here. | ||
// NOTE(review): unlike a singleflight-based implementation, this variant does | ||
// not de-duplicate in-flight calls; presumably fine because tinygo builds are | ||
// used without concurrency — confirm. | ||
func Do(key string, fn func() (interface{}, error)) (interface{}, error) { | ||
value, err, _ := doer(key, fn) | ||
return value, err | ||
} | ||
|
||
// makeDoer builds a memoizing function on top of cache.
//
// The returned function looks key up in cache first. On a hit it returns the
// stored value, a nil error and true. On a miss it calls fn and returns fn's
// results together with false; the value is stored under key only when fn
// succeeded, so a failed call is retried the next time the key is requested.
func makeDoer(cache *sync.Map) func(string, func() (interface{}, error)) (interface{}, error, bool) {
	lookupOrCompute := func(key string, fn func() (interface{}, error)) (interface{}, error, bool) {
		if hit, ok := cache.Load(key); ok {
			return hit, nil, true
		}

		result, fnErr := fn()
		if fnErr != nil {
			// Failures are never cached; hand back whatever fn produced.
			return result, fnErr, false
		}

		cache.Store(key, result)
		return result, nil, false
	}

	return lookupOrCompute
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build tinygo && memoize_builders | ||
|
||
// https://github.com/kofalt/go-memoize/blob/master/memoize.go | ||
|
||
package memoize | ||
|
||
import ( | ||
"errors" | ||
"sync" | ||
"testing" | ||
) | ||
|
||
func TestDo(t *testing.T) { | ||
expensiveCalls := 0 | ||
|
||
// Function tracks how many times its been called | ||
expensive := func() (interface{}, error) { | ||
expensiveCalls++ | ||
return expensiveCalls, nil | ||
} | ||
|
||
// First call SHOULD NOT be cached | ||
result, err := Do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
// Second call on same key SHOULD be cached | ||
result, err = Do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
// First call on a new key SHOULD NOT be cached | ||
result, err = Do("key2", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
} | ||
|
||
func TestSuccessCall(t *testing.T) { | ||
do := makeDoer(new(sync.Map)) | ||
|
||
expensiveCalls := 0 | ||
|
||
// Function tracks how many times its been called | ||
expensive := func() (interface{}, error) { | ||
expensiveCalls++ | ||
return expensiveCalls, nil | ||
} | ||
|
||
// First call SHOULD NOT be cached | ||
result, err, cached := do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Second call on same key SHOULD be cached | ||
result, err, cached = do("key1", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := true, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// First call on a new key SHOULD NOT be cached | ||
result, err, cached = do("key2", expensive) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
} | ||
|
||
func TestFailedCall(t *testing.T) { | ||
do := makeDoer(new(sync.Map)) | ||
|
||
calls := 0 | ||
|
||
// This function will fail IFF it has not been called before. | ||
twoForTheMoney := func() (interface{}, error) { | ||
calls++ | ||
|
||
if calls == 1 { | ||
return calls, errors.New("Try again") | ||
} else { | ||
return calls, nil | ||
} | ||
} | ||
|
||
// First call should fail, and not be cached | ||
result, err, cached := do("key1", twoForTheMoney) | ||
if err == nil { | ||
t.Fatalf("expected error") | ||
} | ||
|
||
if want, have := 1, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Second call should succeed, and not be cached | ||
result, err, cached = do("key1", twoForTheMoney) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := false, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
|
||
// Third call should succeed, and be cached | ||
result, err, cached = do("key1", twoForTheMoney) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %s", err.Error()) | ||
} | ||
|
||
if want, have := 2, result.(int); want != have { | ||
t.Fatalf("unexpected value, want %d, have %d", want, have) | ||
} | ||
|
||
if want, have := true, cached; want != have { | ||
t.Fatalf("unexpected caching, want %t, have %t", want, have) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//go:build !tinygo && memoize_builders | ||
|
||
// https://github.com/kofalt/go-memoize/blob/master/memoize.go | ||
|
||
package memoize | ||
|
||
import ( | ||
"sync" | ||
|
||
"golang.org/x/sync/singleflight" | ||
) | ||
|
||
// doer is the package-level memoizing function, backed by a single sync.Map | ||
// and a single singleflight.Group shared by every call to Do. | ||
var doer = makeDoer(new(sync.Map), new(singleflight.Group)) | ||
|
||
// Do executes and returns the results of the given function, unless there was a cached | ||
// value of the same key. Only one execution is in-flight for a given key at a time. | ||
// The flag reported by doer (whether the value came from the cache) is discarded here. | ||
func Do(key string, fn func() (interface{}, error)) (interface{}, error) { | ||
value, err, _ := doer(key, fn) | ||
return value, err | ||
} | ||
|
||
// makeDoer returns a function that executes and returns the results of the given function | ||
func makeDoer(cache *sync.Map, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) { | ||
return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { | ||
// Check cache | ||
value, found := cache.Load(key) | ||
if found { | ||
return value, nil, true | ||
} | ||
|
||
// Combine memoized function with a cache store | ||
value, err, _ := group.Do(key, func() (interface{}, error) { | ||
data, innerErr := fn() | ||
if innerErr == nil { | ||
cache.Store(key, data) | ||
} | ||
|
||
return data, innerErr | ||
}) | ||
|
||
return value, err, false | ||
} | ||
} |
Oops, something went wrong.