-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add filterset package that can be used to filter by string properties (#597)
Add filterset helper package to internal/processor that can be used by processors to filter metrics and spans by string properties. import "github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset" This adds two types of filtersets: - regexp: filter strings using https://golang.org/pkg/regexp/ patterns - strict: filter strings using exact string matches. Link to tracking Issue: First part of Filter Processor Proposal #560, though the package can be used by other processors too. This can be used for the MatchSpan implementation and the upcoming MatchMetric function. Testing: Unit tests. Documentation: None, no public surface changes yet.
- Loading branch information
Showing
12 changed files
with
557 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Package filterset provides an interface for matching strings against a set of string filters. | ||
package filterset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package filterset | ||
|
||
// FilterSet abstracts a collection of string filters behind a single query.
type FilterSet interface {
	// Matches reports whether toMatch satisfies at least one of the
	// filters held by the FilterSet.
	Matches(toMatch string) bool
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Empty test file so the CI build does not fail when enforcing a test file in every directory. | ||
// The filterset package only defines an interface so there's nothing to test. | ||
package filterset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Package regexp provides an implementation to match strings against a set of regexp string filters. | ||
package regexp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package regexp | ||
|
||
import ( | ||
"github.com/golang/groupcache/lru" | ||
) | ||
|
||
// Option is the type for regexp filtering options that can be passed to NewRegexpFilterSet. | ||
type Option func(*regexpFilterSet) | ||
|
||
// WithCache enables an LRU cache that stores the previous results of calls to Matches. | ||
// The cache's max number of entries is set to maxNumEntries. Passing a value of 0 results in an unlimited cache size. | ||
func WithCache(maxNumEntries int) Option { | ||
return func(rfs *regexpFilterSet) { | ||
rfs.cacheEnabled = true | ||
rfs.cache = lru.New(maxNumEntries) | ||
} | ||
} |
187 changes: 187 additions & 0 deletions
187
internal/processor/filterset/regexp/regexpfilterset_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package regexp | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
// validRegexpFilters is the shared fixture of compilable filters: a prefix,
// suffix, "contains" and exact-name pattern, each in a "/" and a "_" flavour.
var validRegexpFilters = []string{
	"prefix/.*",
	"prefix_.*",
	".*/suffix",
	".*_suffix",
	".*/contains/.*",
	".*_contains_.*",
	"full/name/match",
	"full_name_match",
}
|
||
func TestNewRegexpFilterSet(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
filters []string | ||
success bool | ||
}{ | ||
{ | ||
name: "validFilters", | ||
filters: validRegexpFilters, | ||
success: true, | ||
}, { | ||
name: "invalidFilter", | ||
filters: []string{ | ||
"exact_string_match", | ||
"(a|b))", // invalid regex | ||
}, | ||
success: false, | ||
}, { | ||
name: "emptyFilter", | ||
filters: []string{}, | ||
success: true, | ||
}, | ||
} | ||
|
||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
fs, err := NewRegexpFilterSet(test.filters) | ||
assert.Equal(t, test.success, fs != nil) | ||
assert.Equal(t, test.success, err == nil) | ||
|
||
if err == nil { | ||
// sanity call | ||
fs.Matches("test") | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func TestRegexpMatches(t *testing.T) { | ||
fs, err := NewRegexpFilterSet(validRegexpFilters) | ||
assert.NotNil(t, fs) | ||
assert.Nil(t, err) | ||
assert.False(t, fs.(*regexpFilterSet).cacheEnabled) | ||
|
||
matches := []string{ | ||
"full/name/match", | ||
"full_name_match", | ||
"prefix/test/match", | ||
"prefix_test_match", | ||
"test/match/suffix", | ||
"test_match_suffix", | ||
"test/contains/match", | ||
"test_contains_match", | ||
} | ||
|
||
for _, m := range matches { | ||
t.Run(m, func(t *testing.T) { | ||
assert.True(t, fs.Matches(m)) | ||
}) | ||
} | ||
|
||
mismatches := []string{ | ||
"not_exact_string_match", | ||
"random", | ||
"test/match/suffixwrong", | ||
"wrongprefix/metric/one", | ||
"c", | ||
} | ||
|
||
for _, m := range mismatches { | ||
t.Run(m, func(t *testing.T) { | ||
assert.False(t, fs.Matches(m)) | ||
}) | ||
} | ||
} | ||
|
||
func TestRegexpMatchesCaches(t *testing.T) { | ||
// 0 means unlimited cache | ||
fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(0)) | ||
assert.NotNil(t, fs) | ||
assert.Nil(t, err) | ||
assert.True(t, fs.(*regexpFilterSet).cacheEnabled) | ||
|
||
matches := []string{ | ||
"full/name/match", | ||
"full_name_match", | ||
"prefix/test/match", | ||
"prefix_test_match", | ||
"test/match/suffix", | ||
"test_match_suffix", | ||
"test/contains/match", | ||
"test_contains_match", | ||
} | ||
|
||
for _, m := range matches { | ||
t.Run(m, func(t *testing.T) { | ||
assert.True(t, fs.Matches(m)) | ||
|
||
matched, ok := fs.(*regexpFilterSet).cache.Get(m) | ||
assert.True(t, matched.(bool) && ok) | ||
}) | ||
} | ||
|
||
mismatches := []string{ | ||
"not_exact_string_match", | ||
"wrongprefix/test/match", | ||
"test/match/suffixwrong", | ||
"not_exact_string_match", | ||
} | ||
|
||
for _, m := range mismatches { | ||
t.Run(m, func(t *testing.T) { | ||
assert.False(t, fs.Matches(m)) | ||
|
||
matched, ok := fs.(*regexpFilterSet).cache.Get(m) | ||
assert.True(t, !matched.(bool) && ok) | ||
}) | ||
} | ||
} | ||
|
||
func TestWithCacheSize(t *testing.T) { | ||
size := 3 | ||
fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(size)) | ||
assert.NotNil(t, fs) | ||
assert.Nil(t, err) | ||
|
||
matches := []string{ | ||
"prefix/test/match", | ||
"prefix_test_match", | ||
"test/match/suffix", | ||
} | ||
|
||
// fill cache | ||
for _, m := range matches { | ||
fs.Matches(m) | ||
_, ok := fs.(*regexpFilterSet).cache.Get(m) | ||
assert.True(t, ok) | ||
} | ||
|
||
// refresh oldest entry | ||
fs.Matches(matches[0]) | ||
|
||
// cause LRU cache eviction | ||
newest := "new" | ||
fs.Matches(newest) | ||
|
||
_, evictedOk := fs.(*regexpFilterSet).cache.Get(matches[1]) | ||
assert.False(t, evictedOk) | ||
|
||
_, newOk := fs.(*regexpFilterSet).cache.Get(newest) | ||
assert.True(t, newOk) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// Copyright 2020 OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package regexp | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
|
||
"github.com/golang/groupcache/lru" | ||
|
||
"github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset" | ||
) | ||
|
||
// regexpFilterSet encapsulates a set of filters and caches match results. | ||
// Filters are re2 regex strings. | ||
type regexpFilterSet struct { | ||
regexes map[string]*regexp.Regexp | ||
cacheEnabled bool | ||
cache *lru.Cache | ||
} | ||
|
||
// NewRegexpFilterSet constructs a FilterSet of re2 regex strings. | ||
// If any of the given filters fail to compile into re2, an error is returned. | ||
func NewRegexpFilterSet(filters []string, opts ...Option) (filterset.FilterSet, error) { | ||
fs := ®expFilterSet{ | ||
regexes: map[string]*regexp.Regexp{}, | ||
} | ||
|
||
for _, o := range opts { | ||
o(fs) | ||
} | ||
|
||
if err := fs.addFilters(filters); err != nil { | ||
return nil, err | ||
} | ||
|
||
return fs, nil | ||
} | ||
|
||
// Matches returns true if the given string matches any of the FilterSet's filters. | ||
// The given string must be fully matched by at least one filter's re2 regex. | ||
func (rfs *regexpFilterSet) Matches(toMatch string) bool { | ||
if rfs.cacheEnabled { | ||
if v, ok := rfs.cache.Get(toMatch); ok { | ||
return v.(bool) | ||
} | ||
} | ||
|
||
for _, r := range rfs.regexes { | ||
if r.MatchString(toMatch) { | ||
if rfs.cacheEnabled { | ||
rfs.cache.Add(toMatch, true) | ||
} | ||
return true | ||
} | ||
} | ||
|
||
if rfs.cacheEnabled { | ||
rfs.cache.Add(toMatch, false) | ||
} | ||
return false | ||
} | ||
|
||
// addFilters compiles all the given filters and stores them as regexes. | ||
// All regexes are automatically anchored to enforce full string matches. | ||
func (rfs *regexpFilterSet) addFilters(filters []string) error { | ||
for _, f := range filters { | ||
// anchor all regexes to enforce full matches | ||
anchored := fmt.Sprintf("^%s$", f) | ||
if _, ok := rfs.regexes[anchored]; ok { | ||
continue | ||
} | ||
|
||
re, err := regexp.Compile(anchored) | ||
if err != nil { | ||
return err | ||
} | ||
rfs.regexes[f] = re | ||
} | ||
|
||
return nil | ||
} |
Oops, something went wrong.