Skip to content

Commit

Permalink
Add filterset package that can be used to filter by string properties (
Browse files Browse the repository at this point in the history
…#597)

Add filterset helper package to internal/processor that can be used by processors to
filter metrics and spans by string properties.

  import "github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset"

This adds two types of filtersets:

- regexp: filter strings using https://golang.org/pkg/regexp/ patterns
- strict: filter strings using exact string matches

Link to tracking Issue: First part of Filter Processor Proposal #560, though the package
can be used by other processors too

This can be used for the MatchSpan implementation and the upcoming MatchMetric function.

Testing: Unit tests

Documentation: None, no public surface changes yet
  • Loading branch information
anniefu authored Mar 12, 2020
1 parent 4f385cd commit 275a932
Show file tree
Hide file tree
Showing 12 changed files with 557 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd // indirect
github.com/go-kit/kit v0.9.0
github.com/gogo/googleapis v1.3.0 // indirect
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6
github.com/golang/protobuf v1.3.2
github.com/golangci/golangci-lint v1.21.0
github.com/google/addlicense v0.0.0-20190510175307-22550fa7c1b0
Expand Down
16 changes: 16 additions & 0 deletions internal/processor/filterset/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package filterset provides an interface for matching strings against a set of string filters.
package filterset
22 changes: 22 additions & 0 deletions internal/processor/filterset/filterset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package filterset

// FilterSet matches strings against a collection of filters. How a single
// filter is interpreted is left to the implementation.
type FilterSet interface {
	// Matches reports whether toMatch satisfies at least one of the filters
	// held by the FilterSet.
	Matches(toMatch string) bool
}
17 changes: 17 additions & 0 deletions internal/processor/filterset/filterset_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Empty test file so the CI build does not fail when enforcing a test file in every directory.
// The filterset package only defines an interface so there's nothing to test.
package filterset
16 changes: 16 additions & 0 deletions internal/processor/filterset/regexp/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package regexp provides an implementation to match strings against a set of regexp string filters.
package regexp
31 changes: 31 additions & 0 deletions internal/processor/filterset/regexp/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
"github.com/golang/groupcache/lru"
)

// Option is a functional option for regexp filtering. Options are passed as
// trailing arguments to NewRegexpFilterSet, which invokes each one on the
// filter set it is constructing (see WithCache for an example).
type Option func(*regexpFilterSet)

// WithCache returns an Option that turns on result caching for Matches.
// Results are kept in an LRU cache holding at most maxNumEntries entries;
// a value of 0 means the cache size is unlimited.
func WithCache(maxNumEntries int) Option {
	enableCache := func(target *regexpFilterSet) {
		target.cache = lru.New(maxNumEntries)
		target.cacheEnabled = true
	}
	return enableCache
}
187 changes: 187 additions & 0 deletions internal/processor/filterset/regexp/regexpfilterset_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
"testing"

"github.com/stretchr/testify/assert"
)

// validRegexpFilters is the shared fixture of well-formed filters used by the
// tests in this file: prefix, suffix, "contains" and literal patterns, each in
// a '/'-separated and a '_'-separated flavour.
var validRegexpFilters = []string{
	"prefix/.*",
	"prefix_.*",
	".*/suffix",
	".*_suffix",
	".*/contains/.*",
	".*_contains_.*",
	"full/name/match",
	"full_name_match",
}

// TestNewRegexpFilterSet checks that construction succeeds for valid and empty
// filter lists and fails (returning a nil FilterSet and a non-nil error) when
// any filter is not a valid regular expression.
func TestNewRegexpFilterSet(t *testing.T) {
	type testCase struct {
		name    string
		filters []string
		success bool
	}

	cases := []testCase{
		{name: "validFilters", filters: validRegexpFilters, success: true},
		{
			name: "invalidFilter",
			filters: []string{
				"exact_string_match",
				"(a|b))", // invalid regex
			},
			success: false,
		},
		{name: "emptyFilter", filters: []string{}, success: true},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			fs, err := NewRegexpFilterSet(tc.filters)
			assert.Equal(t, tc.success, fs != nil)
			assert.Equal(t, tc.success, err == nil)

			if err != nil {
				return
			}
			// sanity call: a successfully built set must be usable
			fs.Matches("test")
		})
	}
}

// TestRegexpMatches exercises Matches on a filter set built without a cache,
// first with inputs that must match and then with inputs that must not.
func TestRegexpMatches(t *testing.T) {
	fs, err := NewRegexpFilterSet(validRegexpFilters)
	assert.NotNil(t, fs)
	assert.Nil(t, err)
	assert.False(t, fs.(*regexpFilterSet).cacheEnabled)

	// runAll runs one subtest per input, named after the input, and applies
	// check (assert.True or assert.False) to the result of Matches.
	runAll := func(inputs []string, check func(assert.TestingT, bool, ...interface{}) bool) {
		for _, in := range inputs {
			in := in
			t.Run(in, func(t *testing.T) {
				check(t, fs.Matches(in))
			})
		}
	}

	shouldMatch := []string{
		"full/name/match",
		"full_name_match",
		"prefix/test/match",
		"prefix_test_match",
		"test/match/suffix",
		"test_match_suffix",
		"test/contains/match",
		"test_contains_match",
	}
	runAll(shouldMatch, assert.True)

	shouldNotMatch := []string{
		"not_exact_string_match",
		"random",
		"test/match/suffixwrong",
		"wrongprefix/metric/one",
		"c",
	}
	runAll(shouldNotMatch, assert.False)
}

// TestRegexpMatchesCaches verifies that, with caching enabled, every call to
// Matches leaves an entry in the cache whose value equals the returned result,
// for both matching and non-matching inputs.
func TestRegexpMatchesCaches(t *testing.T) {
	// 0 means unlimited cache
	fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(0))
	// Stop here on a construction failure: the type assertion below would
	// otherwise panic on a nil FilterSet instead of reporting a test failure.
	if !assert.NotNil(t, fs) || !assert.Nil(t, err) {
		return
	}
	rfs := fs.(*regexpFilterSet)
	assert.True(t, rfs.cacheEnabled)

	matches := []string{
		"full/name/match",
		"full_name_match",
		"prefix/test/match",
		"prefix_test_match",
		"test/match/suffix",
		"test_match_suffix",
		"test/contains/match",
		"test_contains_match",
	}

	for _, m := range matches {
		t.Run(m, func(t *testing.T) {
			assert.True(t, fs.Matches(m))

			// Check ok before looking at the value: on a cache miss the value
			// is a nil interface and a direct .(bool) assertion would panic.
			matched, ok := rfs.cache.Get(m)
			if assert.True(t, ok, "expected %q to be cached", m) {
				assert.Equal(t, true, matched)
			}
		})
	}

	mismatches := []string{
		"not_exact_string_match",
		"wrongprefix/test/match",
		"test/match/suffixwrong",
		// repeated on purpose or not, the second call for this input is
		// answered from the cache entry written by the first one
		"not_exact_string_match",
	}

	for _, m := range mismatches {
		t.Run(m, func(t *testing.T) {
			assert.False(t, fs.Matches(m))

			matched, ok := rfs.cache.Get(m)
			if assert.True(t, ok, "expected %q to be cached", m) {
				assert.Equal(t, false, matched)
			}
		})
	}
}

// TestWithCacheSize verifies that a cache created with a fixed capacity evicts
// its least recently used entry once one more distinct input is matched.
func TestWithCacheSize(t *testing.T) {
	const capacity = 3
	fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(capacity))
	assert.NotNil(t, fs)
	assert.Nil(t, err)

	inputs := []string{
		"prefix/test/match",
		"prefix_test_match",
		"test/match/suffix",
	}

	// fill the cache to capacity, confirming each entry lands in it
	for i := 0; i < len(inputs); i++ {
		fs.Matches(inputs[i])
		_, cached := fs.(*regexpFilterSet).cache.Get(inputs[i])
		assert.True(t, cached)
	}

	// touch the first entry again so that the second one is now the oldest
	fs.Matches(inputs[0])

	// one more distinct input exceeds the capacity and forces an eviction
	const extra = "new"
	fs.Matches(extra)

	lru := fs.(*regexpFilterSet).cache

	_, stillThere := lru.Get(inputs[1])
	assert.False(t, stillThere)

	_, extraCached := lru.Get(extra)
	assert.True(t, extraCached)
}
94 changes: 94 additions & 0 deletions internal/processor/filterset/regexp/regexpfliterset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2020 OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
"fmt"
"regexp"

"github.com/golang/groupcache/lru"

"github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset"
)

// regexpFilterSet is the regexp-backed filter set returned by
// NewRegexpFilterSet. It holds a set of compiled re2 filters and, when enabled
// through WithCache, an LRU cache of earlier match results.
type regexpFilterSet struct {
// regexes holds the compiled filters; it is populated by addFilters.
regexes map[string]*regexp.Regexp
// cacheEnabled tells Matches whether to consult and update cache.
// It is set to true by WithCache.
cacheEnabled bool
// cache maps a previously matched string to its boolean Matches result.
// It is only created by WithCache, so it is nil while cacheEnabled is false.
cache *lru.Cache
}

// NewRegexpFilterSet builds a FilterSet from the given re2 regex strings.
// The supplied options are applied first, in order, and the filters are
// compiled afterwards. If any filter fails to compile, a nil FilterSet and the
// compile error are returned.
func NewRegexpFilterSet(filters []string, opts ...Option) (filterset.FilterSet, error) {
	rfs := new(regexpFilterSet)
	rfs.regexes = make(map[string]*regexp.Regexp)

	for i := range opts {
		opts[i](rfs)
	}

	err := rfs.addFilters(filters)
	if err != nil {
		return nil, err
	}
	return rfs, nil
}

// Matches reports whether toMatch is matched by at least one of the compiled
// filters in the set.
//
// When caching is enabled a previously stored result for toMatch is returned
// directly; otherwise the filters are evaluated and the outcome, positive or
// negative, is stored in the cache before returning.
//
// NOTE(review): the groupcache lru.Cache documentation describes the cache as
// not safe for concurrent access, and Get/Add are called here without any
// locking; confirm that callers serialize calls when the cache is enabled.
func (rfs *regexpFilterSet) Matches(toMatch string) bool {
	if rfs.cacheEnabled {
		if cached, hit := rfs.cache.Get(toMatch); hit {
			return cached.(bool)
		}
	}

	matched := false
	for _, re := range rfs.regexes {
		if re.MatchString(toMatch) {
			matched = true
			break
		}
	}

	if rfs.cacheEnabled {
		rfs.cache.Add(toMatch, matched)
	}
	return matched
}

// addFilters compiles each of the given filters and stores the result in
// rfs.regexes, keyed by the original (un-anchored) filter string.
//
// Every filter is wrapped as ^(?:filter)$ before compilation so that a string
// only matches when it is matched in full. The non-capturing group matters:
// without it a filter with a top-level alternation such as "a|b" would become
// "^a|b$", which anchors each branch on one side only and therefore accepts
// partial matches.
//
// Filters already present in the set are skipped. The first filter that is not
// a valid re2 expression aborts the call and its compile error is returned;
// filters processed before it remain in the set.
func (rfs *regexpFilterSet) addFilters(filters []string) error {
	for _, f := range filters {
		// Look up the same key that is used for storing below, so that
		// duplicate filters are compiled only once.
		if _, ok := rfs.regexes[f]; ok {
			continue
		}

		// Validate the filter on its own first. Wrapping it in a group could
		// otherwise turn an unbalanced expression such as "a)|(b" into a
		// syntactically valid one with unintended semantics.
		if _, err := regexp.Compile(f); err != nil {
			return err
		}

		// anchor the whole expression to enforce full matches
		anchored := fmt.Sprintf("^(?:%s)$", f)
		re, err := regexp.Compile(anchored)
		if err != nil {
			return err
		}
		rfs.regexes[f] = re
	}

	return nil
}
Loading

0 comments on commit 275a932

Please sign in to comment.