From 275a932999584cce7b99e924833c5218358aa772 Mon Sep 17 00:00:00 2001 From: Annie Fu <16651409+anniefu@users.noreply.github.com> Date: Thu, 12 Mar 2020 12:29:57 -0700 Subject: [PATCH] Add filterset package that can be used to filter by string properties (#597) Add filterset helper package to internal/processor that can be used by processors to filter metrics and spans by string properties. import "github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset" This adds two types of filtersets - regexp: filter strings using https://golang.org/pkg/regexp/ patterns - strict: filter strings using exact string matches Link to tracking Issue: First part of Filter Processor Proposal #560, though the package can be used by other processors too This can be used for the MatchSpan implementation and the upcoming MatchMetric function. Testing: Unit tests Documentation: None, no public surface changes yet --- go.mod | 1 + internal/processor/filterset/doc.go | 16 ++ internal/processor/filterset/filterset.go | 22 +++ .../processor/filterset/filterset_test.go | 17 ++ internal/processor/filterset/regexp/doc.go | 16 ++ .../processor/filterset/regexp/options.go | 31 +++ .../filterset/regexp/regexpfilterset_test.go | 187 ++++++++++++++++++ .../filterset/regexp/regexpfliterset.go | 94 +++++++++ internal/processor/filterset/strict/doc.go | 16 ++ .../processor/filterset/strict/options.go | 18 ++ .../filterset/strict/strictfilterset.go | 56 ++++++ .../filterset/strict/strictfilterset_test.go | 83 ++++++++ 12 files changed, 557 insertions(+) create mode 100644 internal/processor/filterset/doc.go create mode 100644 internal/processor/filterset/filterset.go create mode 100644 internal/processor/filterset/filterset_test.go create mode 100644 internal/processor/filterset/regexp/doc.go create mode 100644 internal/processor/filterset/regexp/options.go create mode 100644 internal/processor/filterset/regexp/regexpfilterset_test.go create mode 100644 
internal/processor/filterset/regexp/regexpfliterset.go create mode 100644 internal/processor/filterset/strict/doc.go create mode 100644 internal/processor/filterset/strict/options.go create mode 100644 internal/processor/filterset/strict/strictfilterset.go create mode 100644 internal/processor/filterset/strict/strictfilterset_test.go diff --git a/go.mod b/go.mod index 8b3b8829a56..49b2b8970bc 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd // indirect github.com/go-kit/kit v0.9.0 github.com/gogo/googleapis v1.3.0 // indirect + github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 github.com/golang/protobuf v1.3.2 github.com/golangci/golangci-lint v1.21.0 github.com/google/addlicense v0.0.0-20190510175307-22550fa7c1b0 diff --git a/internal/processor/filterset/doc.go b/internal/processor/filterset/doc.go new file mode 100644 index 00000000000..3e564451a35 --- /dev/null +++ b/internal/processor/filterset/doc.go @@ -0,0 +1,16 @@ +// Copyright 2020 OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package filterset provides an interface for matching strings against a set of string filters. 
+package filterset
diff --git a/internal/processor/filterset/filterset.go b/internal/processor/filterset/filterset.go
new file mode 100644
index 00000000000..7b8c45cef6c
--- /dev/null
+++ b/internal/processor/filterset/filterset.go
@@ -0,0 +1,22 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filterset
+
+// FilterSet is an interface for matching a string against a set of filters; each implementation defines what a match is.
+type FilterSet interface {
+	// Matches reports whether the given string matches at least one
+	// of the filters encapsulated by the FilterSet.
+	Matches(string) bool
+}
diff --git a/internal/processor/filterset/filterset_test.go b/internal/processor/filterset/filterset_test.go
new file mode 100644
index 00000000000..f0b135459d7
--- /dev/null
+++ b/internal/processor/filterset/filterset_test.go
@@ -0,0 +1,17 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Empty test file so the CI build does not fail when enforcing a test file in every directory. +// The filterset package only defines an interface so there's nothing to test. +package filterset diff --git a/internal/processor/filterset/regexp/doc.go b/internal/processor/filterset/regexp/doc.go new file mode 100644 index 00000000000..31520913676 --- /dev/null +++ b/internal/processor/filterset/regexp/doc.go @@ -0,0 +1,16 @@ +// Copyright 2020 OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package regexp provides an implementation to match strings against a set of regexp string filters. +package regexp diff --git a/internal/processor/filterset/regexp/options.go b/internal/processor/filterset/regexp/options.go new file mode 100644 index 00000000000..b78fb686f49 --- /dev/null +++ b/internal/processor/filterset/regexp/options.go @@ -0,0 +1,31 @@ +// Copyright 2020 OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+	"github.com/golang/groupcache/lru"
+)
+
+// Option is a functional option applied to a regexpFilterSet by NewRegexpFilterSet.
+type Option func(*regexpFilterSet)
+
+// WithCache returns an Option that turns on an LRU cache of earlier Matches results holding at most maxNumEntries
+// entries (0 means no limit). NOTE(review): lru.Cache is documented as unsafe for concurrent use - confirm callers.
+func WithCache(maxNumEntries int) Option {
+	return func(set *regexpFilterSet) {
+		set.cache = lru.New(maxNumEntries)
+		set.cacheEnabled = true
+	}
+}
diff --git a/internal/processor/filterset/regexp/regexpfilterset_test.go b/internal/processor/filterset/regexp/regexpfilterset_test.go
new file mode 100644
index 00000000000..fb81818b128
--- /dev/null
+++ b/internal/processor/filterset/regexp/regexpfilterset_test.go
@@ -0,0 +1,187 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var (
+	validRegexpFilters = []string{
+		"prefix/.*",
+		"prefix_.*",
+		".*/suffix",
+		".*_suffix",
+		".*/contains/.*",
+		".*_contains_.*",
+		"full/name/match",
+		"full_name_match",
+	}
+)
+
+func TestNewRegexpFilterSet(t *testing.T) {
+	tests := []struct {
+		name    string
+		filters []string
+		success bool
+	}{
+		{
+			name:    "validFilters",
+			filters: validRegexpFilters,
+			success: true,
+		}, {
+			name: "invalidFilter",
+			filters: []string{
+				"exact_string_match",
+				"(a|b))", // invalid regex
+			},
+			success: false,
+		}, {
+			name:    "emptyFilter",
+			filters: []string{},
+			success: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			fs, err := NewRegexpFilterSet(test.filters)
+			assert.Equal(t, test.success, fs != nil)
+			assert.Equal(t, test.success, err == nil)
+
+			if err == nil {
+				// sanity call
+				fs.Matches("test")
+			}
+		})
+	}
+}
+
+func TestRegexpMatches(t *testing.T) {
+	fs, err := NewRegexpFilterSet(validRegexpFilters)
+	assert.NotNil(t, fs)
+	assert.Nil(t, err)
+	assert.False(t, fs.(*regexpFilterSet).cacheEnabled)
+
+	matches := []string{
+		"full/name/match",
+		"full_name_match",
+		"prefix/test/match",
+		"prefix_test_match",
+		"test/match/suffix",
+		"test_match_suffix",
+		"test/contains/match",
+		"test_contains_match",
+	}
+
+	for _, m := range matches {
+		t.Run(m, func(t *testing.T) {
+			assert.True(t, fs.Matches(m))
+		})
+	}
+
+	mismatches := []string{
+		"not_exact_string_match",
+		"random",
+		"test/match/suffixwrong",
+		"wrongprefix/metric/one",
+		"c",
+	}
+
+	for _, m := range mismatches {
+		t.Run(m, func(t *testing.T) {
+			assert.False(t, fs.Matches(m))
+		})
+	}
+}
+
+func TestRegexpMatchesCaches(t *testing.T) {
+	// 0 means unlimited cache
+	fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(0))
+	assert.NotNil(t, fs)
+	assert.Nil(t, err)
+	assert.True(t, fs.(*regexpFilterSet).cacheEnabled)
+
+	matches := []string{
+		"full/name/match",
+		"full_name_match",
+		"prefix/test/match",
+		"prefix_test_match",
+		"test/match/suffix",
+		"test_match_suffix",
+		"test/contains/match",
+		"test_contains_match",
+	}
+
+	for _, m := range matches {
+		t.Run(m, func(t *testing.T) {
+			assert.True(t, fs.Matches(m))
+
+			matched, ok := fs.(*regexpFilterSet).cache.Get(m)
+			assert.True(t, ok && matched.(bool)) // check ok first: on a cache miss matched is nil and the assertion would panic
+		})
+	}
+
+	mismatches := []string{
+		"not_exact_string_match",
+		"wrongprefix/test/match",
+		"test/match/suffixwrong",
+		"not_exact_string_match", // NOTE(review): repeats the first entry - presumably to exercise the cache-hit path; confirm
+	}
+
+	for _, m := range mismatches {
+		t.Run(m, func(t *testing.T) {
+			assert.False(t, fs.Matches(m))
+
+			matched, ok := fs.(*regexpFilterSet).cache.Get(m)
+			assert.True(t, ok && !matched.(bool)) // check ok first: on a cache miss matched is nil and the assertion would panic
+		})
+	}
+}
+
+func TestWithCacheSize(t *testing.T) {
+	size := 3
+	fs, err := NewRegexpFilterSet(validRegexpFilters, WithCache(size))
+	assert.NotNil(t, fs)
+	assert.Nil(t, err)
+
+	matches := []string{
+		"prefix/test/match",
+		"prefix_test_match",
+		"test/match/suffix",
+	}
+
+	// fill cache
+	for _, m := range matches {
+		fs.Matches(m)
+		_, ok := fs.(*regexpFilterSet).cache.Get(m)
+		assert.True(t, ok)
+	}
+
+	// refresh oldest entry
+	fs.Matches(matches[0])
+
+	// cause LRU cache eviction
+	newest := "new"
+	fs.Matches(newest)
+
+	_, evictedOk := fs.(*regexpFilterSet).cache.Get(matches[1])
+	assert.False(t, evictedOk)
+
+	_, newOk := fs.(*regexpFilterSet).cache.Get(newest)
+	assert.True(t, newOk)
+}
diff --git a/internal/processor/filterset/regexp/regexpfliterset.go b/internal/processor/filterset/regexp/regexpfliterset.go
new file mode 100644
index 00000000000..ad7e5125c52
--- /dev/null
+++ b/internal/processor/filterset/regexp/regexpfliterset.go
@@ -0,0 +1,94 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+	"fmt"
+	"regexp"
+
+	"github.com/golang/groupcache/lru"
+
+	"github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset"
+)
+
+// regexpFilterSet encapsulates a set of filters and caches match results.
+// Filters are re2 regex strings.
+type regexpFilterSet struct {
+	regexes      map[string]*regexp.Regexp // compiled regexes, keyed by the filter string they were built from
+	cacheEnabled bool                      // true when WithCache was applied
+	cache        *lru.Cache                // results of earlier Matches calls; nil unless cacheEnabled
+}
+
+// NewRegexpFilterSet constructs a FilterSet of re2 regex strings; opts are applied before the filters are compiled.
+// If any of the given filters fail to compile into re2, a nil FilterSet and the compile error are returned.
+func NewRegexpFilterSet(filters []string, opts ...Option) (filterset.FilterSet, error) {
+	fs := &regexpFilterSet{
+		regexes: map[string]*regexp.Regexp{},
+	}
+
+	for _, o := range opts {
+		o(fs)
+	}
+
+	if err := fs.addFilters(filters); err != nil {
+		return nil, err
+	}
+
+	return fs, nil
+}
+
+// Matches returns true if the given string matches any of the FilterSet's filters.
+// The given string must be fully matched by at least one filter's re2 regex.
+func (rfs *regexpFilterSet) Matches(toMatch string) bool {
+	if rfs.cacheEnabled {
+		if v, ok := rfs.cache.Get(toMatch); ok {
+			return v.(bool) // Matches only ever stores bool values in the cache
+		}
+	}
+
+	for _, r := range rfs.regexes {
+		if r.MatchString(toMatch) {
+			if rfs.cacheEnabled {
+				rfs.cache.Add(toMatch, true)
+			}
+			return true
+		}
+	}
+
+	if rfs.cacheEnabled {
+		rfs.cache.Add(toMatch, false) // negative results are cached as well
+	}
+	return false
+}
+
+// addFilters compiles all the given filters and stores them as regexes.
+// All regexes are automatically anchored to enforce full string matches.
+func (rfs *regexpFilterSet) addFilters(filters []string) error {
+	for _, f := range filters {
+		// The map is keyed by the raw filter string, so duplicates must be looked up under that same key.
+		if _, ok := rfs.regexes[f]; ok {
+			continue
+		}
+
+		// Group before anchoring: a bare "^a|b$" parses as (^a)|(b$) and would accept partial matches.
+		re, err := regexp.Compile(fmt.Sprintf("^(?:%s)$", f))
+		if err != nil {
+			return err
+		}
+		rfs.regexes[f] = re
+	}
+
+	return nil
+}
diff --git a/internal/processor/filterset/strict/doc.go b/internal/processor/filterset/strict/doc.go
new file mode 100644
index 00000000000..9bf6e77bbe6
--- /dev/null
+++ b/internal/processor/filterset/strict/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package strict provides an implementation to match strings against a set of exact match string filters.
+package strict
diff --git a/internal/processor/filterset/strict/options.go b/internal/processor/filterset/strict/options.go
new file mode 100644
index 00000000000..67812c528b5
--- /dev/null
+++ b/internal/processor/filterset/strict/options.go
@@ -0,0 +1,18 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package strict
+
+// Option is the type for strict filtering options that can be passed to NewStrictFilterSet; this file defines no options yet.
+type Option func(*strictFilterSet)
diff --git a/internal/processor/filterset/strict/strictfilterset.go b/internal/processor/filterset/strict/strictfilterset.go
new file mode 100644
index 00000000000..5024f0f8b5c
--- /dev/null
+++ b/internal/processor/filterset/strict/strictfilterset.go
@@ -0,0 +1,56 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package strict
+
+import (
+	"github.com/open-telemetry/opentelemetry-collector/internal/processor/filterset"
+)
+
+// strictFilterSet encapsulates a set of exact string match filters.
+type strictFilterSet struct {
+	filters map[string]struct{} // set of filter strings; only key membership is used, so the values are empty structs
+}
+
+// NewStrictFilterSet constructs a FilterSet of exact string matches.
+func NewStrictFilterSet(filters []string, opts ...Option) (filterset.FilterSet, error) {
+	fs := &strictFilterSet{
+		filters: make(map[string]struct{}, len(filters)), // pre-sized: at most one entry per filter
+	}
+
+	for _, o := range opts {
+		o(fs)
+	}
+
+	if err := fs.addFilters(filters); err != nil {
+		return nil, err
+	}
+
+	return fs, nil
+}
+
+// Matches returns true if the given string is exactly equal to one of the FilterSet's filters.
+func (sfs *strictFilterSet) Matches(toMatch string) bool {
+	_, ok := sfs.filters[toMatch]
+	return ok
+}
+
+// addFilters adds all the given filters to the set. It currently always returns nil.
+func (sfs *strictFilterSet) addFilters(filters []string) error {
+	for _, f := range filters {
+		sfs.filters[f] = struct{}{}
+	}
+
+	return nil
+}
diff --git a/internal/processor/filterset/strict/strictfilterset_test.go b/internal/processor/filterset/strict/strictfilterset_test.go
new file mode 100644
index 00000000000..156cc6f0248
--- /dev/null
+++ b/internal/processor/filterset/strict/strictfilterset_test.go
@@ -0,0 +1,83 @@
+// Copyright 2020 OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package strict
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var (
+	validStrictFilters = []string{
+		"exact_string_match",
+		".*/suffix", // regex-looking on purpose of the tests below: TestStrictMatches expects it to match only literally
+		"(a|b)",
+	}
+)
+
+func TestNewStrictFilterSet(t *testing.T) {
+	cases := []struct {
+		name    string
+		filters []string
+		success bool
+	}{
+		{
+			name:    "validFilters",
+			filters: validStrictFilters,
+			success: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			set, err := NewStrictFilterSet(tc.filters)
+			assert.Equal(t, tc.success, set != nil)
+			assert.Equal(t, tc.success, err == nil)
+		})
+	}
+}
+
+func TestStrictMatches(t *testing.T) {
+	set, err := NewStrictFilterSet(validStrictFilters)
+	assert.NotNil(t, set)
+	assert.Nil(t, err)
+
+	hits := []string{
+		"exact_string_match",
+		".*/suffix",
+		"(a|b)",
+	}
+
+	for _, s := range hits {
+		t.Run(s, func(t *testing.T) {
+			assert.True(t, set.Matches(s))
+		})
+	}
+
+	misses := []string{
+		"not_exact_string_match",
+		"random",
+		"test/match/suffix", // would match ".*/suffix" if the filter were interpreted as a regex
+		"prefix/metric/one",
+		"c",
+	}
+
+	for _, s := range misses {
+		t.Run(s, func(t *testing.T) {
+			assert.False(t, set.Matches(s))
+		})
+	}
+}