From 53121ab62851924d88bdd9497173498cb9e6bea7 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Wed, 2 Aug 2023 11:08:45 -0400 Subject: [PATCH] [chore][fileconsumer] Move file finding and filtering into subpackage --- .chloggen/pkg-stanza-fileconsumer-sort.yaml | 27 + pkg/stanza/fileconsumer/config.go | 41 +- pkg/stanza/fileconsumer/config_test.go | 81 ++- pkg/stanza/fileconsumer/file.go | 13 +- pkg/stanza/fileconsumer/file_sort.go | 216 ------- pkg/stanza/fileconsumer/file_test.go | 30 +- pkg/stanza/fileconsumer/finder.go | 71 +-- pkg/stanza/fileconsumer/finder_test.go | 449 -------------- .../matcher/internal/filter/filter.go | 54 ++ .../matcher/internal/filter/filter_test.go | 105 ++++ .../matcher/internal/filter/item.go | 36 ++ .../matcher/internal/filter/item_test.go | 61 ++ .../matcher/internal/filter/sort.go | 124 ++++ .../matcher/internal/filter/sort_test.go | 151 +++++ .../{ => matcher}/internal/finder/finder.go | 14 +- .../internal/finder/finder_test.go | 46 +- pkg/stanza/fileconsumer/matcher/matcher.go | 130 ++++ .../fileconsumer/matcher/matcher_test.go | 563 ++++++++++++++++++ receiver/otlpjsonfilereceiver/file_test.go | 3 +- 19 files changed, 1385 insertions(+), 830 deletions(-) create mode 100755 .chloggen/pkg-stanza-fileconsumer-sort.yaml delete mode 100644 pkg/stanza/fileconsumer/file_sort.go delete mode 100644 pkg/stanza/fileconsumer/finder_test.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/filter.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/filter_test.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/item.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/item_test.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/sort.go create mode 100644 pkg/stanza/fileconsumer/matcher/internal/filter/sort_test.go rename pkg/stanza/fileconsumer/{ => matcher}/internal/finder/finder.go (75%) rename pkg/stanza/fileconsumer/{ => 
matcher}/internal/finder/finder_test.go (82%) create mode 100644 pkg/stanza/fileconsumer/matcher/matcher.go create mode 100644 pkg/stanza/fileconsumer/matcher/matcher_test.go diff --git a/.chloggen/pkg-stanza-fileconsumer-sort.yaml b/.chloggen/pkg-stanza-fileconsumer-sort.yaml new file mode 100755 index 000000000000..9796594b4b7f --- /dev/null +++ b/.chloggen/pkg-stanza-fileconsumer-sort.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: deprecation + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Deprecate fileconsumer MatchingCriteria in favor of new matcher package + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [24853] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [api] diff --git a/pkg/stanza/fileconsumer/config.go b/pkg/stanza/fileconsumer/config.go index 206e7e7c16fd..cf02b131aa64 100644 --- a/pkg/stanza/fileconsumer/config.go +++ b/pkg/stanza/fileconsumer/config.go @@ -9,13 +9,13 @@ import ( "fmt" "time" - "github.com/bmatcuk/doublestar/v4" "go.opentelemetry.io/collector/featuregate" "go.uber.org/zap" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/emit" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/fingerprint" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/header" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" ) @@ -58,7 +58,7 @@ func NewConfig() *Config { // Config is the configuration of a file input operator type Config struct { - MatchingCriteria `mapstructure:",squash"` + matcher.Criteria `mapstructure:",squash"` IncludeFileName bool `mapstructure:"include_file_name,omitempty"` IncludeFilePath bool `mapstructure:"include_file_path,omitempty"` IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"` @@ -140,6 +140,11 @@ func (c Config) buildManager(logger *zap.SugaredLogger, emit emit.Callback, fact } } + fileMatcher, err := matcher.New(c.Criteria) + if err != nil { + return nil, err + } + return &Manager{ SugaredLogger: logger.With("component", "fileconsumer"), cancel: func() {}, @@ -159,7 +164,7 @@ func (c Config) buildManager(logger *zap.SugaredLogger, emit emit.Callback, fact encodingConfig: c.Splitter.EncodingConfig, headerConfig: hCfg, }, - finder: c.MatchingCriteria, + fileMatcher: fileMatcher, roller: newRoller(), pollInterval: c.PollInterval, maxBatchFiles: c.MaxConcurrentFiles / 2, @@ -179,34 +184,8 @@ func (c 
Config) validate() error { return fmt.Errorf("`header` requires feature gate `%s`", AllowHeaderMetadataParsing.ID()) } - if len(c.Include) == 0 { - return fmt.Errorf("required argument `include` is empty") - } - - // Ensure includes can be parsed as globs - for _, include := range c.Include { - _, err := doublestar.PathMatch(include, "matchstring") - if err != nil { - return fmt.Errorf("parse include glob: %w", err) - } - } - - // Ensure excludes can be parsed as globs - for _, exclude := range c.Exclude { - _, err := doublestar.PathMatch(exclude, "matchstring") - if err != nil { - return fmt.Errorf("parse exclude glob: %w", err) - } - } - - if len(c.OrderingCriteria.SortBy) != 0 && c.OrderingCriteria.Regex == "" { - return fmt.Errorf("`regex` must be specified when `sort_by` is specified") - } - - for _, sr := range c.OrderingCriteria.SortBy { - if err := sr.validate(); err != nil { - return err - } + if _, err := matcher.New(c.Criteria); err != nil { + return err } if c.MaxLogSize <= 0 { diff --git a/pkg/stanza/fileconsumer/config_test.go b/pkg/stanza/fileconsumer/config_test.go index 99762e15f43a..2c54ba1ad0c1 100644 --- a/pkg/stanza/fileconsumer/config_test.go +++ b/pkg/stanza/fileconsumer/config_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/featuregate" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/operatortest" @@ -163,17 +164,15 @@ func TestUnmarshal(t *testing.T) { Name: "sort_by_timestamp", Expect: func() *mockOperatorConfig { cfg := NewConfig() - cfg.OrderingCriteria.Regex = `err\.[a-zA-Z]\.\d+\.(?P<rotation_time>\d{10})\.log` - cfg.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: 
baseSortRule{ - SortType: sortTypeTimestamp, - RegexKey: "rotation_time", - Ascending: true, - }, - Location: "utc", - Layout: `%Y%m%d%H`, + cfg.OrderingCriteria = matcher.OrderingCriteria{ + Regex: `err\.[a-zA-Z]\.\d+\.(?P<rotation_time>\d{10})\.log`, + SortBy: []matcher.Sort{ + { + SortType: "timestamp", + RegexKey: "rotation_time", + Ascending: true, + Location: "utc", + Layout: `%Y%m%d%H`, }, }, } @@ -184,14 +183,12 @@ func TestUnmarshal(t *testing.T) { Name: "sort_by_numeric", Expect: func() *mockOperatorConfig { cfg := NewConfig() - cfg.OrderingCriteria.Regex = `err\.(?P<file_num>[a-zA-Z])\.\d+\.\d{10}\.log` - cfg.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &NumericSortRule{ - baseSortRule: baseSortRule{ - SortType: sortTypeNumeric, - RegexKey: "file_num", - }, + cfg.OrderingCriteria = matcher.OrderingCriteria{ + Regex: `err\.(?P<file_num>[a-zA-Z])\.\d+\.\d{10}\.log`, + SortBy: []matcher.Sort{ + { + SortType: "numeric", + RegexKey: "file_num", }, }, } @@ -432,7 +429,6 @@ func TestBuild(t *testing.T) { func(f *Config) {}, require.NoError, func(t *testing.T, f *Manager) { - require.Equal(t, f.finder.Include, []string{"/var/log/testpath.*"}) require.Equal(t, f.pollInterval, 10*time.Millisecond) }, }, @@ -575,13 +571,11 @@ func TestBuild(t *testing.T) { { "BadOrderingCriteriaRegex", func(f *Config) { - f.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &NumericSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - SortType: sortTypeNumeric, - }, + f.OrderingCriteria = matcher.OrderingCriteria{ + SortBy: []matcher.Sort{ + { + SortType: "numeric", + RegexKey: "value", }, }, } @@ -590,16 +584,14 @@ func TestBuild(t *testing.T) { nil, }, { - "BasicOrderingCriteriaTimetsamp", + "OrderingCriteriaTimestampMissingLayout", func(f *Config) { - f.OrderingCriteria.Regex = ".*" - f.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - SortType: sortTypeTimestamp, - }, + f.OrderingCriteria = matcher.OrderingCriteria{ + Regex: ".*", + 
SortBy: []matcher.Sort{ + { + SortType: "timestamp", + RegexKey: "value", }, }, } @@ -610,15 +602,13 @@ func TestBuild(t *testing.T) { { "GoodOrderingCriteriaTimestamp", func(f *Config) { - f.OrderingCriteria.Regex = ".*" - f.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - SortType: sortTypeTimestamp, - }, - Layout: "%Y%m%d%H", + f.OrderingCriteria = matcher.OrderingCriteria{ + Regex: ".*", + SortBy: []matcher.Sort{ + { + SortType: "timestamp", + RegexKey: "value", + Layout: "%Y%m%d%H", }, }, } @@ -668,7 +658,6 @@ func TestBuildWithSplitFunc(t *testing.T) { func(f *Config) {}, require.NoError, func(t *testing.T, f *Manager) { - require.Equal(t, f.finder.Include, []string{"/var/log/testpath.*"}) require.Equal(t, f.pollInterval, 10*time.Millisecond) }, }, diff --git a/pkg/stanza/fileconsumer/file.go b/pkg/stanza/fileconsumer/file.go index 50bc0ee40546..d87546a2b5c4 100644 --- a/pkg/stanza/fileconsumer/file.go +++ b/pkg/stanza/fileconsumer/file.go @@ -15,6 +15,7 @@ import ( "go.uber.org/zap" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/fingerprint" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" ) @@ -31,7 +32,7 @@ type Manager struct { cancel context.CancelFunc readerFactory readerFactory - finder MatchingCriteria + fileMatcher *matcher.Matcher roller roller persister operator.Persister @@ -56,12 +57,8 @@ func (m *Manager) Start(persister operator.Persister) error { return fmt.Errorf("read known files from database: %w", err) } - if files, err := m.finder.findFiles(); err != nil { - m.Warnw("error occurred while finding files", "error", err.Error()) - } else if len(files) == 0 { - m.Warnw("no files match the configured include patterns", - "include", m.finder.Include, - "exclude", m.finder.Exclude) + if _, err := 
m.fileMatcher.MatchFiles(); err != nil { + m.Warnw("finding files", "error", err.Error()) } // Start polling goroutine @@ -116,7 +113,7 @@ func (m *Manager) poll(ctx context.Context) { batchesProcessed := 0 // Get the list of paths on disk - matches, err := m.finder.findFiles() + matches, err := m.fileMatcher.MatchFiles() if err != nil { m.Errorf("error finding files: %s", err) } diff --git a/pkg/stanza/fileconsumer/file_sort.go b/pkg/stanza/fileconsumer/file_sort.go deleted file mode 100644 index 2c71b9af49c7..000000000000 --- a/pkg/stanza/fileconsumer/file_sort.go +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package fileconsumer // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer" - -import ( - "fmt" - "regexp" - "sort" - "strconv" - - "go.opentelemetry.io/collector/confmap" - "go.uber.org/multierr" - - "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/errors" -) - -const ( - sortTypeNumeric = "numeric" - sortTypeTimestamp = "timestamp" - sortTypeAlphabetical = "alphabetical" -) - -type sortRule interface { - validate() error - sort(re *regexp.Regexp, files []string) ([]string, error) -} - -func (sr *sortRuleImpl) Unmarshal(component *confmap.Conf) error { - if !component.IsSet("sort_type") { - return fmt.Errorf("missing required field 'sort_type'") - } - typeInterface := component.Get("sort_type") - - typeString, ok := typeInterface.(string) - if !ok { - return fmt.Errorf("non-string type %T for field 'sort_type'", typeInterface) - } - - switch typeString { - case sortTypeNumeric: - var numericSortRule *NumericSortRule - err := component.Unmarshal(&numericSortRule, confmap.WithErrorUnused()) - if err != nil { - return err - } - sr.sortRule = numericSortRule - case sortTypeAlphabetical: - var alphabeticalSortRule *AlphabeticalSortRule - err 
:= component.Unmarshal(&alphabeticalSortRule, confmap.WithErrorUnused()) - if err != nil { - return err - } - sr.sortRule = alphabeticalSortRule - case sortTypeTimestamp: - var timestampSortRule *TimestampSortRule - err := component.Unmarshal(&timestampSortRule, confmap.WithErrorUnused()) - if err != nil { - return err - } - sr.sortRule = timestampSortRule - default: - return fmt.Errorf("invalid sort type %s", typeString) - } - - return nil -} - -func (f NumericSortRule) validate() error { - if f.RegexKey == "" { - return fmt.Errorf("regex key must be specified for numeric sort") - } - return nil -} - -func (f *AlphabeticalSortRule) validate() error { - if f.RegexKey == "" { - return fmt.Errorf("regex key must be specified for alphabetical sort") - } - return nil -} - -func (f *TimestampSortRule) validate() error { - if f.RegexKey == "" { - return fmt.Errorf("regex key must be specified for timestamp sort") - } - if f.Layout == "" { - return fmt.Errorf("format must be specified for timestamp sort") - } - - if f.Location == "" { - f.Location = "UTC" - } - - _, err := timeutils.StrptimeToGotime(f.Layout) - if err != nil { - return errors.Wrap(err, "parse strptime layout") - } - - return nil -} - -func (f *NumericSortRule) sort(re *regexp.Regexp, files []string) ([]string, error) { - var errs error - sort.Slice(files, func(i, j int) bool { - valI, valJ, err := extractValues(re, f.RegexKey, files[i], files[j]) - if err != nil { - errs = multierr.Append(errs, err) - return false - } - - numI, err := strconv.Atoi(valI) - if err != nil { - errs = multierr.Append(errs, fmt.Errorf("parse %s to int: %w", valI, err)) - return false - } - - numJ, err := strconv.Atoi(valJ) - if err != nil { - errs = multierr.Append(errs, fmt.Errorf("parse %s to int: %w", valJ, err)) - return false - } - - if f.Ascending { - return numI < numJ - } - return numI > numJ - }) - - return files, errs -} - -func (f *TimestampSortRule) sort(re *regexp.Regexp, files []string) ([]string, error) { - // apply 
regex to each file and sort the results - location, err := timeutils.GetLocation(&f.Location, nil) - if err != nil { - return files, fmt.Errorf("load location %s: %w", f.Location, err) - } - - var errs error - - sort.Slice(files, func(i, j int) bool { - valI, valJ, err := extractValues(re, f.RegexKey, files[i], files[j]) - if err != nil { - errs = multierr.Append(errs, err) - return false - } - - timeI, err := timeutils.ParseStrptime(f.Layout, valI, location) - if err != nil { - errs = multierr.Append(errs, fmt.Errorf("parse %s to Time: %w", timeI, err)) - return false - } - - timeJ, err := timeutils.ParseStrptime(f.Layout, valJ, location) - if err != nil { - errs = multierr.Append(errs, fmt.Errorf("parse %s to Time: %w", timeI, err)) - return false - } - - // if ascending, return true if timeI is before timeJ - if f.Ascending { - return timeI.Before(timeJ) - } - return timeI.After(timeJ) - }) - - return files, errs -} - -func (f *AlphabeticalSortRule) sort(re *regexp.Regexp, files []string) ([]string, error) { - var errs error - sort.Slice(files, func(i, j int) bool { - valI, valJ, err := extractValues(re, f.RegexKey, files[i], files[j]) - if err != nil { - errs = multierr.Append(errs, err) - return false - } - - if f.Ascending { - return valI < valJ - } - return valI > valJ - }) - - return files, errs -} - -func extractValues(re *regexp.Regexp, reKey, file1, file2 string) (string, string, error) { - valI := extractValue(re, reKey, file1) - if valI == "" { - return "", "", fmt.Errorf("find capture group %q in regex for file: %s", reKey, file1) - } - valJ := extractValue(re, reKey, file2) - if valJ == "" { - return "", "", fmt.Errorf("find capture group %q in regex for file: %s", reKey, file2) - } - - return valI, valJ, nil -} - -func extractValue(re *regexp.Regexp, reKey, input string) string { - match := re.FindStringSubmatch(input) - if match == nil { - return "" - } - - for i, name := range re.SubexpNames() { - if name == reKey && i < len(match) { - return 
match[i] - } - } - - return "" -} diff --git a/pkg/stanza/fileconsumer/file_test.go b/pkg/stanza/fileconsumer/file_test.go index 36b6c0a7f56f..d04d4e64b413 100644 --- a/pkg/stanza/fileconsumer/file_test.go +++ b/pkg/stanza/fileconsumer/file_test.go @@ -21,6 +21,7 @@ import ( "go.uber.org/zap/zapcore" "go.uber.org/zap/zaptest/observer" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" ) @@ -730,13 +731,12 @@ func TestMultiFileSort(t *testing.T) { tempDir := t.TempDir() cfg := NewConfig().includeDir(tempDir) cfg.StartAt = "beginning" - cfg.MatchingCriteria.OrderingCriteria.Regex = `.*(?P<value>\d)` - cfg.MatchingCriteria.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &NumericSortRule{ - baseSortRule: baseSortRule{ - RegexKey: `value`, - }, + cfg.OrderingCriteria = matcher.OrderingCriteria{ + Regex: `.*(?P<value>\d)`, + SortBy: []matcher.Sort{ + { + SortType: "numeric", + RegexKey: "value", }, }, } @@ -764,15 +764,13 @@ func TestMultiFileSortTimestamp(t *testing.T) { tempDir := t.TempDir() cfg := NewConfig().includeDir(tempDir) cfg.StartAt = "beginning" - cfg.MatchingCriteria.OrderingCriteria.Regex = `.(?P<value>\d{10})\.log` - cfg.MatchingCriteria.OrderingCriteria.SortBy = []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: baseSortRule{ - RegexKey: `value`, - SortType: "timestamp", - }, - Layout: "%Y%m%d%H", + cfg.OrderingCriteria = matcher.OrderingCriteria{ + Regex: `.(?P<value>\d{10})\.log`, + SortBy: []matcher.Sort{ + { + SortType: "timestamp", + RegexKey: `value`, + Layout: "%Y%m%d%H", }, }, } diff --git a/pkg/stanza/fileconsumer/finder.go b/pkg/stanza/fileconsumer/finder.go index 19bf315830fd..d04942a22b91 100644 --- a/pkg/stanza/fileconsumer/finder.go +++ b/pkg/stanza/fileconsumer/finder.go @@ -3,68 +3,19 @@ package fileconsumer // import 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer" -import ( - "regexp" +import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher" - "go.uber.org/multierr" +// Deprecated: [v0.83.0] Use matcher.Criteria instead. +type MatchingCriteria = matcher.Criteria - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/finder" -) +// Deprecated: [v0.83.0] Use matcher.OrderingCriteria instead. +type OrderingCriteria = matcher.OrderingCriteria -type MatchingCriteria struct { - Include []string `mapstructure:"include,omitempty"` - Exclude []string `mapstructure:"exclude,omitempty"` - OrderingCriteria OrderingCriteria `mapstructure:"ordering_criteria,omitempty"` -} +// Deprecated: [v0.83.0] Use matcher.Sort instead. +type NumericSortRule = matcher.Sort -type OrderingCriteria struct { - Regex string `mapstructure:"regex,omitempty"` - SortBy []sortRuleImpl `mapstructure:"sort_by,omitempty"` -} +// Deprecated: [v0.83.0] Use matcher.Sort instead. 
+type AlphabeticalSortRule = matcher.Sort -type NumericSortRule struct { - baseSortRule `mapstructure:",squash"` -} - -type AlphabeticalSortRule struct { - baseSortRule `mapstructure:",squash"` -} - -type TimestampSortRule struct { - baseSortRule `mapstructure:",squash"` - Layout string `mapstructure:"layout,omitempty"` - Location string `mapstructure:"location,omitempty"` -} - -type baseSortRule struct { - RegexKey string `mapstructure:"regex_key,omitempty"` - Ascending bool `mapstructure:"ascending,omitempty"` - SortType string `mapstructure:"sort_type,omitempty"` -} - -type sortRuleImpl struct { - sortRule -} - -// findFiles gets a list of paths given an array of glob patterns to include and exclude -func (f MatchingCriteria) findFiles() ([]string, error) { - all := finder.FindFiles(f.Include, f.Exclude) - - if len(all) == 0 || len(f.OrderingCriteria.SortBy) == 0 { - return all, nil - } - - re := regexp.MustCompile(f.OrderingCriteria.Regex) - - var errs error - for _, SortPattern := range f.OrderingCriteria.SortBy { - sortedFiles, err := SortPattern.sort(re, all) - if err != nil { - errs = multierr.Append(errs, err) - continue - } - all = sortedFiles - } - - return []string{all[0]}, errs -} +// Deprecated: [v0.83.0] Use matcher.Sort instead. 
+type TimestampSortRule = matcher.Sort diff --git a/pkg/stanza/fileconsumer/finder_test.go b/pkg/stanza/fileconsumer/finder_test.go deleted file mode 100644 index 14adaada1d84..000000000000 --- a/pkg/stanza/fileconsumer/finder_test.go +++ /dev/null @@ -1,449 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package fileconsumer - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestFinder(t *testing.T) { - t.Parallel() - cases := []struct { - name string - files []string - include []string - exclude []string - filterSortRule OrderingCriteria - expected []string - }{ - { - name: "Timestamp Sorting", - files: []string{"err.2023020611.log", "err.2023020612.log", "err.2023020610.log", "err.2023020609.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>\d{4}\d{2}\d{2}\d{2}).*log`, - SortBy: []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: false, - }, - Location: "UTC", - Layout: `%Y%m%d%H`, - }, - }, - }, - }, - expected: []string{"err.2023020612.log"}, - }, - { - name: "Timestamp Sorting Ascending", - files: []string{"err.2023020612.log", "err.2023020611.log", "err.2023020609.log", "err.2023020610.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>\d{4}\d{2}\d{2}\d{2}).*log`, - SortBy: []sortRuleImpl{ - { - &TimestampSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: true, - }, - Location: "UTC", - Layout: `%Y%m%d%H`, - }, - }, - }, - }, - expected: []string{"err.2023020609.log"}, - }, - { - name: "Numeric Sorting", - files: []string{"err.123456788.log", "err.123456789.log", "err.123456787.log", "err.123456786.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>\d+).*log`, - SortBy: []sortRuleImpl{ - { - 
&NumericSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: false, - }, - }, - }, - }, - }, - expected: []string{"err.123456789.log"}, - }, - { - name: "Numeric Sorting Ascending", - files: []string{"err.123456789.log", "err.123456788.log", "err.123456786.log", "err.123456787.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>\d+).*log`, - SortBy: []sortRuleImpl{ - { - &NumericSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: true, - }, - }, - }, - }, - }, - expected: []string{"err.123456786.log"}, - }, - { - name: "Alphabetical Sorting", - files: []string{"err.a.log", "err.d.log", "err.b.log", "err.c.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>[a-zA-Z]+).*log`, - SortBy: []sortRuleImpl{ - { - &AlphabeticalSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: false, - }, - }, - }, - }, - }, - expected: []string{"err.d.log"}, - }, - { - name: "Alphabetical Sorting Ascending", - files: []string{"err.b.log", "err.a.log", "err.c.log", "err.d.log"}, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P<value>[a-zA-Z]+).*log`, - SortBy: []sortRuleImpl{ - { - &AlphabeticalSortRule{ - baseSortRule: baseSortRule{ - RegexKey: "value", - Ascending: true, - }, - }, - }, - }, - }, - expected: []string{"err.a.log"}, - }, - { - name: "Multiple Sorting - timestamp priority sort", - files: []string{ - "err.b.1.2023020601.log", - "err.b.2.2023020601.log", - "err.a.1.2023020601.log", - "err.a.2.2023020601.log", - "err.b.1.2023020602.log", - "err.a.2.2023020602.log", - "err.b.2.2023020602.log", - "err.a.1.2023020602.log", - }, - include: []string{"err.*.log"}, - exclude: []string{}, - filterSortRule: OrderingCriteria{ - Regex: `err\.(?P[a-zA-Z])\.(?P\d+)\.(?P