Skip to content
This repository has been archived by the owner on May 25, 2022. It is now read-only.

Commit

Permalink
Fix issue where doublestar globbing only worked at a single level (#268)
Browse files Browse the repository at this point in the history
* Refactor file finding into a testable struct

* Use doublestar for globbing
  • Loading branch information
djaglowski authored Sep 17, 2021
1 parent bc0bc79 commit 48fd163
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 92 deletions.
1 change: 1 addition & 0 deletions internal/tools/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ github.com/google/trillian v1.3.11/go.mod h1:0tPraVHrSDkA3BO6vKX67zgLXs6SsOAbHEi
github.com/google/uuid v0.0.0-20161128191214-064e2069ce9c/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
Expand Down
7 changes: 2 additions & 5 deletions operator/builtin/input/file/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ func NewInputConfig(operatorID string) *InputConfig {
// InputConfig is the configuration of a file input operator
type InputConfig struct {
helper.InputConfig `mapstructure:",squash" yaml:",inline"`

Include []string `mapstructure:"include,omitempty" json:"include,omitempty" yaml:"include,omitempty"`
Exclude []string `mapstructure:"exclude,omitempty" json:"exclude,omitempty" yaml:"exclude,omitempty"`
Finder `mapstructure:",squash" yaml:",inline"`

PollInterval helper.Duration `mapstructure:"poll_interval,omitempty" json:"poll_interval,omitempty" yaml:"poll_interval,omitempty"`
IncludeFileName bool `mapstructure:"include_file_name,omitempty" json:"include_file_name,omitempty" yaml:"include_file_name,omitempty"`
Expand Down Expand Up @@ -156,8 +154,7 @@ func (c InputConfig) Build(context operator.BuildContext) ([]operator.Operator,

op := &InputOperator{
InputOperator: inputOperator,
Include: c.Include,
Exclude: c.Exclude,
finder: c.Finder,
PollInterval: c.PollInterval.Raw(),
FilePathField: filePathField,
FileNameField: fileNameField,
Expand Down
2 changes: 1 addition & 1 deletion operator/builtin/input/file/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ func TestBuild(t *testing.T) {
require.NoError,
func(t *testing.T, f *InputOperator) {
require.Equal(t, f.OutputOperators[0], fakeOutput)
require.Equal(t, f.Include, []string{"/var/log/testpath.*"})
require.Equal(t, f.finder.Include, []string{"/var/log/testpath.*"})
require.Equal(t, f.FilePathField, entry.NewNilField())
require.Equal(t, f.FileNameField, entry.NewAttributeField("file.name"))
require.Equal(t, f.PollInterval, 10*time.Millisecond)
Expand Down
37 changes: 5 additions & 32 deletions operator/builtin/input/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"

"github.com/bmatcuk/doublestar/v3"
"go.uber.org/zap"

"github.com/open-telemetry/opentelemetry-log-collection/entry"
Expand All @@ -36,8 +34,7 @@ import (
type InputOperator struct {
helper.InputOperator

Include []string
Exclude []string
finder Finder
FilePathField entry.Field
FileNameField entry.Field
FilePathResolvedField entry.Field
Expand Down Expand Up @@ -137,9 +134,11 @@ func (f *InputOperator) poll(ctx context.Context) {
}

// Get the list of paths on disk
matches = getMatches(f.Include, f.Exclude)
matches = f.finder.FindFiles()
if f.firstCheck && len(matches) == 0 {
f.Warnw("no files match the configured include patterns", "include", f.Include)
f.Warnw("no files match the configured include patterns",
"include", f.finder.Include,
"exclude", f.finder.Exclude)
} else if len(matches) > f.maxBatchFiles {
matches, f.queuedMatches = matches[:f.maxBatchFiles], matches[f.maxBatchFiles:]
}
Expand Down Expand Up @@ -192,32 +191,6 @@ OUTER:
f.syncLastPollFiles(ctx)
}

// getMatches gets a list of paths given an array of glob patterns to include and exclude
func getMatches(includes, excludes []string) []string {
all := make([]string, 0, len(includes))
for _, include := range includes {
matches, _ := filepath.Glob(include) // compile error checked in build
INCLUDE:
for _, match := range matches {
for _, exclude := range excludes {
if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches {
continue INCLUDE
}
}

for _, existing := range all {
if existing == match {
continue INCLUDE
}
}

all = append(all, match)
}
}

return all
}

// makeReaders takes a list of paths, then creates readers from each of those paths,
// discarding any that have a duplicate fingerprint to other files that have already
// been read this polling interval
Expand Down
43 changes: 0 additions & 43 deletions operator/builtin/input/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -929,46 +929,3 @@ func TestEncodings(t *testing.T) {
})
}
}

// TestExclude tests that a log file will be excluded if it matches the
// glob specified in the operator.
func TestExclude(t *testing.T) {
tempDir := testutil.NewTempDir(t)
paths := writeTempFiles(tempDir, []string{"include.log", "exclude.log"})

includes := []string{filepath.Join(tempDir, "*")}
excludes := []string{filepath.Join(tempDir, "*exclude.log")}

matches := getMatches(includes, excludes)
require.ElementsMatch(t, matches, paths[:1])
}
func TestExcludeEmpty(t *testing.T) {
tempDir := testutil.NewTempDir(t)
paths := writeTempFiles(tempDir, []string{"include.log", "exclude.log"})

includes := []string{filepath.Join(tempDir, "*")}
excludes := []string{}

matches := getMatches(includes, excludes)
require.ElementsMatch(t, matches, paths)
}
func TestExcludeMany(t *testing.T) {
tempDir := testutil.NewTempDir(t)
paths := writeTempFiles(tempDir, []string{"a1.log", "a2.log", "b1.log", "b2.log"})

includes := []string{filepath.Join(tempDir, "*")}
excludes := []string{filepath.Join(tempDir, "a*.log"), filepath.Join(tempDir, "*2.log")}

matches := getMatches(includes, excludes)
require.ElementsMatch(t, matches, paths[2:3])
}
func TestExcludeDuplicates(t *testing.T) {
tempDir := testutil.NewTempDir(t)
paths := writeTempFiles(tempDir, []string{"a1.log", "a2.log", "b1.log", "b2.log"})

includes := []string{filepath.Join(tempDir, "*1*"), filepath.Join(tempDir, "a*")}
excludes := []string{filepath.Join(tempDir, "a*.log"), filepath.Join(tempDir, "*2.log")}

matches := getMatches(includes, excludes)
require.ElementsMatch(t, matches, paths[2:3])
}
50 changes: 50 additions & 0 deletions operator/builtin/input/file/finder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package file

import (
"github.com/bmatcuk/doublestar/v3"
)

type Finder struct {
Include []string `mapstructure:"include,omitempty" json:"include,omitempty" yaml:"include,omitempty"`
Exclude []string `mapstructure:"exclude,omitempty" json:"exclude,omitempty" yaml:"exclude,omitempty"`
}

// FindFiles gets a list of paths given an array of glob patterns to include and exclude
func (f Finder) FindFiles() []string {
all := make([]string, 0, len(f.Include))
for _, include := range f.Include {
matches, _ := doublestar.Glob(include) // compile error checked in build
INCLUDE:
for _, match := range matches {
for _, exclude := range f.Exclude {
if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches {
continue INCLUDE
}
}

for _, existing := range all {
if existing == match {
continue INCLUDE
}
}

all = append(all, match)
}
}

return all
}
155 changes: 155 additions & 0 deletions operator/builtin/input/file/finder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package file

import (
"io/ioutil"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/require"

"github.com/open-telemetry/opentelemetry-log-collection/testutil"
)

func TestFinder(t *testing.T) {
t.Parallel()
cases := []struct {
name string
files []string
include []string
exclude []string
expected []string
}{
{
name: "IncludeOne",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a1.log"},
exclude: []string{},
expected: []string{"a1.log"},
},
{
name: "IncludeNone",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"c*.log"},
exclude: []string{},
expected: []string{},
},
{
name: "IncludeAll",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeLogs",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeA",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log"},
},
{
name: "Include2s",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*2.log"},
exclude: []string{},
expected: []string{"a2.log", "b2.log"},
},
{
name: "Exclude",
files: []string{"include.log", "exclude.log"},
include: []string{"*"},
exclude: []string{"exclude.log"},
expected: []string{"include.log"},
},
{
name: "ExcludeMany",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "ExcludeDuplicates",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*1*", "a*"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "IncludeMultipleDirectories",
files: []string{"a/1.log", "a/2.log", "b/1.log", "b/2.log"},
include: []string{"a/*.log", "b/*.log"},
exclude: []string{},
expected: []string{"a/1.log", "a/2.log", "b/1.log", "b/2.log"},
},
{
name: "IncludeMultipleDirectoriesVaryingDepth",
files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"},
include: []string{"*.log", "a/*.log", "a/b/*.log", "c/*.log"},
exclude: []string{},
expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"},
},
{
name: "DoubleStarSameDepth",
files: []string{"a/1.log", "b/1.log", "c/1.log"},
include: []string{"**/*.log"},
exclude: []string{},
expected: []string{"a/1.log", "b/1.log", "c/1.log"},
},
{
name: "DoubleStarVaryingDepth",
files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"},
include: []string{"**/*.log"},
exclude: []string{},
expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"},
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
tempDir := testutil.NewTempDir(t)
files := absPath(tempDir, tc.files)
include := absPath(tempDir, tc.include)
exclude := absPath(tempDir, tc.exclude)
expected := absPath(tempDir, tc.expected)

for _, f := range files {
os.MkdirAll(filepath.Dir(f), 0755)
ioutil.WriteFile(f, []byte(filepath.Base(f)), 0755)
}

finder := Finder{include, exclude}
require.ElementsMatch(t, finder.FindFiles(), expected)
})
}
}

func absPath(tempDir string, files []string) []string {
absFiles := make([]string, 0, len(files))
for _, f := range files {
absFiles = append(absFiles, filepath.Join(tempDir, f))
}
return absFiles
}
11 changes: 0 additions & 11 deletions operator/builtin/input/file/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,3 @@ func expectNoMessagesUntil(t *testing.T, c chan *entry.Entry, d time.Duration) {
case <-time.After(d):
}
}

// writes file with the specified file names and returns their full paths in order
func writeTempFiles(tempDir string, names []string) []string {
result := make([]string, 0, len(names))
for _, name := range names {
path := filepath.Join(tempDir, name)
ioutil.WriteFile(path, []byte(name), 0755)
result = append(result, path)
}
return result
}

0 comments on commit 48fd163

Please sign in to comment.