Skip to content

Commit

Permalink
filebeat: expand double wildcards in prospector (#3980)
Browse files Browse the repository at this point in the history
Expand double wildcards into standard glob patterns, up to a maximum
depth of 8 levels after the wildcard.

Resolves #2084
  • Loading branch information
7AC authored and ruflin committed Apr 26, 2017
1 parent ca2e449 commit aaee997
Show file tree
Hide file tree
Showing 10 changed files with 420 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Add base for supporting prospector level processors {pull}3853[3853]
- Add auditd module for reading audit logs on Linux. {pull}3750[3750] {pull}3941[3941]
- Add filebeat.config.path as replacement for config_dir. {pull}4051[4051]
- Add a `recursive_glob.enabled` setting to expand "**" in patterns. {pull}3980[3980]

*Heartbeat*

Expand Down
5 changes: 5 additions & 0 deletions filebeat/_meta/common.full.p2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ filebeat.prospectors:
# This is especially useful for multiline log messages which can get large.
#max_bytes: 10485760

### Recursive glob configuration

# Expand "**" patterns into regular glob patterns.
#recursive_glob.enabled: true

### JSON configuration

# Decode JSON options. Enable this if your logs are structured in JSON.
Expand Down
15 changes: 12 additions & 3 deletions filebeat/docs/reference/configuration/filebeat-options.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,22 @@ supported by https://golang.org/pkg/path/filepath/#Glob[Golang Glob] are also
supported here. For example, to fetch all files from a predefined level of
subdirectories, the following pattern can be used: `/var/log/*/*.log`. This
fetches all `.log` files from the subfolders of `/var/log`. It does not
fetch log files from the `/var/log` folder itself. Currently it is not possible
to recursively fetch all files in all subdirectories of a directory.
fetch log files from the `/var/log` folder itself.
It is possible to recursively fetch all files in all subdirectories of a directory
using the optional <<recursive_glob>> settings.

Filebeat starts a harvester for each file that it finds under the specified
paths. You can specify one path per line. Each line begins with a dash (-).

[[recursive_glob]]
===== recursive_glob

*`enabled`*:: Enable expanding `**` into recursive glob patterns. With this feature enabled,
the rightmost `**` in each path is expanded into a fixed
number of glob patterns. For example: `/foo/**` expands to `/foo`, `/foo/*`,
`/foo/*/*`, and so on. The feature is disabled by default, and if enabled it expands a single `**`
into an 8-level deep `*` pattern.

===== encoding

The file encoding to use for reading files that contain international characters.
Expand Down Expand Up @@ -591,4 +601,3 @@ include::../../../../libbeat/docs/dashboardsconfig.asciidoc[]
include::../../../../libbeat/docs/loggingconfig.asciidoc[]

include::../../../../libbeat/docs/processors-config.asciidoc[]

5 changes: 5 additions & 0 deletions filebeat/filebeat.full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ filebeat.prospectors:
# This is especially useful for multiline log messages which can get large.
#max_bytes: 10485760

### Recursive glob configuration

# Expand "**" patterns into regular glob patterns.
#recursive_glob.enabled: true

### JSON configuration

# Decode JSON options. Enable this if your logs are structured in JSON.
Expand Down
71 changes: 71 additions & 0 deletions filebeat/input/file/glob.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package file

import (
"fmt"
"path/filepath"
)

// wildcards returns the relative glob fragments that a "**" path element
// expands to: "", "*", "*/*", and so on, up to doubleStarPatternDepth "*"
// elements.
//
// dir is the part of the pattern that precedes the "**" element and suffix is
// the part that follows it. When both are empty the pattern is a bare relative
// "**", and the empty fragment is skipped so that the pattern never expands to
// "".
//
// The returned slice is never nil; it is empty only when the depth is 0 and
// both dir and suffix are empty.
func wildcards(doubleStarPatternDepth uint8, dir string, suffix string) []string {
	// Count in int rather than uint8: with a uint8 counter the condition
	// "i <= doubleStarPatternDepth" can never become false for a depth of 255
	// (the counter wraps around to 0), so the loop would never terminate.
	depth := int(doubleStarPatternDepth)
	wildcardList := make([]string, 0, depth+1)
	w := ""
	i := 0
	if dir == "" && suffix == "" {
		// Don't expand to "" on relative paths
		w = "*"
		i = 1
	}
	for ; i <= depth; i++ {
		wildcardList = append(wildcardList, w)
		w = filepath.Join(w, "*")
	}
	return wildcardList
}

// globPatterns expands a "**" path element of pattern into a list of standard
// glob patterns, one per directory level, up to doubleStarPatternDepth levels.
//
// Only a path element that is exactly "**" is expanded. If the pattern holds
// no such element, or if doubleStarPatternDepth is 0, the pattern is returned
// unchanged as the single entry of the result. A pattern with more than one
// "**" element yields an error.
func globPatterns(pattern string, doubleStarPatternDepth uint8) ([]string, error) {
	if doubleStarPatternDepth == 0 {
		return []string{pattern}, nil
	}

	var (
		expansions []string // fragments that replace the "**" element
		head       string   // everything before the "**" element
		tail       string   // everything after the "**" element
	)

	// Walk the cleaned pattern one element at a time, from the last element
	// towards the root.
	rest, elem := filepath.Split(filepath.Clean(pattern))
	for elem != "" && elem != "." {
		switch {
		case elem == "**":
			if len(expansions) > 0 {
				return nil, fmt.Errorf("multiple ** in %q", pattern)
			}
			expansions = wildcards(doubleStarPatternDepth, rest, tail)
			head = rest
		case len(expansions) == 0:
			// Still to the right of any "**": keep building the tail.
			tail = filepath.Join(elem, tail)
		}
		rest, elem = filepath.Split(filepath.Clean(rest))
	}

	if len(expansions) == 0 {
		return []string{pattern}, nil
	}

	expanded := make([]string, 0, len(expansions))
	for i := range expansions {
		expanded = append(expanded, filepath.Join(head, expansions[i], tail))
	}
	return expanded, nil
}

// Glob returns the paths matching pattern. A '**' element is first expanded
// into several plain patterns (see https://golang.org/pkg/path/filepath/#Match
// for the syntax those must satisfy), each of which is then evaluated with
// filepath.Glob. The matches are concatenated in expansion order.
//
// An error from the expansion or from any filepath.Glob call is returned
// as is, together with a nil result.
func Glob(pattern string, doubleStarPatternDepth uint8) ([]string, error) {
	expanded, err := globPatterns(pattern, doubleStarPatternDepth)
	if err != nil {
		return nil, err
	}

	var found []string
	for i := range expanded {
		// Evaluate each expanded pattern as a regular shell glob.
		hits, globErr := filepath.Glob(expanded[i])
		if globErr != nil {
			return nil, globErr
		}
		found = append(found, hits...)
	}
	return found, nil
}
140 changes: 140 additions & 0 deletions filebeat/input/file/glob_other_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// +build !windows

package file

// globTests holds the TestGlob cases for non-Windows platforms. Patterns and
// expected matches are relative to the temporary root that TestGlob creates,
// which contains the directory chain foo/bar/baz/qux/quux. An expected match
// of "" stands for the root itself.
var globTests = []globTest{
	{pattern: "*", expectedMatches: []string{"foo"}},
	{pattern: "foo/*", expectedMatches: []string{"foo/bar"}},
	{pattern: "*/*", expectedMatches: []string{"foo/bar"}},
	{
		pattern: "**",
		expectedMatches: []string{
			"",
			"foo",
			"foo/bar",
			"foo/bar/baz",
			"foo/bar/baz/qux",
		},
	},
	// "**" glued to other characters is not a recursive wildcard.
	{pattern: "foo**", expectedMatches: []string{"foo"}},
	{
		pattern: "foo/**",
		expectedMatches: []string{
			"foo",
			"foo/bar",
			"foo/bar/baz",
			"foo/bar/baz/qux",
			"foo/bar/baz/qux/quux",
		},
	},
	{pattern: "foo/**/baz", expectedMatches: []string{"foo/bar/baz"}},
	{pattern: "foo/**/bazz", expectedMatches: []string{}},
	{pattern: "foo/**/bar", expectedMatches: []string{"foo/bar"}},
	{pattern: "foo//bar", expectedMatches: []string{"foo/bar"}},
}

// globPatternsTests holds the TestGlobPatterns cases for non-Windows
// platforms. TestGlobPatterns expands every pattern with a depth of 2.
var globPatternsTests = []globPatternsTest{
	{pattern: "**", expectedPatterns: []string{"*", "*/*"}, expectedError: false},
	{pattern: "/**", expectedPatterns: []string{"/", "/*", "/*/*"}, expectedError: false},
	{pattern: "**/", expectedPatterns: []string{"*", "*/*"}, expectedError: false},
	{pattern: "/foo/**", expectedPatterns: []string{"/foo", "/foo/*", "/foo/*/*"}, expectedError: false},
	{pattern: "/foo/**/bar", expectedPatterns: []string{"/foo/bar", "/foo/*/bar", "/foo/*/*/bar"}, expectedError: false},
	{pattern: "**/bar", expectedPatterns: []string{"bar", "*/bar", "*/*/bar"}, expectedError: false},
	{pattern: "/**/bar", expectedPatterns: []string{"/bar", "/*/bar", "/*/*/bar"}, expectedError: false},

	// More than one "**" element is rejected.
	{pattern: "**/**", expectedPatterns: []string{"*", "*/*"}, expectedError: true},
	{pattern: "/**/**", expectedPatterns: []string{"*", "*/*"}, expectedError: true},

	// "**" glued to other characters is left untouched.
	{pattern: "foo**/bar", expectedPatterns: []string{"foo**/bar"}, expectedError: false},
	{pattern: "**foo/bar", expectedPatterns: []string{"**foo/bar"}, expectedError: false},
	{pattern: "foo/**bar", expectedPatterns: []string{"foo/**bar"}, expectedError: false},
	{pattern: "foo/bar**", expectedPatterns: []string{"foo/bar**"}, expectedError: false},
}
82 changes: 82 additions & 0 deletions filebeat/input/file/glob_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package file

import (
"io/ioutil"
"os"
"path/filepath"
"testing"
)

// globTest describes one TestGlob case.
type globTest struct {
// pattern is the glob pattern; TestGlob joins it onto its temporary root directory.
pattern string
// expectedMatches lists the expected matches in order, relative to that root
// ("" stands for the root itself).
expectedMatches []string
}

// TestGlob creates the directory chain foo/bar/baz/qux/quux under a temporary
// root and checks, for every case in globTests, that Glob (with a depth of 4)
// returns exactly the expected paths in the expected order. Matches are
// compared relative to the temporary root.
func TestGlob(t *testing.T) {
	root, err := ioutil.TempDir("", "testglob")
	if err != nil {
		t.Fatal(err)
	}
	// ioutil.TempDir leaves cleanup to the caller; t.Fatal still runs
	// deferred calls, so the tree is removed on every exit path.
	defer os.RemoveAll(root)

	if err := os.MkdirAll(filepath.Join(root, "foo/bar/baz/qux/quux"), 0755); err != nil {
		t.Fatal(err)
	}

	for _, test := range globTests {
		pattern := filepath.Join(root, test.pattern)
		matches, err := Glob(pattern, 4)
		if err != nil {
			t.Fatal(err)
		}

		// Strip the temporary root (and the separator that follows it) so
		// that matches can be compared with the relative expectations.
		var normalizedMatches []string
		for _, m := range matches {
			if len(m) < len(root) {
				t.Fatalf("Matches for %q are expected to be under %s and %q is not", test.pattern, root, m)
			}
			var normalizedMatch string
			if len(m) > len(root) {
				normalizedMatch = m[len(root)+1:]
			} else {
				// The match is the root itself.
				normalizedMatch = m[len(root):]
			}
			normalizedMatches = append(normalizedMatches, normalizedMatch)
		}

		matchError := func() {
			t.Fatalf("Pattern %q matched %q instead of %q", test.pattern, normalizedMatches, test.expectedMatches)
		}
		if len(normalizedMatches) != len(test.expectedMatches) {
			matchError()
		}
		for i, expectedMatch := range test.expectedMatches {
			if normalizedMatches[i] != expectedMatch {
				matchError()
			}
		}
	}
}

// globPatternsTest describes one TestGlobPatterns case.
type globPatternsTest struct {
// pattern is the input handed to globPatterns.
pattern string
// expectedPatterns is the expected expansion, compared element by element in order.
expectedPatterns []string
// expectedError marks cases for which an error from globPatterns is accepted;
// when that error occurs, expectedPatterns is not checked.
expectedError bool
}

// TestGlobPatterns checks, for every case in globPatternsTests, that
// globPatterns (with a depth of 2) either fails when an error is expected or
// returns exactly the expected patterns in the expected order.
func TestGlobPatterns(t *testing.T) {
	for _, test := range globPatternsTests {
		patterns, err := globPatterns(test.pattern, 2)
		if test.expectedError {
			// A case that expects an error must actually produce one;
			// silently accepting success would hide regressions.
			if err == nil {
				t.Fatalf("%q expanded to %q instead of returning an error", test.pattern, patterns)
			}
			continue
		}
		if err != nil {
			t.Fatal(err)
		}
		if len(patterns) != len(test.expectedPatterns) {
			t.Fatalf("%q expanded to %q (%d) instead of %q (%d)", test.pattern, patterns, len(patterns),
				test.expectedPatterns, len(test.expectedPatterns))
		}
		for i, p := range patterns {
			if p != test.expectedPatterns[i] {
				t.Fatalf("%q expanded to %q instead of %q", test.pattern, patterns, test.expectedPatterns)
			}
		}
	}
}
Loading

0 comments on commit aaee997

Please sign in to comment.