From aaee997204159b9cfdf10a4425ae331bd92a2b58 Mon Sep 17 00:00:00 2001 From: Giuseppe Valente <7AC@users.noreply.github.com> Date: Tue, 25 Apr 2017 23:21:32 -0700 Subject: [PATCH] filebeat: expand double wildcards in prospector (#3980) Expand double wildcards into standard glob patterns, up to a maximum depth of 8 levels after the wildcard. Resolves https://github.com/elastic/beats/issues/2084 --- CHANGELOG.asciidoc | 1 + filebeat/_meta/common.full.p2.yml | 5 + .../configuration/filebeat-options.asciidoc | 15 +- filebeat/filebeat.full.yml | 5 + filebeat/input/file/glob.go | 71 +++++++++ filebeat/input/file/glob_other_test.go | 140 ++++++++++++++++++ filebeat/input/file/glob_test.go | 82 ++++++++++ filebeat/input/file/glob_windows_test.go | 92 ++++++++++++ filebeat/prospector/config.go | 1 + filebeat/prospector/prospector_log.go | 15 +- 10 files changed, 420 insertions(+), 7 deletions(-) create mode 100644 filebeat/input/file/glob.go create mode 100644 filebeat/input/file/glob_other_test.go create mode 100644 filebeat/input/file/glob_test.go create mode 100644 filebeat/input/file/glob_windows_test.go diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 8fe093b551c4..f9d7f2e4505a 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -121,6 +121,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Add base for supporting prospector level processors {pull}3853[3853] - Add auditd module for reading audit logs on Linux. {pull}3750[3750] {pull}3941[3941] - Add filebeat.config.path as replacement for config_dir. {pull}4051[4051] +- Add a `recursive_glob.enabled` setting to expand "**" in patterns. 
{pull}3980[3980] *Heartbeat* diff --git a/filebeat/_meta/common.full.p2.yml b/filebeat/_meta/common.full.p2.yml index bc949f75aef1..a70f75f00a80 100644 --- a/filebeat/_meta/common.full.p2.yml +++ b/filebeat/_meta/common.full.p2.yml @@ -82,6 +82,11 @@ filebeat.prospectors: # This is especially useful for multiline log messages which can get large. #max_bytes: 10485760 + ### Recursive glob configuration + + # Expand "**" patterns into regular glob patterns. + #recursive_glob.enabled: true + ### JSON configuration # Decode JSON options. Enable this if your logs are structured in JSON. diff --git a/filebeat/docs/reference/configuration/filebeat-options.asciidoc b/filebeat/docs/reference/configuration/filebeat-options.asciidoc index dd072016d7e5..9af1dde0e2ad 100644 --- a/filebeat/docs/reference/configuration/filebeat-options.asciidoc +++ b/filebeat/docs/reference/configuration/filebeat-options.asciidoc @@ -41,12 +41,22 @@ supported by https://golang.org/pkg/path/filepath/#Glob[Golang Glob] are also supported here. For example, to fetch all files from a predefined level of subdirectories, the following pattern can be used: `/var/log/*/*.log`. This fetches all `.log` files from the subfolders of `/var/log`. It does not -fetch log files from the `/var/log` folder itself. Currently it is not possible -to recursively fetch all files in all subdirectories of a directory. +fetch log files from the `/var/log` folder itself. +It is possible to recursively fetch all files in all subdirectories of a directory +using the optional <<recursive_glob>> settings. Filebeat starts a harvester for each file that it finds under the specified paths. You can specify one path per line. Each line begins with a dash (-). +[[recursive_glob]] +===== recursive_glob + +*`enabled`*:: Enable expanding `**` into recursive glob patterns. With this feature enabled, +the rightmost `**` in each path is expanded into a fixed +number of glob patterns. 
For example: `/foo/**` expands to `/foo`, `/foo/*`, +`/foo/*/*`, and so on. The feature is disabled by default, and if enabled it expands a single `**` +into an 8-level deep `*` pattern. + ===== encoding The file encoding to use for reading files that contain international characters. @@ -591,4 +601,3 @@ include::../../../../libbeat/docs/dashboardsconfig.asciidoc[] include::../../../../libbeat/docs/loggingconfig.asciidoc[] include::../../../../libbeat/docs/processors-config.asciidoc[] - diff --git a/filebeat/filebeat.full.yml b/filebeat/filebeat.full.yml index 52c2f3583fc4..18ef8a7a5761 100644 --- a/filebeat/filebeat.full.yml +++ b/filebeat/filebeat.full.yml @@ -249,6 +249,11 @@ filebeat.prospectors: # This is especially useful for multiline log messages which can get large. #max_bytes: 10485760 + ### Recursive glob configuration + + # Expand "**" patterns into regular glob patterns. + #recursive_glob.enabled: true + ### JSON configuration # Decode JSON options. Enable this if your logs are structured in JSON. 
diff --git a/filebeat/input/file/glob.go b/filebeat/input/file/glob.go new file mode 100644 index 000000000000..71917fcbaa41 --- /dev/null +++ b/filebeat/input/file/glob.go @@ -0,0 +1,71 @@ +package file + +import ( + "fmt" + "path/filepath" +) + +func wildcards(doubleStarPatternDepth uint8, dir string, suffix string) []string { + wildcardList := []string{} + w := "" + i := uint8(0) + if dir == "" && suffix == "" { + // Don't expand to "" on relative paths + w = "*" + i = 1 + } + for ; i <= doubleStarPatternDepth; i++ { + wildcardList = append(wildcardList, w) + w = filepath.Join(w, "*") + } + return wildcardList +} + +// globPatterns detects the use of "**" and expands it to standard glob patterns up to a max depth +func globPatterns(pattern string, doubleStarPatternDepth uint8) ([]string, error) { + if doubleStarPatternDepth == 0 { + return []string{pattern}, nil + } + var wildcardList []string + var prefix string + var suffix string + dir, file := filepath.Split(filepath.Clean(pattern)) + for file != "" && file != "." 
{ + if file == "**" { + if len(wildcardList) > 0 { + return nil, fmt.Errorf("multiple ** in %q", pattern) + } + wildcardList = wildcards(doubleStarPatternDepth, dir, suffix) + prefix = dir + } else if len(wildcardList) == 0 { + suffix = filepath.Join(file, suffix) + } + dir, file = filepath.Split(filepath.Clean(dir)) + } + if len(wildcardList) == 0 { + return []string{pattern}, nil + } + var patterns []string + for _, w := range wildcardList { + patterns = append(patterns, filepath.Join(prefix, w, suffix)) + } + return patterns, nil +} + +// Glob expands '**' patterns into multiple patterns to satisfy https://golang.org/pkg/path/filepath/#Match +func Glob(pattern string, doubleStarPatternDepth uint8) ([]string, error) { + patterns, err := globPatterns(pattern, doubleStarPatternDepth) + if err != nil { + return nil, err + } + var matches []string + for _, p := range patterns { + // Evaluate the path as a wildcards/shell glob + match, err := filepath.Glob(p) + if err != nil { + return nil, err + } + matches = append(matches, match...) 
+ } + return matches, nil +} diff --git a/filebeat/input/file/glob_other_test.go b/filebeat/input/file/glob_other_test.go new file mode 100644 index 000000000000..23373bf5a495 --- /dev/null +++ b/filebeat/input/file/glob_other_test.go @@ -0,0 +1,140 @@ +// +build !windows + +package file + +var globTests = []globTest{ + { + "*", + []string{ + "foo", + }, + }, + { + "foo/*", + []string{ + "foo/bar", + }, + }, + { + "*/*", + []string{ + "foo/bar", + }, + }, + { + "**", + []string{ + "", + "foo", + "foo/bar", + "foo/bar/baz", + "foo/bar/baz/qux", + }, + }, + { + "foo**", + []string{ + "foo", + }, + }, + { + "foo/**", + []string{ + "foo", + "foo/bar", + "foo/bar/baz", + "foo/bar/baz/qux", + "foo/bar/baz/qux/quux", + }, + }, + { + "foo/**/baz", + []string{ + "foo/bar/baz", + }, + }, + { + "foo/**/bazz", + []string{}, + }, + { + "foo/**/bar", + []string{ + "foo/bar", + }, + }, + { + "foo//bar", + []string{ + "foo/bar", + }, + }, +} + +var globPatternsTests = []globPatternsTest{ + { + "**", + []string{"*", "*/*"}, + false, + }, + { + "/**", + []string{"/", "/*", "/*/*"}, + false, + }, + { + "**/", + []string{"*", "*/*"}, + false, + }, + { + "/foo/**", + []string{"/foo", "/foo/*", "/foo/*/*"}, + false, + }, + { + "/foo/**/bar", + []string{"/foo/bar", "/foo/*/bar", "/foo/*/*/bar"}, + false, + }, + { + "**/bar", + []string{"bar", "*/bar", "*/*/bar"}, + false, + }, + { + "/**/bar", + []string{"/bar", "/*/bar", "/*/*/bar"}, + false, + }, + { + "**/**", + []string{"*", "*/*"}, + true, + }, + { + "/**/**", + []string{"*", "*/*"}, + true, + }, + { + "foo**/bar", + []string{"foo**/bar"}, + false, + }, + { + "**foo/bar", + []string{"**foo/bar"}, + false, + }, + { + "foo/**bar", + []string{"foo/**bar"}, + false, + }, + { + "foo/bar**", + []string{"foo/bar**"}, + false, + }, +} diff --git a/filebeat/input/file/glob_test.go b/filebeat/input/file/glob_test.go new file mode 100644 index 000000000000..aa9d2c58e633 --- /dev/null +++ b/filebeat/input/file/glob_test.go @@ -0,0 +1,82 @@ 
+package file + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +type globTest struct { + pattern string + expectedMatches []string +} + +func TestGlob(t *testing.T) { + root, err := ioutil.TempDir("", "testglob") + if err != nil { + t.Fatal(err) + } + os.MkdirAll(filepath.Join(root, "foo/bar/baz/qux/quux"), 0755) + for _, test := range globTests { + pattern := filepath.Join(root, test.pattern) + matches, err := Glob(pattern, 4) + if err != nil { + t.Fatal(err) + continue + } + var normalizedMatches []string + for _, m := range matches { + if len(m) < len(root) { + t.Fatalf("Matches for %q are expected to be under %s and %q is not", test.pattern, root, m) + } + var normalizedMatch string + if len(m) > len(root) { + normalizedMatch = m[len(root)+1:] + } else { + normalizedMatch = m[len(root):] + } + normalizedMatches = append(normalizedMatches, normalizedMatch) + } + matchError := func() { + t.Fatalf("Pattern %q matched %q instead of %q", test.pattern, normalizedMatches, test.expectedMatches) + } + if len(normalizedMatches) != len(test.expectedMatches) { + matchError() + continue + } + for i, expectedMatch := range test.expectedMatches { + if normalizedMatches[i] != expectedMatch { + matchError() + } + } + } +} + +type globPatternsTest struct { + pattern string + expectedPatterns []string + expectedError bool +} + +func TestGlobPatterns(t *testing.T) { + for _, test := range globPatternsTests { + patterns, err := globPatterns(test.pattern, 2) + if err != nil { + if test.expectedError { + continue + } + t.Fatal(err) + } + if len(patterns) != len(test.expectedPatterns) { + t.Fatalf("%q expanded to %q (%d) instead of %q (%d)", test.pattern, patterns, len(patterns), + test.expectedPatterns, len(test.expectedPatterns)) + } + for i, p := range patterns { + if p != test.expectedPatterns[i] { + t.Fatalf("%q expanded to %q instead of %q", test.pattern, patterns, test.expectedPatterns) + break + } + } + } +} diff --git 
a/filebeat/input/file/glob_windows_test.go b/filebeat/input/file/glob_windows_test.go new file mode 100644 index 000000000000..aadbe8979fd5 --- /dev/null +++ b/filebeat/input/file/glob_windows_test.go @@ -0,0 +1,92 @@ +// +build windows + +package file + +var globTests = []globTest{ + { + "*", + []string{ + "foo", + }, + }, + { + "foo\\*", + []string{ + "foo\\bar", + }, + }, + { + "foo/*", + []string{ + "foo\\bar", + }, + }, + { + "*\\*", + []string{ + "foo\\bar", + }, + }, + { + "**", + []string{ + "", + "foo", + "foo\\bar", + "foo\\bar\\baz", + "foo\\bar\\baz\\qux", + }, + }, + { + "foo**", + []string{ + "foo", + }, + }, + { + "foo\\**", + []string{ + "foo", + "foo\\bar", + "foo\\bar\\baz", + "foo\\bar\\baz\\qux", + "foo\\bar\\baz\\qux\\quux", + }, + }, + { + "foo\\**\\baz", + []string{ + "foo\\bar\\baz", + }, + }, + { + "foo/**\\baz", + []string{ + "foo\\bar\\baz", + }, + }, + { + "foo\\**\\bazz", + []string{}, + }, + { + "foo\\**\\bar", + []string{ + "foo\\bar", + }, + }, + { + "foo\\\\bar", + []string{ + "foo\\bar", + }, + }, +} + +var globPatternsTests = []globPatternsTest{ + { + "C:\\foo\\**\\bar", + []string{"C:\\foo\\bar", "C:\\foo\\*\\bar", "C:\\foo\\*\\*\\bar"}, + false, + }, +} diff --git a/filebeat/prospector/config.go b/filebeat/prospector/config.go index 839fa2e79326..1cef49f67b4d 100644 --- a/filebeat/prospector/config.go +++ b/filebeat/prospector/config.go @@ -45,6 +45,7 @@ type prospectorConfig struct { Module string `config:"_module_name"` // hidden option to set the module name Fileset string `config:"_fileset_name"` // hidden option to set the fileset name Processors processors.PluginConfig `config:"processors"` + RecursiveGlob bool `config:"recursive_glob.enabled"` // must be exported, otherwise the config unpacker cannot populate it } func (config *prospectorConfig) Validate() error { diff --git a/filebeat/prospector/prospector_log.go b/filebeat/prospector/prospector_log.go index 9ddfb4c5af94..01ad464263d3 100644 --- a/filebeat/prospector/prospector_log.go +++ b/filebeat/prospector/prospector_log.go @@ -13,6 
+13,10 @@ import ( "github.com/elastic/beats/libbeat/monitoring" ) +const ( + recursiveGlobDepth = 8 +) + var ( filesRenamed = monitoring.NewInt(nil, "filebeat.prospector.log.files.renamed") filesTruncated = monitoring.NewInt(nil, "filebeat.prospector.log.files.truncated") @@ -140,11 +144,14 @@ func (l *Log) getFiles() map[string]os.FileInfo { paths := map[string]os.FileInfo{} - for _, glob := range l.config.Paths { - // Evaluate the path as a wildcards/shell glob - matches, err := filepath.Glob(glob) + for _, path := range l.config.Paths { + depth := uint8(0) + if l.config.RecursiveGlob { + depth = recursiveGlobDepth + } + matches, err := file.Glob(path, depth) if err != nil { - logp.Err("glob(%s) failed: %v", glob, err) + logp.Err("glob(%s) failed: %v", path, err) continue }