From 1c1a2af22f674e5f24abc1f18688e7aaaf85d868 Mon Sep 17 00:00:00 2001 From: Monica Sarbu Date: Thu, 17 Dec 2015 21:06:39 +0100 Subject: [PATCH] Add exclude_files configuration option You can define exclude_files as a list of regular expressions. --- filebeat/config/config.go | 3 +++ filebeat/crawler/prospector.go | 26 +++++++++++++++++++++ filebeat/crawler/prospector_test.go | 20 +++++++++++++++++ filebeat/etc/beat.yml | 4 ++++ filebeat/etc/filebeat.yml | 4 ++++ filebeat/harvester/log.go | 30 ------------------------- filebeat/harvester/util.go | 35 ++++++++++++++++++++++++++++- 7 files changed, 91 insertions(+), 31 deletions(-) diff --git a/filebeat/config/config.go b/filebeat/config/config.go index 5174d8f793e1..db45ea305b22 100644 --- a/filebeat/config/config.go +++ b/filebeat/config/config.go @@ -4,6 +4,7 @@ import ( "log" "os" "path/filepath" + "regexp" "time" "github.com/elastic/beats/libbeat/cfgfile" @@ -49,6 +50,8 @@ type ProspectorConfig struct { ScanFrequency string `yaml:"scan_frequency"` ScanFrequencyDuration time.Duration Harvester HarvesterConfig `yaml:",inline"` + ExcludeFiles []string `yaml:"exclude_files"` + ExcludeFilesRegexp []*regexp.Regexp } type HarvesterConfig struct { diff --git a/filebeat/crawler/prospector.go b/filebeat/crawler/prospector.go index b92eae189958..800ce0dabba5 100644 --- a/filebeat/crawler/prospector.go +++ b/filebeat/crawler/prospector.go @@ -52,6 +52,10 @@ func (p *Prospector) setupProspectorConfig() error { if err != nil { return err } + config.ExcludeFilesRegexp, err = harvester.InitRegexps(config.ExcludeFiles) + if err != nil { + return err + } // Init File Stat list p.prospectorList = make(map[string]harvester.FileStat) @@ -219,11 +223,27 @@ func (p *Prospector) stdinRun(spoolChan chan *input.FileEvent) { } } +func (p *Prospector) isFileExcluded(file string) bool { + + config := &p.ProspectorConfig + + if len(config.ExcludeFilesRegexp) > 0 { + + if harvester.MatchAnyRegexps(config.ExcludeFilesRegexp, file) { + 
logp.Debug("prospector", "Exclude file: %s", file) + return true + } + } + + return false +} + // Scans the specific path which can be a glob (/**/**/*.log) // For all found files it is checked if a harvester should be started func (p *Prospector) scan(path string, output chan *input.FileEvent) { logp.Debug("prospector", "scan path %s", path) + logp.Debug("prospector", "exclude_files: %s", p.ProspectorConfig.ExcludeFiles) // Evaluate the path as a wildcards/shell glob matches, err := filepath.Glob(path) if err != nil { @@ -237,6 +257,12 @@ func (p *Prospector) scan(path string, output chan *input.FileEvent) { for _, file := range matches { logp.Debug("prospector", "Check file for harvesting: %s", file) + // check if the file is in the exclude_files list + if p.isFileExcluded(file) { + logp.Debug("prospector", "Exclude file: %s", file) + continue + } + // Stat the file, following any symlinks. fileinfo, err := os.Stat(file) diff --git a/filebeat/crawler/prospector_test.go b/filebeat/crawler/prospector_test.go index e186908b5e05..8ffe9a733f79 100644 --- a/filebeat/crawler/prospector_test.go +++ b/filebeat/crawler/prospector_test.go @@ -171,3 +171,23 @@ func TestProspectorInitInputTypeWrong(t *testing.T) { assert.Nil(t, err) assert.Equal(t, "log", prospector.ProspectorConfig.Harvester.InputType) } + +func TestProspectorFileExclude(t *testing.T) { + + prospectorConfig := config.ProspectorConfig{ + ExcludeFiles: []string{"\\.gz$"}, + Harvester: config.HarvesterConfig{ + BufferSize: 0, + }, + } + + prospector := Prospector{ + ProspectorConfig: prospectorConfig, + } + + prospector.Init() + + assert.True(t, prospector.isFileExcluded("/tmp/log/logw.gz")) + assert.False(t, prospector.isFileExcluded("/tmp/log/logw.log")) + +} diff --git a/filebeat/etc/beat.yml b/filebeat/etc/beat.yml index de8de4cedc60..e133ebe9e26a 100644 --- a/filebeat/etc/beat.yml +++ b/filebeat/etc/beat.yml @@ -40,6 +40,10 @@ filebeat: # exclude_lines. By default, all the lines are exported. 
# include_lines: ["^ERR", "^WARN"] + # Exclude files. A list of regular expressions to match. Filebeat drops the files that + # match any regular expression from the list. By default, no files are dropped. + # exclude_files: ["\\.gz$"] + # Optional additional fields. These field can be freely picked # to add additional information to the crawled log files for filtering #fields: diff --git a/filebeat/etc/filebeat.yml b/filebeat/etc/filebeat.yml index 8139278c1610..27743d4d2db9 100644 --- a/filebeat/etc/filebeat.yml +++ b/filebeat/etc/filebeat.yml @@ -40,6 +40,10 @@ filebeat: # exclude_lines. By default, all the lines are exported. # include_lines: ["^ERR", "^WARN"] + # Exclude files. A list of regular expressions to match. Filebeat drops the files that + # match any regular expression from the list. By default, no files are dropped. + # exclude_files: ["\\.gz$"] + # Optional additional fields. These field can be freely picked # to add additional information to the crawled log files for filtering #fields: diff --git a/filebeat/harvester/log.go b/filebeat/harvester/log.go index ab1699eddca2..e7efa22a3e4c 100644 --- a/filebeat/harvester/log.go +++ b/filebeat/harvester/log.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "os" - "regexp" "time" "github.com/elastic/beats/filebeat/config" @@ -332,35 +331,6 @@ func (h *Harvester) handleReadlineError(lastTimeRead time.Time, err error) error func (h *Harvester) Stop() { } -func InitRegexps(exprs []string) ([]*regexp.Regexp, error) { - - result := []*regexp.Regexp{} - - for _, exp := range exprs { - - rexp, err := regexp.CompilePOSIX(exp) - if err != nil { - logp.Err("Fail to compile the regexp %s: %s", exp, err) - return nil, err - } - result = append(result, rexp) - } - return result, nil -} - -func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool { - - for _, rexp := range regexps { - if rexp.MatchString(text) { - // drop line - return true - - } - } - - return false -} - const maxConsecutiveEmptyReads = 100 // 
timedReader keeps track of last time bytes have been read from underlying diff --git a/filebeat/harvester/util.go b/filebeat/harvester/util.go index 54b0c871588c..543545b8b007 100644 --- a/filebeat/harvester/util.go +++ b/filebeat/harvester/util.go @@ -1,9 +1,11 @@ package harvester import ( + "regexp" + "time" + "github.com/elastic/beats/filebeat/harvester/encoding" "github.com/elastic/beats/libbeat/logp" - "time" ) // isLine checks if the given byte array is a line, means has a line ending \n @@ -60,3 +62,34 @@ func readlineString(bytes []byte, size int) (string, int, error) { s := string(bytes)[:len(bytes)-lineEndingChars(bytes)] return s, size, nil } + +// InitRegexps compiles every expression with regexp.CompilePOSIX and returns the compiled list; on the first compile failure it logs the error and returns nil together with that error. +func InitRegexps(exprs []string) ([]*regexp.Regexp, error) { + + result := []*regexp.Regexp{} + + for _, exp := range exprs { + + rexp, err := regexp.CompilePOSIX(exp) + if err != nil { + logp.Err("Fail to compile the regexp %s: %s", exp, err) + return nil, err + } + result = append(result, rexp) + } + return result, nil +} + +// MatchAnyRegexps reports whether text matches at least one of the given regular expressions; an empty list never matches. +func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool { + + for _, rexp := range regexps { + if rexp.MatchString(text) { + // one match is enough, stop checking the remaining expressions + return true + + } + } + + return false +}