From b6241b852bc00999e7efd9c45152e2e48c2fd155 Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Wed, 19 Aug 2020 10:29:25 -0400 Subject: [PATCH] Enable the multiline flag in line splitter regex by default --- operator/builtin/input/file/file.go | 45 +++++++++---------- .../builtin/input/file/line_splitter_test.go | 34 ++++++++++++-- 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/operator/builtin/input/file/file.go b/operator/builtin/input/file/file.go index 7fd142958..79ab4b1b0 100644 --- a/operator/builtin/input/file/file.go +++ b/operator/builtin/input/file/file.go @@ -165,35 +165,32 @@ func lookupEncoding(enc string) (encoding.Encoding, error) { // getSplitFunc will return the split function associated the configured mode. func (c InputConfig) getSplitFunc(encoding encoding.Encoding) (bufio.SplitFunc, error) { - var splitFunc bufio.SplitFunc if c.Multiline == nil { - var err error - splitFunc, err = NewNewlineSplitFunc(encoding) + return NewNewlineSplitFunc(encoding) + } + endPattern := c.Multiline.LineEndPattern + startPattern := c.Multiline.LineStartPattern + + switch { + case endPattern != "" && startPattern != "": + return nil, fmt.Errorf("only one of line_start_pattern or line_end_pattern can be set") + case endPattern == "" && startPattern == "": + return nil, fmt.Errorf("one of line_start_pattern or line_end_pattern must be set") + case endPattern != "": + re, err := regexp.Compile("(?m)" + c.Multiline.LineEndPattern) if err != nil { - return nil, err + return nil, fmt.Errorf("compile line end regex: %s", err) } - } else { - definedLineEndPattern := c.Multiline.LineEndPattern != "" - definedLineStartPattern := c.Multiline.LineStartPattern != "" - - switch { - case definedLineEndPattern == definedLineStartPattern: - return nil, fmt.Errorf("if multiline is configured, exactly one of line_start_pattern or line_end_pattern must be set") - case definedLineEndPattern: - re, err := regexp.Compile(c.Multiline.LineEndPattern) - if err != nil { - return nil, fmt.Errorf("compile line end regex: %s", err) - } - splitFunc = NewLineEndSplitFunc(re) - case definedLineStartPattern: - re, err := regexp.Compile(c.Multiline.LineStartPattern) - if err != nil { - return nil, fmt.Errorf("compile line start regex: %s", err) - } - splitFunc = NewLineStartSplitFunc(re) + return NewLineEndSplitFunc(re), nil + case startPattern != "": + re, err := regexp.Compile("(?m)" + c.Multiline.LineStartPattern) + if err != nil { + return nil, fmt.Errorf("compile line start regex: %s", err) } + return NewLineStartSplitFunc(re), nil + default: + return nil, fmt.Errorf("unreachable") } - return splitFunc, nil } // InputOperator is an operator that monitors files for entries diff --git a/operator/builtin/input/file/line_splitter_test.go b/operator/builtin/input/file/line_splitter_test.go index 5b91e8f78..0ecee516d 100644 --- a/operator/builtin/input/file/line_splitter_test.go +++ b/operator/builtin/input/file/line_splitter_test.go @@ -58,6 +58,15 @@ func TestLineStartSplitFunc(t *testing.T) { `LOGSTART 234 log2 `, }, }, + { + Name: "TwoLogsLineStart", + Pattern: `^LOGSTART \d+ `, + Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), + ExpectedTokenized: []string{ + "LOGSTART 123 LOGSTART 345 log1\n", + "LOGSTART 234 log2\n", + }, + }, { Name: "NoMatches", Pattern: `LOGSTART \d+ `, @@ -114,8 +123,12 @@ func TestLineStartSplitFunc(t *testing.T) { } for _, tc := range testCases { - re := regexp.MustCompile(tc.Pattern) - splitFunc := NewLineStartSplitFunc(re) + cfg := NewInputConfig("") + cfg.Multiline = &MultilineConfig{ + LineStartPattern: tc.Pattern, + } + splitFunc, err := cfg.getSplitFunc(unicode.UTF8) + require.NoError(t, err) t.Run(tc.Name, tc.RunFunc(splitFunc)) } @@ -158,6 +171,15 @@ func TestLineEndSplitFunc(t *testing.T) { `log2 LOGEND 234`, }, }, + { + Name: "TwoLogsLineEndSimple", + Pattern: `LOGEND$`, + Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"), + ExpectedTokenized: []string{ + "log1 LOGEND LOGEND", + "\nlog2 LOGEND", + }, + }, { Name: "NoMatches", Pattern: `LOGEND \d+`, @@ -210,8 +232,12 @@ func TestLineEndSplitFunc(t *testing.T) { } for _, tc := range testCases { - re := regexp.MustCompile(tc.Pattern) - splitFunc := NewLineEndSplitFunc(re) + cfg := NewInputConfig("") + cfg.Multiline = &MultilineConfig{ + LineEndPattern: tc.Pattern, + } + splitFunc, err := cfg.getSplitFunc(unicode.UTF8) + require.NoError(t, err) t.Run(tc.Name, tc.RunFunc(splitFunc)) } }