Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable the multiline flag in line splitter regex by default #96

Merged
merged 1 commit into from
Aug 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 21 additions & 24 deletions operator/builtin/input/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,35 +165,32 @@ func lookupEncoding(enc string) (encoding.Encoding, error) {

// getSplitFunc will return the split function associated with the configured mode.
func (c InputConfig) getSplitFunc(encoding encoding.Encoding) (bufio.SplitFunc, error) {
var splitFunc bufio.SplitFunc
if c.Multiline == nil {
var err error
splitFunc, err = NewNewlineSplitFunc(encoding)
return NewNewlineSplitFunc(encoding)
}
endPattern := c.Multiline.LineEndPattern
startPattern := c.Multiline.LineStartPattern

switch {
case endPattern != "" && startPattern != "":
return nil, fmt.Errorf("only one of line_start_pattern or line_end_pattern can be set")
case endPattern == "" && startPattern == "":
return nil, fmt.Errorf("one of line_start_pattern or line_end_pattern must be set")
case endPattern != "":
re, err := regexp.Compile("(?m)" + c.Multiline.LineEndPattern)
if err != nil {
return nil, err
return nil, fmt.Errorf("compile line end regex: %s", err)
}
} else {
definedLineEndPattern := c.Multiline.LineEndPattern != ""
definedLineStartPattern := c.Multiline.LineStartPattern != ""

switch {
case definedLineEndPattern == definedLineStartPattern:
return nil, fmt.Errorf("if multiline is configured, exactly one of line_start_pattern or line_end_pattern must be set")
case definedLineEndPattern:
re, err := regexp.Compile(c.Multiline.LineEndPattern)
if err != nil {
return nil, fmt.Errorf("compile line end regex: %s", err)
}
splitFunc = NewLineEndSplitFunc(re)
case definedLineStartPattern:
re, err := regexp.Compile(c.Multiline.LineStartPattern)
if err != nil {
return nil, fmt.Errorf("compile line start regex: %s", err)
}
splitFunc = NewLineStartSplitFunc(re)
return NewLineEndSplitFunc(re), nil
case startPattern != "":
re, err := regexp.Compile("(?m)" + c.Multiline.LineStartPattern)
if err != nil {
return nil, fmt.Errorf("compile line start regex: %s", err)
}
return NewLineStartSplitFunc(re), nil
default:
return nil, fmt.Errorf("unreachable")
}
return splitFunc, nil
}

// InputOperator is an operator that monitors files for entries
Expand Down
34 changes: 30 additions & 4 deletions operator/builtin/input/file/line_splitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ func TestLineStartSplitFunc(t *testing.T) {
`LOGSTART 234 log2 `,
},
},
{
Name: "TwoLogsLineStart",
Pattern: `^LOGSTART \d+ `,
Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"),
ExpectedTokenized: []string{
"LOGSTART 123 LOGSTART 345 log1\n",
"LOGSTART 234 log2\n",
},
},
{
Name: "NoMatches",
Pattern: `LOGSTART \d+ `,
Expand Down Expand Up @@ -114,8 +123,12 @@ func TestLineStartSplitFunc(t *testing.T) {
}

for _, tc := range testCases {
re := regexp.MustCompile(tc.Pattern)
splitFunc := NewLineStartSplitFunc(re)
cfg := NewInputConfig("")
cfg.Multiline = &MultilineConfig{
LineStartPattern: tc.Pattern,
}
splitFunc, err := cfg.getSplitFunc(unicode.UTF8)
require.NoError(t, err)
t.Run(tc.Name, tc.RunFunc(splitFunc))
}

Expand Down Expand Up @@ -158,6 +171,15 @@ func TestLineEndSplitFunc(t *testing.T) {
`log2 LOGEND 234`,
},
},
{
Name: "TwoLogsLineEndSimple",
Pattern: `LOGEND$`,
Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"),
ExpectedTokenized: []string{
"log1 LOGEND LOGEND",
"\nlog2 LOGEND",
},
},
{
Name: "NoMatches",
Pattern: `LOGEND \d+`,
Expand Down Expand Up @@ -210,8 +232,12 @@ func TestLineEndSplitFunc(t *testing.T) {
}

for _, tc := range testCases {
re := regexp.MustCompile(tc.Pattern)
splitFunc := NewLineEndSplitFunc(re)
cfg := NewInputConfig("")
cfg.Multiline = &MultilineConfig{
LineEndPattern: tc.Pattern,
}
splitFunc, err := cfg.getSplitFunc(unicode.UTF8)
require.NoError(t, err)
t.Run(tc.Name, tc.RunFunc(splitFunc))
}
}
Expand Down