Skip to content

Commit

Permalink
Enable the multiline flag in line splitter regex by default
Browse files Browse the repository at this point in the history
  • Loading branch information
camdencheek committed Aug 19, 2020
1 parent 0a0c521 commit b6241b8
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 28 deletions.
45 changes: 21 additions & 24 deletions operator/builtin/input/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,35 +165,32 @@ func lookupEncoding(enc string) (encoding.Encoding, error) {

// getSplitFunc will return the split function associated with the configured mode.
func (c InputConfig) getSplitFunc(encoding encoding.Encoding) (bufio.SplitFunc, error) {
var splitFunc bufio.SplitFunc
if c.Multiline == nil {
var err error
splitFunc, err = NewNewlineSplitFunc(encoding)
return NewNewlineSplitFunc(encoding)
}
endPattern := c.Multiline.LineEndPattern
startPattern := c.Multiline.LineStartPattern

switch {
case endPattern != "" && startPattern != "":
return nil, fmt.Errorf("only one of line_start_pattern or line_end_pattern can be set")
case endPattern == "" && startPattern == "":
return nil, fmt.Errorf("one of line_start_pattern or line_end_pattern must be set")
case endPattern != "":
re, err := regexp.Compile("(?m)" + c.Multiline.LineEndPattern)
if err != nil {
return nil, err
return nil, fmt.Errorf("compile line end regex: %s", err)
}
} else {
definedLineEndPattern := c.Multiline.LineEndPattern != ""
definedLineStartPattern := c.Multiline.LineStartPattern != ""

switch {
case definedLineEndPattern == definedLineStartPattern:
return nil, fmt.Errorf("if multiline is configured, exactly one of line_start_pattern or line_end_pattern must be set")
case definedLineEndPattern:
re, err := regexp.Compile(c.Multiline.LineEndPattern)
if err != nil {
return nil, fmt.Errorf("compile line end regex: %s", err)
}
splitFunc = NewLineEndSplitFunc(re)
case definedLineStartPattern:
re, err := regexp.Compile(c.Multiline.LineStartPattern)
if err != nil {
return nil, fmt.Errorf("compile line start regex: %s", err)
}
splitFunc = NewLineStartSplitFunc(re)
return NewLineEndSplitFunc(re), nil
case startPattern != "":
re, err := regexp.Compile("(?m)" + c.Multiline.LineStartPattern)
if err != nil {
return nil, fmt.Errorf("compile line start regex: %s", err)
}
return NewLineStartSplitFunc(re), nil
default:
return nil, fmt.Errorf("unreachable")
}
return splitFunc, nil
}

// InputOperator is an operator that monitors files for entries
Expand Down
34 changes: 30 additions & 4 deletions operator/builtin/input/file/line_splitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ func TestLineStartSplitFunc(t *testing.T) {
`LOGSTART 234 log2 `,
},
},
{
Name: "TwoLogsLineStart",
Pattern: `^LOGSTART \d+ `,
Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"),
ExpectedTokenized: []string{
"LOGSTART 123 LOGSTART 345 log1\n",
"LOGSTART 234 log2\n",
},
},
{
Name: "NoMatches",
Pattern: `LOGSTART \d+ `,
Expand Down Expand Up @@ -114,8 +123,12 @@ func TestLineStartSplitFunc(t *testing.T) {
}

for _, tc := range testCases {
re := regexp.MustCompile(tc.Pattern)
splitFunc := NewLineStartSplitFunc(re)
cfg := NewInputConfig("")
cfg.Multiline = &MultilineConfig{
LineStartPattern: tc.Pattern,
}
splitFunc, err := cfg.getSplitFunc(unicode.UTF8)
require.NoError(t, err)
t.Run(tc.Name, tc.RunFunc(splitFunc))
}

Expand Down Expand Up @@ -158,6 +171,15 @@ func TestLineEndSplitFunc(t *testing.T) {
`log2 LOGEND 234`,
},
},
{
Name: "TwoLogsLineEndSimple",
Pattern: `LOGEND$`,
Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"),
ExpectedTokenized: []string{
"log1 LOGEND LOGEND",
"\nlog2 LOGEND",
},
},
{
Name: "NoMatches",
Pattern: `LOGEND \d+`,
Expand Down Expand Up @@ -210,8 +232,12 @@ func TestLineEndSplitFunc(t *testing.T) {
}

for _, tc := range testCases {
re := regexp.MustCompile(tc.Pattern)
splitFunc := NewLineEndSplitFunc(re)
cfg := NewInputConfig("")
cfg.Multiline = &MultilineConfig{
LineEndPattern: tc.Pattern,
}
splitFunc, err := cfg.getSplitFunc(unicode.UTF8)
require.NoError(t, err)
t.Run(tc.Name, tc.RunFunc(splitFunc))
}
}
Expand Down

0 comments on commit b6241b8

Please sign in to comment.