diff --git a/operator/helper/multiline.go b/operator/helper/multiline.go index a6820f9f..5febc984 100644 --- a/operator/helper/multiline.go +++ b/operator/helper/multiline.go @@ -79,7 +79,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { if firstLoc == nil { // Flush if no more data is expected if len(data) != 0 && atEOF && flushAtEOF { - return len(data), data, nil + token = trimWhitespaces(data) + advance = len(data) + return } return 0, nil, nil // read more data and try again. } @@ -89,7 +91,7 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { if firstMatchStart != 0 { // the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data advance = firstMatchStart - token = data[0:firstMatchStart] + token = trimWhitespaces(data[0:firstMatchStart]) return } @@ -100,7 +102,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { // Flush if no more data is expected if atEOF && flushAtEOF { - return len(data), data, nil + token = trimWhitespaces(data) + advance = len(data) + return } secondLocOffset := firstMatchEnd + 1 @@ -110,8 +114,8 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { } secondMatchStart := secondLoc[0] + secondLocOffset - advance = secondMatchStart // start scanning at the beginning of the second match - token = data[firstMatchStart:secondMatchStart] // the token begins at the first match, and ends at the beginning of the second match + advance = secondMatchStart // start scanning at the beginning of the second match + token = trimWhitespaces(data[firstMatchStart:secondMatchStart]) // the token begins at the first match, and ends at the beginning of the second match err = nil return } @@ -125,7 +129,9 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { if loc == nil { // Flush if no more data is expected if len(data) != 0 && atEOF
&& flushAtEOF { - return len(data), data, nil + token = trimWhitespaces(data) + advance = len(data) + return } return 0, nil, nil // read more data and try again } @@ -137,7 +143,7 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc { } advance = loc[1] - token = data[:loc[1]] + token = trimWhitespaces(data[:loc[1]]) err = nil return } @@ -168,7 +174,9 @@ func NewNewlineSplitFunc(encoding encoding.Encoding, flushAtEOF bool) (bufio.Spl // Flush if no more data is expected if atEOF && flushAtEOF { - return len(data), data, nil + token = trimWhitespaces(data) + advance = len(data) + return } // Request more data. @@ -187,3 +195,7 @@ func encodedCarriageReturn(encoding encoding.Encoding) ([]byte, error) { nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\r'}, true) return out[:nDst], err } + +func trimWhitespaces(data []byte) []byte { + return bytes.TrimRight(data, "\r\n\t ") +} diff --git a/operator/helper/multiline_test.go b/operator/helper/multiline_test.go index 637deb86..92b1b52f 100644 --- a/operator/helper/multiline_test.go +++ b/operator/helper/multiline_test.go @@ -68,8 +68,8 @@ func TestLineStartSplitFunc(t *testing.T) { Pattern: `LOGSTART \d+ `, Raw: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`), ExpectedTokenized: []string{ - `LOGSTART 123 log1 `, - `LOGSTART 234 log2 `, + `LOGSTART 123 log1`, + `LOGSTART 234 log2`, }, }, { @@ -77,8 +77,8 @@ func TestLineStartSplitFunc(t *testing.T) { Pattern: `^LOGSTART \d+ `, Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), ExpectedTokenized: []string{ - "LOGSTART 123 LOGSTART 345 log1\n", - "LOGSTART 234 log2\n", + "LOGSTART 123 LOGSTART 345 log1", + "LOGSTART 234 log2", }, }, { @@ -92,7 +92,7 @@ func TestLineStartSplitFunc(t *testing.T) { Pattern: `LOGSTART \d+ `, Raw: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`), ExpectedTokenized: []string{ - `part that doesn't match `, + `part that doesn't match`,
`LOGSTART 123 part that matches`, }, },