Skip to content

Commit

Permalink
feat: trim whitespace characters for file input with multiline (#212)
Browse files Browse the repository at this point in the history
* feat: strip whitespace characters in multiline helper

Signed-off-by: Dominik Rosiek <[email protected]>

* fix(multiline): use proper advance and strip newline characters from the beginning of the log

Signed-off-by: Dominik Rosiek <[email protected]>

* test(multiline): add multiple multiline logs case

Signed-off-by: Dominik Rosiek <[email protected]>
  • Loading branch information
sumo-drosiek authored Jul 9, 2021
1 parent 3f1d661 commit 2ffe032
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 14 deletions.
31 changes: 23 additions & 8 deletions operator/helper/multiline.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if firstLoc == nil {
// Flush if no more data is expected
if len(data) != 0 && atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}
return 0, nil, nil // read more data and try again.
}
Expand All @@ -89,7 +91,7 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if firstMatchStart != 0 {
// the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data
advance = firstMatchStart
token = data[0:firstMatchStart]
token = trimWhitespaces(data[0:firstMatchStart])
return
}

Expand All @@ -100,7 +102,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {

// Flush if no more data is expected
if atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}

secondLocOffset := firstMatchEnd + 1
Expand All @@ -110,8 +114,8 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
}
secondMatchStart := secondLoc[0] + secondLocOffset

advance = secondMatchStart // start scanning at the beginning of the second match
token = data[firstMatchStart:secondMatchStart] // the token begins at the first match, and ends at the beginning of the second match
advance = secondMatchStart // start scanning at the beginning of the second match
token = trimWhitespaces(data[firstMatchStart:secondMatchStart]) // the token begins at the first match, and ends at the beginning of the second match
err = nil
return
}
Expand All @@ -125,7 +129,9 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if loc == nil {
// Flush if no more data is expected
if len(data) != 0 && atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}
return 0, nil, nil // read more data and try again
}
Expand All @@ -137,7 +143,7 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
}

advance = loc[1]
token = data[:loc[1]]
token = trimWhitespaces(data[:loc[1]])
err = nil
return
}
Expand Down Expand Up @@ -168,7 +174,9 @@ func NewNewlineSplitFunc(encoding encoding.Encoding, flushAtEOF bool) (bufio.Spl

// Flush if no more data is expected
if atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}

// Request more data.
Expand All @@ -187,3 +195,10 @@ func encodedCarriageReturn(encoding encoding.Encoding) ([]byte, error) {
nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\r'}, true)
return out[:nDst], err
}

// trimWhitespaces returns data without its trailing carriage returns,
// newlines, tabs and spaces, and without its leading carriage returns and
// newlines. Leading tabs and spaces are kept. The result is a subslice of
// data; nothing is copied.
func trimWhitespaces(data []byte) []byte {
	// Drop every kind of whitespace from the end of the log first.
	withoutTail := bytes.TrimRight(data, "\r\n\t ")

	// A line break left over from the previous log can end up at the start of
	// this one (observed when the pattern uses $), so strip leading line
	// breaks only.
	return bytes.TrimLeft(withoutTail, "\r\n")
}
30 changes: 24 additions & 6 deletions operator/helper/multiline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ func TestLineStartSplitFunc(t *testing.T) {
Pattern: `LOGSTART \d+ `,
Raw: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`),
ExpectedTokenized: []string{
`LOGSTART 123 log1 `,
`LOGSTART 234 log2 `,
`LOGSTART 123 log1`,
`LOGSTART 234 log2`,
},
},
{
Name: "TwoLogsLineStart",
Pattern: `^LOGSTART \d+ `,
Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"),
ExpectedTokenized: []string{
"LOGSTART 123 LOGSTART 345 log1\n",
"LOGSTART 234 log2\n",
"LOGSTART 123 LOGSTART 345 log1",
"LOGSTART 234 log2",
},
},
{
Expand All @@ -92,7 +92,7 @@ func TestLineStartSplitFunc(t *testing.T) {
Pattern: `LOGSTART \d+ `,
Raw: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`),
ExpectedTokenized: []string{
`part that doesn't match `,
`part that doesn't match`,
`LOGSTART 123 part that matches`,
},
},
Expand Down Expand Up @@ -134,6 +134,15 @@ func TestLineStartSplitFunc(t *testing.T) {
ExpectedError: errors.New("bufio.Scanner: token too long"),
ExpectedTokenized: []string{},
},
{
Name: "MultipleMultilineLogs",
Pattern: `^LOGSTART \d+`,
Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"),
ExpectedTokenized: []string{
"LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1",
"LOGSTART 17 log2\nLOGPART log2\nanother line",
},
},
}

for _, tc := range testCases {
Expand Down Expand Up @@ -190,7 +199,7 @@ func TestLineEndSplitFunc(t *testing.T) {
Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"),
ExpectedTokenized: []string{
"log1 LOGEND LOGEND",
"\nlog2 LOGEND",
"log2 LOGEND",
},
},
{
Expand Down Expand Up @@ -242,6 +251,15 @@ func TestLineEndSplitFunc(t *testing.T) {
ExpectedTokenized: []string{},
ExpectedError: errors.New("bufio.Scanner: token too long"),
},
{
Name: "MultipleMultilineLogs",
Pattern: `^LOGEND.*$`,
Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"),
ExpectedTokenized: []string{
"LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1",
"LOGSTART 17 log2\nLOGPART log2\nLOGEND log2",
},
},
}

for _, tc := range testCases {
Expand Down

0 comments on commit 2ffe032

Please sign in to comment.