Skip to content
This repository has been archived by the owner on May 25, 2022. It is now read-only.

feat: trim whitespace characters for files with multiline logs #212

Merged
merged 3 commits into from
Jul 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions operator/helper/multiline.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if firstLoc == nil {
// Flush if no more data is expected
if len(data) != 0 && atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}
return 0, nil, nil // read more data and try again.
}
Expand All @@ -89,7 +91,7 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if firstMatchStart != 0 {
// the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data
advance = firstMatchStart
token = data[0:firstMatchStart]
token = trimWhitespaces(data[0:firstMatchStart])
return
}

Expand All @@ -100,7 +102,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {

// Flush if no more data is expected
if atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}

secondLocOffset := firstMatchEnd + 1
Expand All @@ -110,8 +114,8 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
}
secondMatchStart := secondLoc[0] + secondLocOffset

advance = secondMatchStart // start scanning at the beginning of the second match
token = data[firstMatchStart:secondMatchStart] // the token begins at the first match, and ends at the beginning of the second match
advance = secondMatchStart // start scanning at the beginning of the second match
token = trimWhitespaces(data[firstMatchStart:secondMatchStart]) // the token begins at the first match, and ends at the beginning of the second match
err = nil
return
}
Expand All @@ -125,7 +129,9 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if loc == nil {
// Flush if no more data is expected
if len(data) != 0 && atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}
return 0, nil, nil // read more data and try again
}
Expand All @@ -137,7 +143,7 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
}

advance = loc[1]
token = data[:loc[1]]
token = trimWhitespaces(data[:loc[1]])
err = nil
return
}
Expand Down Expand Up @@ -168,7 +174,9 @@ func NewNewlineSplitFunc(encoding encoding.Encoding, flushAtEOF bool) (bufio.Spl

// Flush if no more data is expected
if atEOF && flushAtEOF {
return len(data), data, nil
token = trimWhitespaces(data)
advance = len(data)
return
}

// Request more data.
Expand All @@ -187,3 +195,10 @@ func encodedCarriageReturn(encoding encoding.Encoding) ([]byte, error) {
nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\r'}, true)
return out[:nDst], err
}

// trimWhitespaces normalizes a multiline token before it is emitted:
// trailing whitespace (spaces, tabs, CR, LF) left over from the matched
// pattern is dropped, and leading CR/LF — which can be carried over from the
// previous entry when the regex anchors on '$' — is stripped as well.
// Leading spaces and tabs are deliberately preserved (e.g. indented stack traces).
func trimWhitespaces(data []byte) []byte {
	trimmed := bytes.TrimRight(data, "\r\n\t ")
	return bytes.TrimLeft(trimmed, "\r\n")
}
30 changes: 24 additions & 6 deletions operator/helper/multiline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ func TestLineStartSplitFunc(t *testing.T) {
Pattern: `LOGSTART \d+ `,
Raw: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`),
ExpectedTokenized: []string{
`LOGSTART 123 log1 `,
`LOGSTART 234 log2 `,
`LOGSTART 123 log1`,
`LOGSTART 234 log2`,
},
},
{
Name: "TwoLogsLineStart",
Pattern: `^LOGSTART \d+ `,
Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"),
ExpectedTokenized: []string{
"LOGSTART 123 LOGSTART 345 log1\n",
"LOGSTART 234 log2\n",
"LOGSTART 123 LOGSTART 345 log1",
"LOGSTART 234 log2",
},
},
{
Expand All @@ -92,7 +92,7 @@ func TestLineStartSplitFunc(t *testing.T) {
Pattern: `LOGSTART \d+ `,
Raw: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`),
ExpectedTokenized: []string{
`part that doesn't match `,
`part that doesn't match`,
`LOGSTART 123 part that matches`,
},
},
Expand Down Expand Up @@ -134,6 +134,15 @@ func TestLineStartSplitFunc(t *testing.T) {
ExpectedError: errors.New("bufio.Scanner: token too long"),
ExpectedTokenized: []string{},
},
{
Name: "MultipleMultilineLogs",
Pattern: `^LOGSTART \d+`,
Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"),
ExpectedTokenized: []string{
"LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1",
"LOGSTART 17 log2\nLOGPART log2\nanother line",
},
},
}

for _, tc := range testCases {
Expand Down Expand Up @@ -190,7 +199,7 @@ func TestLineEndSplitFunc(t *testing.T) {
Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"),
ExpectedTokenized: []string{
"log1 LOGEND LOGEND",
"\nlog2 LOGEND",
"log2 LOGEND",
},
},
{
Expand Down Expand Up @@ -242,6 +251,15 @@ func TestLineEndSplitFunc(t *testing.T) {
ExpectedTokenized: []string{},
ExpectedError: errors.New("bufio.Scanner: token too long"),
},
{
Name: "MultipleMultilineLogs",
Pattern: `^LOGEND.*$`,
Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"),
ExpectedTokenized: []string{
"LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1",
"LOGSTART 17 log2\nLOGPART log2\nLOGEND log2",
},
},
}

for _, tc := range testCases {
Expand Down