feat: strip trailing whitespace in multiline helper

Signed-off-by: Dominik Rosiek <[email protected]>
open-telemetry · Jul 6, 2021 · fd4dbb8 · fd4dbb8
1 parent 8b99134
commit fd4dbb8
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 13 deletions.
diff --git a/operator/helper/multiline.go b/operator/helper/multiline.go
@@ -79,7 +79,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 		if firstLoc == nil {
 			// Flush if no more data is expected
 			if len(data) != 0 && atEOF && flushAtEOF {
-				return len(data), data, nil
+				token = trimWhitespaces(data)
+				advance = len(token)
+				return
 			}
 			return 0, nil, nil // read more data and try again.
 		}
@@ -89,7 +91,7 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 		if firstMatchStart != 0 {
 			// the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data
 			advance = firstMatchStart
-			token = data[0:firstMatchStart]
+			token = trimWhitespaces(data[0:firstMatchStart])
 			return
 		}
 
@@ -100,7 +102,9 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 
 		// Flush if no more data is expected
 		if atEOF && flushAtEOF {
-			return len(data), data, nil
+			token = trimWhitespaces(data)
+			advance = len(token)
+			return
 		}
 
 		secondLocOffset := firstMatchEnd + 1
@@ -110,8 +114,8 @@ func NewLineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 		}
 		secondMatchStart := secondLoc[0] + secondLocOffset
 
-		advance = secondMatchStart                     // start scanning at the beginning of the second match
-		token = data[firstMatchStart:secondMatchStart] // the token begins at the first match, and ends at the beginning of the second match
+		advance = secondMatchStart                                      // start scanning at the beginning of the second match
+		token = trimWhitespaces(data[firstMatchStart:secondMatchStart]) // the token begins at the first match, and ends at the beginning of the second match
 		err = nil
 		return
 	}
@@ -125,7 +129,9 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 		if loc == nil {
 			// Flush if no more data is expected
 			if len(data) != 0 && atEOF && flushAtEOF {
-				return len(data), data, nil
+				token = trimWhitespaces(data)
+				advance = len(token)
+				return
 			}
 			return 0, nil, nil // read more data and try again
 		}
@@ -137,7 +143,7 @@ func NewLineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
 		}
 
 		advance = loc[1]
-		token = data[:loc[1]]
+		token = trimWhitespaces(data[:loc[1]])
 		err = nil
 		return
 	}
@@ -168,7 +174,9 @@ func NewNewlineSplitFunc(encoding encoding.Encoding, flushAtEOF bool) (bufio.Spl
 
 		// Flush if no more data is expected
 		if atEOF && flushAtEOF {
-			return len(data), data, nil
+			token = trimWhitespaces(data)
+			advance = len(token)
+			return
 		}
 
 		// Request more data.
@@ -187,3 +195,7 @@ func encodedCarriageReturn(encoding encoding.Encoding) ([]byte, error) {
 	nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\r'}, true)
 	return out[:nDst], err
 }
+
+func trimWhitespaces(data []byte) []byte { // trims trailing whitespace only; leading bytes are left untouched
+	return bytes.TrimRight(data, "\r\n\t ") // cutset is CR, LF, tab and space; the result can be shorter than data
+}
diff --git a/operator/helper/multiline_test.go b/operator/helper/multiline_test.go
@@ -68,17 +68,17 @@ func TestLineStartSplitFunc(t *testing.T) {
 			Pattern: `LOGSTART \d+ `,
 			Raw:     []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`),
 			ExpectedTokenized: []string{
-				`LOGSTART 123 log1 `,
-				`LOGSTART 234 log2 `,
+				`LOGSTART 123 log1`,
+				`LOGSTART 234 log2`,
 			},
 		},
 		{
 			Name:    "TwoLogsLineStart",
 			Pattern: `^LOGSTART \d+ `,
 			Raw:     []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"),
 			ExpectedTokenized: []string{
-				"LOGSTART 123 LOGSTART 345 log1\n",
-				"LOGSTART 234 log2\n",
+				"LOGSTART 123 LOGSTART 345 log1",
+				"LOGSTART 234 log2",
 			},
 		},
 		{
@@ -92,7 +92,7 @@ func TestLineStartSplitFunc(t *testing.T) {
 			Pattern: `LOGSTART \d+ `,
 			Raw:     []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`),
 			ExpectedTokenized: []string{
-				`part that doesn't match `,
+				`part that doesn't match`,
 				`LOGSTART 123 part that matches`,
 			},
 		},