diff --git a/.chloggen/pkg-stanza-syslog-octen-split.yaml b/.chloggen/pkg-stanza-syslog-octen-split.yaml
new file mode 100755
index 000000000000..16de6042b950
--- /dev/null
+++ b/.chloggen/pkg-stanza-syslog-octen-split.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: bug_fix
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: syslogreceiver
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Fix issue where long tokens would be truncated prematurely
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [27294]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
diff --git a/pkg/stanza/operator/input/syslog/syslog.go b/pkg/stanza/operator/input/syslog/syslog.go
index cfa5c24aa8e7..26636e6ab56b 100644
--- a/pkg/stanza/operator/input/syslog/syslog.go
+++ b/pkg/stanza/operator/input/syslog/syslog.go
@@ -150,34 +150,40 @@ func OctetSplitFuncBuilder(_ encoding.Encoding) (bufio.SplitFunc, error) {
 
+// newOctetFrameSplitFunc returns a bufio.SplitFunc for octet-counted frames of
+// the form "<length><whitespace><payload>". Each token is the length prefix,
+// the delimiter and exactly <length> payload bytes. When the buffered data does
+// not yet hold a complete frame, more data is requested; if atEOF and
+// flushAtEOF are both set, the remaining bytes are emitted as a final token.
 func newOctetFrameSplitFunc(flushAtEOF bool) bufio.SplitFunc {
 	frameRegex := regexp.MustCompile(`^[1-9]\d*\s`)
-	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	return func(data []byte, atEOF bool) (int, []byte, error) {
 		frameLoc := frameRegex.FindIndex(data)
 		if frameLoc == nil {
 			// Flush if no more data is expected
 			if len(data) != 0 && atEOF && flushAtEOF {
-				token = data
-				advance = len(data)
-				return
+				return len(data), data, nil
 			}
 			return 0, nil, nil
 		}
 
 		frameMaxIndex := frameLoc[1]
-		// delimit space between length and log
+		// Remove the delimiter (space) between length and log, and parse the length
 		frameLenValue, err := strconv.Atoi(string(data[:frameMaxIndex-1]))
 		if err != nil {
-			return 0, nil, err // read more data and try again.
+			// The regex only admits digits, so the sole way to get here is a
+			// length prefix too large for int (strconv.ErrRange).
+			return 0, nil, err
 		}
 
-		advance = frameMaxIndex + frameLenValue
-		// the limitation here is that we can only line split within a single buffer
-		// the context of buffer length cannot be pass onto the next scan
-		capacity := cap(data)
-		if advance > capacity {
-			return capacity, data, nil
+		// Compare against the remaining bytes instead of summing first: a huge
+		// declared length would overflow frameMaxIndex+frameLenValue and panic
+		// when slicing.
+		if frameLenValue > len(data)-frameMaxIndex {
+			if atEOF && flushAtEOF {
+				return len(data), data, nil
+			}
+			return 0, nil, nil
 		}
-		token = data[:advance]
-		err = nil
-		return
+		advance := frameMaxIndex + frameLenValue
+		return advance, data[:advance], nil
 	}
 }
diff --git a/pkg/stanza/operator/input/syslog/syslog_test.go b/pkg/stanza/operator/input/syslog/syslog_test.go
index 1baaaf77fd83..6d489173a997 100644
--- a/pkg/stanza/operator/input/syslog/syslog_test.go
+++ b/pkg/stanza/operator/input/syslog/syslog_test.go
@@ -243,8 +243,7 @@ func TestOctetFramingSplitFunc(t *testing.T) {
 					return newRaw
 				}(),
 				ExpectedTokens: []string{
-					`5000 ` + string(splittest.GenerateBytes(4091)),
-					`j`,
+					`5000 ` + string(splittest.GenerateBytes(4092)),
 				},
 			},
 		}