open-telemetry · djaglowski · Sep 13, 2023 · Aug 28, 2023 · Sep 13, 2023
@@ -0,0 +1,30 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: breaking
+
+# The name of the component, or a single word describing the area of concern (e.g. filelogreceiver).
+component: pkg/stanza
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Make trim func composable
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [26536]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: |
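+  - Adds `trim.WithFunc`, which wraps a `bufio.SplitFunc` so that a `trim.Func` is applied to its tokens.
+  - Removes the `trim.Func` parameter from `split.Config.Func`. Wrap the returned split func with `trim.WithFunc` instead.
+  - Removes the `trim.Func` parameter from `flush.WithPeriod`. Wrap the returned split func with `trim.WithFunc` instead.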
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [api]
@@ -119,7 +119,7 @@ func (c Config) BuildWithSplitFunc(logger *zap.SugaredLogger, emit emit.Callback
 	}
 
 	// Ensure that splitter is buildable
-	factory := splitter.NewCustomFactory(splitFunc, c.FlushPeriod)
+	factory := splitter.NewCustomFactory(splitFunc, c.TrimConfig.Func(), c.FlushPeriod)
 	if _, err := factory.SplitFunc(); err != nil {
 		return nil, err
 	}

@@ -16,6 +16,7 @@ import (
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/pipeline"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
 )
 
 type Config struct {
@@ -69,13 +70,16 @@ func NewConfig(matchRegex string, metadataOperators []operator.Config, enc encod
 		return nil, fmt.Errorf("failed to compile `pattern`: %w", err)
 	}
 
-	splitFunc, err := split.NewlineSplitFunc(enc, false, func(b []byte) []byte {
-		return bytes.Trim(b, "\r\n")
-	})
+	splitFunc, err := split.NewlineSplitFunc(enc, false)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create split func: %w", err)
 	}
 
+	var trimFunc trim.Func = func(b []byte) []byte {
+		return bytes.Trim(b, "\r\n")
+	}
+	splitFunc = trim.WithFunc(splitFunc, trimFunc)
+
 	return &Config{
 		regex:             regex,
 		SplitFunc:         splitFunc,

@@ -13,19 +13,21 @@ import (
 
 type customFactory struct {
 	splitFunc   bufio.SplitFunc
+	trimFunc    trim.Func
 	flushPeriod time.Duration
 }
 
 var _ Factory = (*customFactory)(nil)
 
-func NewCustomFactory(splitFunc bufio.SplitFunc, flushPeriod time.Duration) Factory {
+func NewCustomFactory(splitFunc bufio.SplitFunc, trimFunc trim.Func, flushPeriod time.Duration) Factory {
 	return &customFactory{
 		splitFunc:   splitFunc,
+		trimFunc:    trimFunc,
 		flushPeriod: flushPeriod,
 	}
 }
 
 // SplitFunc builds a bufio.SplitFunc based on the configuration
 func (f *customFactory) SplitFunc() (bufio.SplitFunc, error) {
-	return flush.WithPeriod(f.splitFunc, trim.Nop, f.flushPeriod), nil
+	return trim.WithFunc(flush.WithPeriod(f.splitFunc, f.flushPeriod), f.trimFunc), nil
 }
@@ -9,10 +9,12 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
 )
 
 func TestCustom(t *testing.T) {
-	factory := NewCustomFactory(bufio.ScanLines, 0)
+	factory := NewCustomFactory(bufio.ScanLines, trim.Nop, 0)
 	splitFunc, err := factory.SplitFunc()
 	assert.NoError(t, err)
 	assert.NotNil(t, splitFunc)
@@ -35,9 +37,33 @@ func TestCustom(t *testing.T) {
 	assert.Nil(t, token)
 }
 
+func TestCustomWithTrim(t *testing.T) {
+	factory := NewCustomFactory(bufio.ScanLines, trim.Whitespace, 0)
+	splitFunc, err := factory.SplitFunc()
+	assert.NoError(t, err)
+	assert.NotNil(t, splitFunc)
+
+	input := []byte(" hello \n world \n extra ")
+
+	advance, token, err := splitFunc(input, false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("hello"), token)
+
+	advance, token, err = splitFunc(input[8:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("world"), token)
+
+	advance, token, err = splitFunc(input[16:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 0, advance)
+	assert.Nil(t, token)
+}
+
 func TestCustomWithFlush(t *testing.T) {
 	flushPeriod := 100 * time.Millisecond
-	factory := NewCustomFactory(bufio.ScanLines, flushPeriod)
+	factory := NewCustomFactory(bufio.ScanLines, trim.Nop, flushPeriod)
 	splitFunc, err := factory.SplitFunc()
 	assert.NoError(t, err)
 	assert.NotNil(t, splitFunc)
@@ -66,3 +92,35 @@ func TestCustomWithFlush(t *testing.T) {
 	assert.Equal(t, 7, advance)
 	assert.Equal(t, []byte(" extra "), token)
 }
+
+func TestCustomWithFlushTrim(t *testing.T) {
+	flushPeriod := 100 * time.Millisecond
+	factory := NewCustomFactory(bufio.ScanLines, trim.Whitespace, flushPeriod)
+	splitFunc, err := factory.SplitFunc()
+	assert.NoError(t, err)
+	assert.NotNil(t, splitFunc)
+
+	input := []byte(" hello \n world \n extra ")
+
+	advance, token, err := splitFunc(input, false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("hello"), token)
+
+	advance, token, err = splitFunc(input[8:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("world"), token)
+
+	advance, token, err = splitFunc(input[16:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 0, advance)
+	assert.Nil(t, token)
+
+	time.Sleep(2 * flushPeriod)
+
+	advance, token, err = splitFunc(input[16:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 7, advance)
+	assert.Equal(t, []byte("extra"), token) // Ensure trim applies to flushed token
+}
@@ -42,9 +42,14 @@ func NewSplitFuncFactory(
 
 // SplitFunc builds a bufio.SplitFunc based on the configuration
 func (f *splitFuncFactory) SplitFunc() (bufio.SplitFunc, error) {
-	splitFunc, err := f.splitConfig.Func(f.encoding, false, f.maxLogSize, f.trimFunc)
+	splitFunc, err := f.splitConfig.Func(f.encoding, false, f.maxLogSize)
 	if err != nil {
 		return nil, err
 	}
-	return flush.WithPeriod(splitFunc, f.trimFunc, f.flushPeriod), nil
+	splitFunc = flush.WithPeriod(splitFunc, f.flushPeriod)
+	if f.encoding == encoding.Nop {
+		// Special case: with the nop encoding, tokens must never be trimmed.
+		return splitFunc, nil
+	}
+	return trim.WithFunc(splitFunc, f.trimFunc), nil
 }
@@ -49,6 +49,30 @@ func TestSplitFunc(t *testing.T) {
 	assert.Nil(t, token)
 }
 
+func TestSplitFuncWithTrim(t *testing.T) {
+	factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Whitespace, 0)
+	splitFunc, err := factory.SplitFunc()
+	assert.NoError(t, err)
+	assert.NotNil(t, splitFunc)
+
+	input := []byte(" hello \n world \n extra ")
+
+	advance, token, err := splitFunc(input, false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("hello"), token)
+
+	advance, token, err = splitFunc(input[8:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 8, advance)
+	assert.Equal(t, []byte("world"), token)
+
+	advance, token, err = splitFunc(input[16:], false)
+	assert.NoError(t, err)
+	assert.Equal(t, 0, advance)
+	assert.Nil(t, token)
+}
+
 func TestSplitFuncWithFlush(t *testing.T) {
 	flushPeriod := 100 * time.Millisecond
 	factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Nop, flushPeriod)
@@ -81,7 +105,7 @@ func TestSplitFuncWithFlush(t *testing.T) {
 	assert.Equal(t, []byte(" extra "), token)
 }
 
-func TestSplitFuncWithTrim(t *testing.T) {
+func TestSplitFuncWithFlushTrim(t *testing.T) {
 	flushPeriod := 100 * time.Millisecond
 	factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Whitespace, flushPeriod)
 	splitFunc, err := factory.SplitFunc()

@@ -6,12 +6,10 @@ package flush // import "github.com/open-telemetry/opentelemetry-collector-contr
 import (
 	"bufio"
 	"time"
-
-	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
 )
 
 // Wrap a bufio.SplitFunc with a flusher
-func WithPeriod(splitFunc bufio.SplitFunc, trimFunc trim.Func, period time.Duration) bufio.SplitFunc {
+func WithPeriod(splitFunc bufio.SplitFunc, period time.Duration) bufio.SplitFunc {
 	if period <= 0 {
 		return splitFunc
 	}
@@ -20,7 +18,7 @@ func WithPeriod(splitFunc bufio.SplitFunc, trimFunc trim.Func, period time.Durat
 		forcePeriod:        period,
 		previousDataLength: 0,
 	}
-	return f.splitFunc(splitFunc, trimFunc)
+	return f.splitFunc(splitFunc)
 }
 
 // flusher keeps information about flush state
@@ -61,7 +59,7 @@ func (f *flusher) shouldFlush() bool {
 	return f.forcePeriod > 0 && time.Since(f.lastDataChange) > f.forcePeriod && f.previousDataLength > 0
 }
 
-func (f *flusher) splitFunc(splitFunc bufio.SplitFunc, trimFunc trim.Func) bufio.SplitFunc {
+func (f *flusher) splitFunc(splitFunc bufio.SplitFunc) bufio.SplitFunc {
 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
 		advance, token, err = splitFunc(data, atEOF)
 
@@ -81,7 +79,7 @@ func (f *flusher) splitFunc(splitFunc bufio.SplitFunc, trimFunc trim.Func) bufio
 		if f.shouldFlush() {
 			// Inform flusher that we just flushed
 			f.flushed()
-			token = trimFunc(data)
+			token = data
 			advance = len(data)
 			return
 		}

@@ -9,8 +9,6 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
-
-	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
 )
 
 func TestFlusher(t *testing.T) {
@@ -22,7 +20,7 @@ func TestFlusher(t *testing.T) {
 	// always use atEOF=false.
 
 	flushPeriod := 100 * time.Millisecond
-	f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod)
+	f := WithPeriod(bufio.ScanWords, flushPeriod)
 
 	content := []byte("foo bar hellowo")
 
@@ -64,7 +62,7 @@ func TestNoFlushPeriod(t *testing.T) {
 	// In other words, we should expect exactly the behavior of bufio.ScanWords.
 
 	flushPeriod := time.Duration(0)
-	f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod)
+	f := WithPeriod(bufio.ScanWords, flushPeriod)
 
 	content := []byte("foo bar hellowo")
 

@@ -81,13 +81,8 @@ type BaseConfig struct {
 
 type SplitFuncBuilder func(enc encoding.Encoding) (bufio.SplitFunc, error)
 
-func (c Config) defaultMultilineBuilder(enc encoding.Encoding) (bufio.SplitFunc, error) {
-	trimFunc := c.TrimConfig.Func()
-	splitFunc, err := c.SplitConfig.Func(enc, true, int(c.MaxLogSize), trimFunc)
-	if err != nil {
-		return nil, err
-	}
-	return splitFunc, nil
+func (c Config) defaultSplitFuncBuilder(enc encoding.Encoding) (bufio.SplitFunc, error) {
+	return c.SplitConfig.Func(enc, true, int(c.MaxLogSize))
 }
 
 // Build will build a tcp input operator.
@@ -121,14 +116,15 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
 	}
 
 	if c.SplitFuncBuilder == nil {
-		c.SplitFuncBuilder = c.defaultMultilineBuilder
+		c.SplitFuncBuilder = c.defaultSplitFuncBuilder
 	}
 
 	// Build split func
 	splitFunc, err := c.SplitFuncBuilder(enc)
 	if err != nil {
 		return nil, err
 	}
+	splitFunc = trim.WithFunc(splitFunc, c.TrimConfig.Func())
 
 	var resolver *helper.IPResolver
 	if c.AddAttributes {

@@ -89,11 +89,12 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
 		return nil, err
 	}
 
-	// Build SplitFunc
-	splitFunc, err := c.SplitConfig.Func(enc, true, MaxUDPSize, c.TrimConfig.Func())
+	// Build split func
+	splitFunc, err := c.SplitConfig.Func(enc, true, MaxUDPSize)
 	if err != nil {
 		return nil, err
 	}
+	splitFunc = trim.WithFunc(splitFunc, c.TrimConfig.Func())
 
 	var resolver *helper.IPResolver
 	if c.AddAttributes {