Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pkg/stanza] Extract trim func from split package #26536

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .chloggen/pkg-stanza-extract-trim-split.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: breaking

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Make trim func composable

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [26536]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
  - Adds trim.WithFunc to allow trim funcs to wrap bufio.SplitFuncs.
  - Removes the trim.Func parameter from split.Config.Func. Use trim.WithFunc instead.
  - Removes the trim.Func parameter from flush.WithPeriod. Use trim.WithFunc instead.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
2 changes: 1 addition & 1 deletion pkg/stanza/fileconsumer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (c Config) BuildWithSplitFunc(logger *zap.SugaredLogger, emit emit.Callback
}

// Ensure that splitter is buildable
factory := splitter.NewCustomFactory(splitFunc, c.FlushPeriod)
factory := splitter.NewCustomFactory(splitFunc, c.TrimConfig.Func(), c.FlushPeriod)
if _, err := factory.SplitFunc(); err != nil {
return nil, err
}
Expand Down
10 changes: 7 additions & 3 deletions pkg/stanza/fileconsumer/internal/header/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/pipeline"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

type Config struct {
Expand Down Expand Up @@ -69,13 +70,16 @@ func NewConfig(matchRegex string, metadataOperators []operator.Config, enc encod
return nil, fmt.Errorf("failed to compile `pattern`: %w", err)
}

splitFunc, err := split.NewlineSplitFunc(enc, false, func(b []byte) []byte {
return bytes.Trim(b, "\r\n")
})
splitFunc, err := split.NewlineSplitFunc(enc, false)
if err != nil {
return nil, fmt.Errorf("failed to create split func: %w", err)
}

var trimFunc trim.Func = func(b []byte) []byte {
return bytes.Trim(b, "\r\n")
}
splitFunc = trim.WithFunc(splitFunc, trimFunc)

return &Config{
regex: regex,
SplitFunc: splitFunc,
Expand Down
6 changes: 4 additions & 2 deletions pkg/stanza/fileconsumer/internal/splitter/custom.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,21 @@ import (

// customFactory is a Factory built around a caller-supplied bufio.SplitFunc.
// Its SplitFunc method wraps splitFunc with flush.WithPeriod and then with
// trim.WithFunc.
type customFactory struct {
// splitFunc is the caller-supplied split function that gets wrapped.
splitFunc bufio.SplitFunc
// trimFunc is handed to trim.WithFunc, outside the flush wrapper.
trimFunc trim.Func
// flushPeriod is handed to flush.WithPeriod.
flushPeriod time.Duration
}

var _ Factory = (*customFactory)(nil)

func NewCustomFactory(splitFunc bufio.SplitFunc, flushPeriod time.Duration) Factory {
func NewCustomFactory(splitFunc bufio.SplitFunc, trimFunc trim.Func, flushPeriod time.Duration) Factory {
return &customFactory{
splitFunc: splitFunc,
trimFunc: trimFunc,
flushPeriod: flushPeriod,
}
}

// SplitFunc builds a bufio.SplitFunc based on the configuration
func (f *customFactory) SplitFunc() (bufio.SplitFunc, error) {
return flush.WithPeriod(f.splitFunc, trim.Nop, f.flushPeriod), nil
return trim.WithFunc(flush.WithPeriod(f.splitFunc, f.flushPeriod), f.trimFunc), nil
}
62 changes: 60 additions & 2 deletions pkg/stanza/fileconsumer/internal/splitter/custom_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ import (
"time"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

func TestCustom(t *testing.T) {
factory := NewCustomFactory(bufio.ScanLines, 0)
factory := NewCustomFactory(bufio.ScanLines, trim.Nop, 0)
splitFunc, err := factory.SplitFunc()
assert.NoError(t, err)
assert.NotNil(t, splitFunc)
Expand All @@ -35,9 +37,33 @@ func TestCustom(t *testing.T) {
assert.Nil(t, token)
}

// TestCustomWithTrim checks that a custom factory configured with
// trim.Whitespace and a zero flush period returns whitespace-trimmed tokens
// for each complete line, and returns no token for the unterminated tail.
func TestCustomWithTrim(t *testing.T) {
	factory := NewCustomFactory(bufio.ScanLines, trim.Whitespace, 0)
	fn, buildErr := factory.SplitFunc()
	assert.NoError(t, buildErr)
	assert.NotNil(t, fn)

	data := []byte(" hello \n world \n extra ")

	// Each step feeds the remaining data starting at offset, always with
	// atEOF=false. A nil token means no token is expected.
	steps := []struct {
		offset  int
		advance int
		token   []byte
	}{
		{offset: 0, advance: 8, token: []byte("hello")},
		{offset: 8, advance: 8, token: []byte("world")},
		{offset: 16, advance: 0, token: nil},
	}

	for _, step := range steps {
		gotAdvance, gotToken, err := fn(data[step.offset:], false)
		assert.NoError(t, err)
		assert.Equal(t, step.advance, gotAdvance)
		if step.token == nil {
			assert.Nil(t, gotToken)
			continue
		}
		assert.Equal(t, step.token, gotToken)
	}
}

func TestCustomWithFlush(t *testing.T) {
flushPeriod := 100 * time.Millisecond
factory := NewCustomFactory(bufio.ScanLines, flushPeriod)
factory := NewCustomFactory(bufio.ScanLines, trim.Nop, flushPeriod)
splitFunc, err := factory.SplitFunc()
assert.NoError(t, err)
assert.NotNil(t, splitFunc)
Expand Down Expand Up @@ -66,3 +92,35 @@ func TestCustomWithFlush(t *testing.T) {
assert.Equal(t, 7, advance)
assert.Equal(t, []byte(" extra "), token)
}

// TestCustomWithFlushTrim checks that a custom factory configured with both
// trim.Whitespace and a flush period trims ordinary line tokens and also
// trims the token that is emitted once the flush period has elapsed.
func TestCustomWithFlushTrim(t *testing.T) {
	period := 100 * time.Millisecond
	factory := NewCustomFactory(bufio.ScanLines, trim.Whitespace, period)
	fn, buildErr := factory.SplitFunc()
	assert.NoError(t, buildErr)
	assert.NotNil(t, fn)

	data := []byte(" hello \n world \n extra ")

	// Steps executed before the flush period elapses. A nil token means no
	// token is expected.
	beforeFlush := []struct {
		offset  int
		advance int
		token   []byte
	}{
		{offset: 0, advance: 8, token: []byte("hello")},
		{offset: 8, advance: 8, token: []byte("world")},
		{offset: 16, advance: 0, token: nil},
	}

	for _, step := range beforeFlush {
		gotAdvance, gotToken, err := fn(data[step.offset:], false)
		assert.NoError(t, err)
		assert.Equal(t, step.advance, gotAdvance)
		if step.token == nil {
			assert.Nil(t, gotToken)
			continue
		}
		assert.Equal(t, step.token, gotToken)
	}

	// Wait past the flush period so the pending tail is handed out.
	time.Sleep(2 * period)

	tail := data[16:]
	gotAdvance, gotToken, err := fn(tail, false)
	assert.NoError(t, err)
	assert.Equal(t, 7, gotAdvance)
	// Ensure trim applies to flushed token
	assert.Equal(t, []byte("extra"), gotToken)
}
9 changes: 7 additions & 2 deletions pkg/stanza/fileconsumer/internal/splitter/multiline.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,14 @@ func NewSplitFuncFactory(

// SplitFunc builds a bufio.SplitFunc based on the configuration
func (f *splitFuncFactory) SplitFunc() (bufio.SplitFunc, error) {
splitFunc, err := f.splitConfig.Func(f.encoding, false, f.maxLogSize, f.trimFunc)
splitFunc, err := f.splitConfig.Func(f.encoding, false, f.maxLogSize)
if err != nil {
return nil, err
}
return flush.WithPeriod(splitFunc, f.trimFunc, f.flushPeriod), nil
splitFunc = flush.WithPeriod(splitFunc, f.flushPeriod)
if f.encoding == encoding.Nop {
// Special case where we should never trim
return splitFunc, nil
}
return trim.WithFunc(splitFunc, f.trimFunc), nil
}
26 changes: 25 additions & 1 deletion pkg/stanza/fileconsumer/internal/splitter/multiline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,30 @@ func TestSplitFunc(t *testing.T) {
assert.Nil(t, token)
}

// TestSplitFuncWithTrim checks that a split func factory built from an empty
// split.Config, UTF-8 encoding, trim.Whitespace and a zero flush period
// returns whitespace-trimmed tokens for each complete line, and returns no
// token for the unterminated tail.
func TestSplitFuncWithTrim(t *testing.T) {
	factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Whitespace, 0)
	fn, buildErr := factory.SplitFunc()
	assert.NoError(t, buildErr)
	assert.NotNil(t, fn)

	data := []byte(" hello \n world \n extra ")

	// Each step feeds the remaining data starting at offset, always with
	// atEOF=false. A nil token means no token is expected.
	steps := []struct {
		offset  int
		advance int
		token   []byte
	}{
		{offset: 0, advance: 8, token: []byte("hello")},
		{offset: 8, advance: 8, token: []byte("world")},
		{offset: 16, advance: 0, token: nil},
	}

	for _, step := range steps {
		gotAdvance, gotToken, err := fn(data[step.offset:], false)
		assert.NoError(t, err)
		assert.Equal(t, step.advance, gotAdvance)
		if step.token == nil {
			assert.Nil(t, gotToken)
			continue
		}
		assert.Equal(t, step.token, gotToken)
	}
}

func TestSplitFuncWithFlush(t *testing.T) {
flushPeriod := 100 * time.Millisecond
factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Nop, flushPeriod)
Expand Down Expand Up @@ -81,7 +105,7 @@ func TestSplitFuncWithFlush(t *testing.T) {
assert.Equal(t, []byte(" extra "), token)
}

func TestSplitFuncWithTrim(t *testing.T) {
func TestSplitFuncWithFlushTrim(t *testing.T) {
flushPeriod := 100 * time.Millisecond
factory := NewSplitFuncFactory(split.Config{}, unicode.UTF8, 1024, trim.Whitespace, flushPeriod)
splitFunc, err := factory.SplitFunc()
Expand Down
10 changes: 4 additions & 6 deletions pkg/stanza/flush/flush.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ package flush // import "github.com/open-telemetry/opentelemetry-collector-contr
import (
"bufio"
"time"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

// WithPeriod wraps a bufio.SplitFunc with a flusher. If period is not positive, splitFunc is returned unchanged.
func WithPeriod(splitFunc bufio.SplitFunc, trimFunc trim.Func, period time.Duration) bufio.SplitFunc {
func WithPeriod(splitFunc bufio.SplitFunc, period time.Duration) bufio.SplitFunc {
if period <= 0 {
return splitFunc
}
Expand All @@ -20,7 +18,7 @@ func WithPeriod(splitFunc bufio.SplitFunc, trimFunc trim.Func, period time.Durat
forcePeriod: period,
previousDataLength: 0,
}
return f.splitFunc(splitFunc, trimFunc)
return f.splitFunc(splitFunc)
}

// flusher keeps information about flush state
Expand Down Expand Up @@ -61,7 +59,7 @@ func (f *flusher) shouldFlush() bool {
return f.forcePeriod > 0 && time.Since(f.lastDataChange) > f.forcePeriod && f.previousDataLength > 0
}

func (f *flusher) splitFunc(splitFunc bufio.SplitFunc, trimFunc trim.Func) bufio.SplitFunc {
func (f *flusher) splitFunc(splitFunc bufio.SplitFunc) bufio.SplitFunc {
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
advance, token, err = splitFunc(data, atEOF)

Expand All @@ -81,7 +79,7 @@ func (f *flusher) splitFunc(splitFunc bufio.SplitFunc, trimFunc trim.Func) bufio
if f.shouldFlush() {
// Inform flusher that we just flushed
f.flushed()
token = trimFunc(data)
token = data
advance = len(data)
return
}
Expand Down
6 changes: 2 additions & 4 deletions pkg/stanza/flush/flush_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ import (
"time"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

func TestFlusher(t *testing.T) {
Expand All @@ -22,7 +20,7 @@ func TestFlusher(t *testing.T) {
// always use atEOF=false.

flushPeriod := 100 * time.Millisecond
f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod)
f := WithPeriod(bufio.ScanWords, flushPeriod)

content := []byte("foo bar hellowo")

Expand Down Expand Up @@ -64,7 +62,7 @@ func TestNoFlushPeriod(t *testing.T) {
// In other words, we should expect exactly the behavior of bufio.ScanWords.

flushPeriod := time.Duration(0)
f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod)
f := WithPeriod(bufio.ScanWords, flushPeriod)

content := []byte("foo bar hellowo")

Expand Down
12 changes: 4 additions & 8 deletions pkg/stanza/operator/input/tcp/tcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,8 @@ type BaseConfig struct {

type SplitFuncBuilder func(enc encoding.Encoding) (bufio.SplitFunc, error)

func (c Config) defaultMultilineBuilder(enc encoding.Encoding) (bufio.SplitFunc, error) {
trimFunc := c.TrimConfig.Func()
splitFunc, err := c.SplitConfig.Func(enc, true, int(c.MaxLogSize), trimFunc)
if err != nil {
return nil, err
}
return splitFunc, nil
// defaultSplitFuncBuilder builds a bufio.SplitFunc for enc by delegating to
// c.SplitConfig.Func, passing int(c.MaxLogSize) as the size argument. Build
// assigns it to c.SplitFuncBuilder when that field is nil.
// NOTE(review): the literal true is presumably the flush-at-EOF flag of
// split.Config.Func — confirm against that method's signature.
func (c Config) defaultSplitFuncBuilder(enc encoding.Encoding) (bufio.SplitFunc, error) {
return c.SplitConfig.Func(enc, true, int(c.MaxLogSize))
}

// Build will build a tcp input operator.
Expand Down Expand Up @@ -121,14 +116,15 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
}

if c.SplitFuncBuilder == nil {
c.SplitFuncBuilder = c.defaultMultilineBuilder
c.SplitFuncBuilder = c.defaultSplitFuncBuilder
}

// Build split func
splitFunc, err := c.SplitFuncBuilder(enc)
if err != nil {
return nil, err
}
splitFunc = trim.WithFunc(splitFunc, c.TrimConfig.Func())

var resolver *helper.IPResolver
if c.AddAttributes {
Expand Down
5 changes: 3 additions & 2 deletions pkg/stanza/operator/input/udp/udp.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,12 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
return nil, err
}

// Build SplitFunc
splitFunc, err := c.SplitConfig.Func(enc, true, MaxUDPSize, c.TrimConfig.Func())
// Build split func
splitFunc, err := c.SplitConfig.Func(enc, true, MaxUDPSize)
if err != nil {
return nil, err
}
splitFunc = trim.WithFunc(splitFunc, c.TrimConfig.Func())

var resolver *helper.IPResolver
if c.AddAttributes {
Expand Down
Loading
Loading