Skip to content

Commit

Permalink
[pkg/stanza] Fix Stanza Key Value Parser custom pair delimiter bug (#…
Browse files Browse the repository at this point in the history
…31048)

**Description:** <Describe what has changed.>
Fixes a bug that would occur where quotes were not respected while pair
parsing with a custom pair delimiter. This occurred because in the case
of a custom pair delimiter `strings.Split()` was used which does not
respect quotes. This bug was only an issue with custom pair delimiters
because the default white space delimiter used `strings.FieldsFunc`with
a function that did respect quotes.

This fix adds the function `splitPairs(input, pairDelimiter string)
([]string, error)` which will split the given input on the given
pairDelimiter.

Alternative solutions like `strings.FieldsFunc` and `csv.NewReader` were
considered but not used because they require the delimiter be a rune.
This would have worked in many cases except for when the delimiter is
more than 1 unique character long. Since the delimiter shouldn't be
limited to a single rune, those solutions were rejected.

A consequence of this new function respecting quotes is it will fail and
return an error if any quoted value is ever malformed. For example,
given the input `k1=v1 k2='v2"` the function will return an error
because the single quote used for `k2`'s value is never closed. It will
also fail on the input `k1='v1 k2=v2 k3=v3` for the same reason that the
quote is never closed. I believe this is the behavior we want so that we
don't return incorrect pairs for further parsing.

**Link to tracking Issue:** <Issue number if applicable>
Closes #31034 

**Testing:** <Describe what testing was performed and which tests were
added.>
Added in additional unit tests to verify correct behaviors.

**Documentation:** <Describe the documentation added.>
I don't believe documentation is required as the user shouldn't see any
changes to intended behavior.

---------

Co-authored-by: Daniel Jaglowski <[email protected]>
  • Loading branch information
dpaasman00 and djaglowski authored Feb 8, 2024
1 parent 82cbc43 commit fe61401
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 53 deletions.
27 changes: 27 additions & 0 deletions .chloggen/pkg-stanza-fix-key-value-pair-delimiter-bug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed a bug in the keyvalue_parser where quoted values could be split if they contained a delimited.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [31034]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
83 changes: 56 additions & 27 deletions pkg/stanza/operator/parser/keyvalue/keyvalue.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,35 +51,31 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
return nil, err
}

if c.Delimiter == c.PairDelimiter {
return nil, errors.New("delimiter and pair_delimiter cannot be the same value")
}

if c.Delimiter == "" {
return nil, errors.New("delimiter is a required parameter")
}

// split on whitespace by default, if pair delimiter is set, use
// strings.Split()
pairSplitFunc := splitStringByWhitespace
pairDelimiter := " "
if c.PairDelimiter != "" {
pairSplitFunc = func(input string) []string {
return strings.Split(input, c.PairDelimiter)
}
pairDelimiter = c.PairDelimiter
}

if c.Delimiter == pairDelimiter {
return nil, errors.New("delimiter and pair_delimiter cannot be the same value")
}

return &Parser{
ParserOperator: parserOperator,
delimiter: c.Delimiter,
pairSplitFunc: pairSplitFunc,
pairDelimiter: pairDelimiter,
}, nil
}

// Parser is an operator that parses key value pairs.
type Parser struct {
helper.ParserOperator
delimiter string
pairSplitFunc func(input string) []string
pairDelimiter string
}

// Process will parse an entry for key value pairs.
Expand All @@ -91,45 +87,78 @@ func (kv *Parser) Process(ctx context.Context, entry *entry.Entry) error {
func (kv *Parser) parse(value any) (any, error) {
switch m := value.(type) {
case string:
return kv.parser(m, kv.delimiter)
return kv.parser(m, kv.delimiter, kv.pairDelimiter)
default:
return nil, fmt.Errorf("type %T cannot be parsed as key value pairs", value)
}
}

func (kv *Parser) parser(input string, delimiter string) (map[string]any, error) {
func (kv *Parser) parser(input string, delimiter string, pairDelimiter string) (map[string]any, error) {
if input == "" {
return nil, fmt.Errorf("parse from field %s is empty", kv.ParseFrom.String())
}

pairs, err := splitPairs(input, pairDelimiter)
if err != nil {
return nil, fmt.Errorf("failed to parse pairs from input: %w", err)
}

parsed := make(map[string]any)

var err error
for _, raw := range kv.pairSplitFunc(input) {
for _, raw := range pairs {
m := strings.SplitN(raw, delimiter, 2)
if len(m) != 2 {
e := fmt.Errorf("expected '%s' to split by '%s' into two items, got %d", raw, delimiter, len(m))
err = multierr.Append(err, e)
continue
}

key := strings.TrimSpace(strings.Trim(m[0], "\"'"))
value := strings.TrimSpace(strings.Trim(m[1], "\"'"))
key := strings.TrimSpace(m[0])
value := strings.TrimSpace(m[1])

parsed[key] = value
}

return parsed, err
}

// split on whitespace and preserve quoted text
func splitStringByWhitespace(input string) []string {
quoted := false
raw := strings.FieldsFunc(input, func(r rune) bool {
if r == '"' || r == '\'' {
quoted = !quoted
// splitPairs will split the input on the pairDelimiter and return the resulting slice.
// `strings.Split` is not used because it does not respect quotes and will split if the delimiter appears in a quoted value
func splitPairs(input, pairDelimiter string) ([]string, error) {
var result []string
currentPair := ""
delimiterLength := len(pairDelimiter)
quoteChar := "" // "" means we are not in quotes

for i := 0; i < len(input); i++ {
if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { // delimiter
if currentPair == "" { // leading || trailing delimiter; ignore
continue
}
result = append(result, currentPair)
currentPair = ""
i += delimiterLength - 1
continue
}

if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote
quoteChar = string(input[i])
continue
}
return !quoted && r == ' '
})
return raw
if string(input[i]) == quoteChar { // end of quote
quoteChar = ""
continue
}

currentPair += string(input[i])
}

if quoteChar != "" { // check for closed quotes
return nil, fmt.Errorf("never reached end of a quoted value")
}
if currentPair != "" { // avoid adding empty value bc of a trailing delimiter
return append(result, currentPair), nil
}

return result, nil
}
142 changes: 116 additions & 26 deletions pkg/stanza/operator/parser/keyvalue/keyvalue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ func TestBuild(t *testing.T) {
}(),
true,
},
{
"pair-delimiter-equals-default-delimiter",
func() *Config {
cfg := basicConfig()
cfg.Delimiter = " "
return cfg
}(),
true,
},
{
"unset-delimiter",
func() *Config {
Expand Down Expand Up @@ -148,6 +157,13 @@ func TestParserInvalidType(t *testing.T) {
require.Contains(t, err.Error(), "type []int cannot be parsed as key value pairs")
}

func TestParserEmptyInput(t *testing.T) {
parser := newTestParser(t)
_, err := parser.parse("")
require.Error(t, err)
require.Contains(t, err.Error(), "parse from field body is empty")
}

func TestKVImplementations(t *testing.T) {
require.Implements(t, (*operator.Operator)(nil), new(Parser))
}
Expand Down Expand Up @@ -503,7 +519,7 @@ key=value`,
false,
},
{
"quoted-value-contains-delimiter",
"quoted-value-contains-whitespace-delimiter",
func(kv *Config) {},
&entry.Entry{
Body: `msg="Message successfully sent at 2023-12-04 06:47:31.204222276 +0000 UTC m=+5115.932279346"`,
Expand Down Expand Up @@ -572,6 +588,105 @@ key=value`,
false,
true,
},
{
"custom pair delimiter in quoted value",
func(kv *Config) {
kv.PairDelimiter = "_"
},
&entry.Entry{
Body: `a=b_c="d_e"`,
},
&entry.Entry{
Attributes: map[string]any{
"a": "b",
"c": "d_e",
},
Body: `a=b_c="d_e"`,
},
false,
false,
},
{
"embedded double quotes in single quoted value",
func(kv *Config) {},
&entry.Entry{
Body: `a=b c='this is a "co ol" value'`,
},
&entry.Entry{
Attributes: map[string]any{
"a": "b",
"c": "this is a \"co ol\" value",
},
Body: `a=b c='this is a "co ol" value'`,
},
false,
false,
},
{
"embedded double quotes end single quoted value",
func(kv *Config) {},
&entry.Entry{
Body: `a=b c='this is a "co ol"'`,
},
&entry.Entry{
Attributes: map[string]any{
"a": "b",
"c": "this is a \"co ol\"",
},
Body: `a=b c='this is a "co ol"'`,
},
false,
false,
},
{
"leading and trailing pair delimiter w/o quotes",
func(kv *Config) {},
&entry.Entry{
Body: " k1=v1 k2==v2 k3=v3= ",
},
&entry.Entry{
Attributes: map[string]any{
"k1": "v1",
"k2": "=v2",
"k3": "v3=",
},
Body: " k1=v1 k2==v2 k3=v3= ",
},
false,
false,
},
{
"complicated delimiters",
func(kv *Config) {
kv.Delimiter = "@*"
kv.PairDelimiter = "_!_"
},
&entry.Entry{
Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`,
},
&entry.Entry{
Attributes: map[string]any{
"k1": "v1",
"k2": "*v2",
"_k3@": "v3__",
},
Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`,
},
false,
false,
},
{
"unclosed quotes",
func(kv *Config) {},
&entry.Entry{
Body: `k1='v1' k2='v2`,
},
&entry.Entry{
Body: `k1='v1' k2='v2`,
},
true,
false,
},
}

for _, tc := range cases {
Expand Down Expand Up @@ -604,28 +719,3 @@ key=value`,
})
}
}

func TestSplitStringByWhitespace(t *testing.T) {
cases := []struct {
name string
intput string
output []string
}{
{
"simple",
"k=v a=b x=\" y \" job=\"software engineering\"",
[]string{
"k=v",
"a=b",
"x=\" y \"",
"job=\"software engineering\"",
},
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
require.Equal(t, tc.output, splitStringByWhitespace(tc.intput))
})
}
}

0 comments on commit fe61401

Please sign in to comment.