Skip to content
This repository has been archived by the owner on May 25, 2022. It is now read-only.

CSV Added 'Lazy Quotes' support #324

Merged
merged 2 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/operators/csv_parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The `csv_parser` operator parses the string-type field selected by `parse_from`
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
| `header` | required | A string of delimited field names. The values in the delimited header will be used as keys. |
| `delimiter` | `,` | A character that will be used as a delimiter. The values `\r` and `\n` cannot be used as delimiters. |
| `lazy_quotes` | `false` | If true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. |
| `parse_from` | $body | The [field](/docs/types/field.md) from which the value will be parsed. |
| `parse_to` | $body | The [field](/docs/types/field.md) to which the value will be parsed. |
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md). |
Expand Down
10 changes: 10 additions & 0 deletions operator/builtin/parser/csv/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ func TestJSONParserConfig(t *testing.T) {
return p
}(),
},
{
Name: "lazy_quotes",
Expect: func() *CSVParserConfig {
p := defaultCfg()
p.Header = "id,severity,message"
p.LazyQuotes = true
p.ParseFrom = entry.NewBodyField("message")
return p
}(),
},
{
Name: "delimiter",
Expect: func() *CSVParserConfig {
Expand Down
4 changes: 4 additions & 0 deletions operator/builtin/parser/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type CSVParserConfig struct {

Header string `json:"header" yaml:"header"`
FieldDelimiter string `json:"delimiter,omitempty" yaml:"delimiter,omitempty"`
LazyQuotes bool `json:"lazy_quotes,omitempty" yaml:"lazy_quotes,omitempty"`
}

// Build will build a csv parser operator.
Expand Down Expand Up @@ -76,6 +77,7 @@ func (c CSVParserConfig) Build(context operator.BuildContext) ([]operator.Operat
header: strings.Split(c.Header, delimiterStr),
fieldDelimiter: fieldDelimiter,
numFields: numFields,
lazyQuotes: c.LazyQuotes,
}

return []operator.Operator{csvParser}, nil
Expand All @@ -87,6 +89,7 @@ type CSVParser struct {
header []string
fieldDelimiter rune
numFields int
lazyQuotes bool
}

// Process will parse an entry for csv.
Expand All @@ -107,6 +110,7 @@ func (r *CSVParser) parse(value interface{}) (interface{}, error) {
reader := csvparser.NewReader(strings.NewReader(csvLine))
reader.Comma = r.fieldDelimiter
reader.FieldsPerRecord = r.numFields
reader.LazyQuotes = r.lazyQuotes
parsedValues := make(map[string]interface{})

record, err := reader.Read()
Expand Down
17 changes: 16 additions & 1 deletion operator/builtin/parser/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,21 @@ func TestParserCSV(t *testing.T) {
"position": "agent",
},
},
{
"parse-with-lazy-quotes",
func(p *CSVParserConfig) {
p.Header = "name,age,height,number"
p.FieldDelimiter = ","
p.LazyQuotes = true
},
`stanza "log parser",1,6ft,5`,
map[string]interface{}{
"name": `stanza "log parser"`,
"age": "1",
"height": "6ft",
"number": "5",
},
},
}

for _, tc := range cases {
Expand Down Expand Up @@ -218,7 +233,7 @@ func TestParserCSV(t *testing.T) {
}
}

func TestParserCSVMultipleBodys(t *testing.T) {
func TestParserCSVMultipleBodies(t *testing.T) {
t.Run("basic", func(t *testing.T) {
cfg := NewCSVParserConfig("test")
cfg.OutputIDs = []string{"fake"}
Expand Down
4 changes: 4 additions & 0 deletions operator/builtin/parser/csv/testdata/lazy_quotes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test fixture: a csv_parser operator configuration with lazy_quotes enabled.
# It parses the `message` field using a three-column header.
type: csv_parser
parse_from: message
header: id,severity,message
# Turns on the lazy_quotes option for this parser.
lazy_quotes: true