Skip to content

Commit

Permalink
CSV Added 'Lazy Quotes' support (open-telemetry#324)
Browse files Browse the repository at this point in the history
* Added a configuration option for CSV lazy quotes (embedded quotes)

Signed-off-by: Corbin Phelps <[email protected]>

* Updated csv_parser docs with lazy_quotes option

Signed-off-by: Corbin Phelps <[email protected]>
  • Loading branch information
cpheps authored Dec 20, 2021
1 parent 7b3dec9 commit d79bfca
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/operators/csv_parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The `csv_parser` operator parses the string-type field selected by `parse_from`
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
| `header` | required | A string of delimited field names. The values in the delimited header will be used as keys. |
| `delimiter` | `,` | A character that will be used as a delimiter. Values `\r` and `\n` cannot be used as a delimiter. |
| `lazy_quotes` | `false` | If true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. |
| `parse_from` | `$body` | The [field](/docs/types/field.md) from which the value will be parsed. |
| `parse_to` | `$body` | The [field](/docs/types/field.md) to which the value will be parsed. |
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md). |
Expand Down
10 changes: 10 additions & 0 deletions operator/builtin/parser/csv/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ func TestJSONParserConfig(t *testing.T) {
return p
}(),
},
{
Name: "lazy_quotes",
Expect: func() *CSVParserConfig {
p := defaultCfg()
p.Header = "id,severity,message"
p.LazyQuotes = true
p.ParseFrom = entry.NewBodyField("message")
return p
}(),
},
{
Name: "delimiter",
Expect: func() *CSVParserConfig {
Expand Down
4 changes: 4 additions & 0 deletions operator/builtin/parser/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type CSVParserConfig struct {

Header string `json:"header" yaml:"header"`
FieldDelimiter string `json:"delimiter,omitempty" yaml:"delimiter,omitempty"`
LazyQuotes bool `json:"lazy_quotes,omitempty" yaml:"lazy_quotes,omitempty"`
}

// Build will build a csv parser operator.
Expand Down Expand Up @@ -76,6 +77,7 @@ func (c CSVParserConfig) Build(context operator.BuildContext) ([]operator.Operat
header: strings.Split(c.Header, delimiterStr),
fieldDelimiter: fieldDelimiter,
numFields: numFields,
lazyQuotes: c.LazyQuotes,
}

return []operator.Operator{csvParser}, nil
Expand All @@ -87,6 +89,7 @@ type CSVParser struct {
header []string
fieldDelimiter rune
numFields int
lazyQuotes bool
}

// Process will parse an entry for csv.
Expand All @@ -107,6 +110,7 @@ func (r *CSVParser) parse(value interface{}) (interface{}, error) {
reader := csvparser.NewReader(strings.NewReader(csvLine))
reader.Comma = r.fieldDelimiter
reader.FieldsPerRecord = r.numFields
reader.LazyQuotes = r.lazyQuotes
parsedValues := make(map[string]interface{})

record, err := reader.Read()
Expand Down
17 changes: 16 additions & 1 deletion operator/builtin/parser/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,21 @@ func TestParserCSV(t *testing.T) {
"position": "agent",
},
},
{
"parse-with-lazy-quotes",
func(p *CSVParserConfig) {
p.Header = "name,age,height,number"
p.FieldDelimiter = ","
p.LazyQuotes = true
},
`stanza "log parser",1,6ft,5`,
map[string]interface{}{
"name": `stanza "log parser"`,
"age": "1",
"height": "6ft",
"number": "5",
},
},
}

for _, tc := range cases {
Expand Down Expand Up @@ -218,7 +233,7 @@ func TestParserCSV(t *testing.T) {
}
}

func TestParserCSVMultipleBodys(t *testing.T) {
func TestParserCSVMultipleBodies(t *testing.T) {
t.Run("basic", func(t *testing.T) {
cfg := NewCSVParserConfig("test")
cfg.OutputIDs = []string{"fake"}
Expand Down
4 changes: 4 additions & 0 deletions operator/builtin/parser/csv/testdata/lazy_quotes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test fixture for the csv_parser `lazy_quotes` option.
# The matching config test case ("lazy_quotes") expects this file to decode
# into a CSVParserConfig with Header, LazyQuotes and ParseFrom set as below.
type: csv_parser
# Field holding the raw CSV line to parse.
parse_from: message
# Delimited list of field names used as keys for the parsed values.
header: id,severity,message
# When true, a quote may appear in an unquoted field and a non-doubled quote
# may appear in a quoted field.
lazy_quotes: true

0 comments on commit d79bfca

Please sign in to comment.