diff --git a/docs/operators/csv_parser.md b/docs/operators/csv_parser.md index 81ba2d9c..98859610 100644 --- a/docs/operators/csv_parser.md +++ b/docs/operators/csv_parser.md @@ -10,6 +10,7 @@ The `csv_parser` operator parses the string-type field selected by `parse_from` | `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. | | `header` | required | A string of delimited field names. The values in the delimited header will be used as keys. | | `delimiter` | `,` | A character that will be used as a delimiter. Values `\r` and `\n` cannot be used as a delimiter. | +| `lazy_quotes` | `false` | If true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. | | `parse_from` | $body | The [field](/docs/types/field.md) from which the value will be parsed. | | `parse_to` | $body | The [field](/docs/types/field.md) to which the value will be parsed. | | `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md). | diff --git a/operator/builtin/parser/csv/config_test.go b/operator/builtin/parser/csv/config_test.go index b1da7ff6..5e4775ff 100644 --- a/operator/builtin/parser/csv/config_test.go +++ b/operator/builtin/parser/csv/config_test.go @@ -32,6 +32,16 @@ func TestJSONParserConfig(t *testing.T) { return p }(), }, + { + Name: "lazy_quotes", + Expect: func() *CSVParserConfig { + p := defaultCfg() + p.Header = "id,severity,message" + p.LazyQuotes = true + p.ParseFrom = entry.NewBodyField("message") + return p + }(), + }, { Name: "delimiter", Expect: func() *CSVParserConfig { diff --git a/operator/builtin/parser/csv/csv.go b/operator/builtin/parser/csv/csv.go index dc8e23b7..b2529bc8 100644 --- a/operator/builtin/parser/csv/csv.go +++ b/operator/builtin/parser/csv/csv.go @@ -41,6 +41,7 @@ type CSVParserConfig struct { Header string `json:"header" yaml:"header"` FieldDelimiter string `json:"delimiter,omitempty" yaml:"delimiter,omitempty"` + LazyQuotes bool `json:"lazy_quotes,omitempty" yaml:"lazy_quotes,omitempty"` } // Build will build a csv parser operator. @@ -76,6 +77,7 @@ func (c CSVParserConfig) Build(context operator.BuildContext) ([]operator.Operat header: strings.Split(c.Header, delimiterStr), fieldDelimiter: fieldDelimiter, numFields: numFields, + lazyQuotes: c.LazyQuotes, } return []operator.Operator{csvParser}, nil @@ -87,6 +89,7 @@ type CSVParser struct { header []string fieldDelimiter rune numFields int + lazyQuotes bool } // Process will parse an entry for csv. @@ -107,6 +110,7 @@ func (r *CSVParser) parse(value interface{}) (interface{}, error) { reader := csvparser.NewReader(strings.NewReader(csvLine)) reader.Comma = r.fieldDelimiter reader.FieldsPerRecord = r.numFields + reader.LazyQuotes = r.lazyQuotes parsedValues := make(map[string]interface{}) record, err := reader.Read() diff --git a/operator/builtin/parser/csv/csv_test.go b/operator/builtin/parser/csv/csv_test.go index 3400896c..08fe0cfa 100644 --- a/operator/builtin/parser/csv/csv_test.go +++ b/operator/builtin/parser/csv/csv_test.go @@ -190,6 +190,21 @@ func TestParserCSV(t *testing.T) { "position": "agent", }, }, + { + "parse-with-lazy-quotes", + func(p *CSVParserConfig) { + p.Header = "name,age,height,number" + p.FieldDelimiter = "," + p.LazyQuotes = true + }, + `stanza "log parser",1,6ft,5`, + map[string]interface{}{ + "name": `stanza "log parser"`, + "age": "1", + "height": "6ft", + "number": "5", + }, + }, } for _, tc := range cases { @@ -218,7 +233,7 @@ func TestParserCSV(t *testing.T) { } } -func TestParserCSVMultipleBodys(t *testing.T) { +func TestParserCSVMultipleBodies(t *testing.T) { t.Run("basic", func(t *testing.T) { cfg := NewCSVParserConfig("test") cfg.OutputIDs = []string{"fake"} diff --git a/operator/builtin/parser/csv/testdata/lazy_quotes.yaml b/operator/builtin/parser/csv/testdata/lazy_quotes.yaml new file mode 100644 index 00000000..9af79c16 --- /dev/null +++ b/operator/builtin/parser/csv/testdata/lazy_quotes.yaml @@ -0,0 +1,4 @@ +type: csv_parser +parse_from: message +header: id,severity,message +lazy_quotes: true