From 585a177671ce7de2d5272c7d24c01af9de899c36 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Sat, 26 Jun 2021 00:27:23 -0500 Subject: [PATCH 01/14] Proof of concept --- config/config.go | 64 +++++++---- plugins/parsers/json_v2/README.md | 29 ++++- plugins/parsers/json_v2/parser.go | 101 ++++++++++++++++-- plugins/parsers/json_v2/parser_test.go | 1 + .../test_subfieldtag_in_object/expected.out | 1 + .../test_subfieldtag_in_object/input.json | 97 +++++++++++++++++ .../test_subfieldtag_in_object/telegraf.conf | 17 +++ 7 files changed, 274 insertions(+), 36 deletions(-) create mode 100644 plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out create mode 100644 plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json create mode 100644 plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf diff --git a/config/config.go b/config/config.go index 4880da4832e5a..85f7f083c394c 100644 --- a/config/config.go +++ b/config/config.go @@ -1421,28 +1421,8 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config, c.getFieldString(metricConfig, "timestamp_format", &mc.TimestampFormat) c.getFieldString(metricConfig, "timestamp_timezone", &mc.TimestampTimezone) - if fieldConfigs, ok := metricConfig.Fields["field"]; ok { - if fieldConfigs, ok := fieldConfigs.([]*ast.Table); ok { - for _, fieldconfig := range fieldConfigs { - var f json_v2.DataSet - c.getFieldString(fieldconfig, "path", &f.Path) - c.getFieldString(fieldconfig, "rename", &f.Rename) - c.getFieldString(fieldconfig, "type", &f.Type) - mc.Fields = append(mc.Fields, f) - } - } - } - if fieldConfigs, ok := metricConfig.Fields["tag"]; ok { - if fieldConfigs, ok := fieldConfigs.([]*ast.Table); ok { - for _, fieldconfig := range fieldConfigs { - var t json_v2.DataSet - c.getFieldString(fieldconfig, "path", &t.Path) - c.getFieldString(fieldconfig, "rename", &t.Rename) - t.Type = "string" - mc.Tags = append(mc.Tags, t) - } - } - } + mc.Fields = 
getFieldSubtable(c, metricConfig) + mc.Tags = getTagSubtable(c, metricConfig) if objectconfigs, ok := metricConfig.Fields["object"]; ok { if objectconfigs, ok := objectconfigs.([]*ast.Table); ok { @@ -1458,6 +1438,10 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config, c.getFieldStringSlice(objectConfig, "tags", &o.Tags) c.getFieldStringMap(objectConfig, "renames", &o.Renames) c.getFieldStringMap(objectConfig, "fields", &o.Fields) + + o.FieldPaths = getFieldSubtable(c, metricConfig) + o.TagPaths = getTagSubtable(c, metricConfig) + mc.JSONObjects = append(mc.JSONObjects, o) } } @@ -1477,6 +1461,42 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config, return pc, nil } +func getFieldSubtable(c *Config, metricConfig *ast.Table) []json_v2.DataSet { + var fields []json_v2.DataSet + + if fieldConfigs, ok := metricConfig.Fields["field"]; ok { + if fieldConfigs, ok := fieldConfigs.([]*ast.Table); ok { + for _, fieldconfig := range fieldConfigs { + var f json_v2.DataSet + c.getFieldString(fieldconfig, "path", &f.Path) + c.getFieldString(fieldconfig, "rename", &f.Rename) + c.getFieldString(fieldconfig, "type", &f.Type) + fields = append(fields, f) + } + } + } + + return fields +} + +func getTagSubtable(c *Config, metricConfig *ast.Table) []json_v2.DataSet { + var tags []json_v2.DataSet + + if fieldConfigs, ok := metricConfig.Fields["tag"]; ok { + if fieldConfigs, ok := fieldConfigs.([]*ast.Table); ok { + for _, fieldconfig := range fieldConfigs { + var t json_v2.DataSet + c.getFieldString(fieldconfig, "path", &t.Path) + c.getFieldString(fieldconfig, "rename", &t.Rename) + t.Type = "string" + tags = append(tags, t) + } + } + } + + return tags +} + // buildSerializer grabs the necessary entries from the ast.Table for creating // a serializers.Serializer object, and creates it, which can then be added onto // an Output object. 
diff --git a/plugins/parsers/json_v2/README.md b/plugins/parsers/json_v2/README.md index a1effd5940614..fee4093e89837 100644 --- a/plugins/parsers/json_v2/README.md +++ b/plugins/parsers/json_v2/README.md @@ -19,21 +19,37 @@ You configure this parser by describing the metric you want by defining the fiel timestamp_format = "" # A string with a valid timestamp format (see below for possible values) timestamp_timezone = "" # A string with with a valid timezone (see below for possible values) [[inputs.file.json_v2.tag]] - path = "" # A string with valid GJSON path syntax + path = "" # A string with valid GJSON path syntax to a non-array/non-object value rename = "new name" # A string with a new name for the tag key [[inputs.file.json_v2.field]] - path = "" # A string with valid GJSON path syntax + path = "" # A string with valid GJSON path syntax to a non-array/non-object value rename = "new name" # A string with a new name for the tag key type = "int" # A string specifying the type (int,uint,float,string,bool) [[inputs.file.json_v2.object]] - path = "" # A string with valid GJSON path syntax + path = "" # A string with valid GJSON path syntax, can include array's and object's + + ## Configuration to define what JSON keys should be used as timestamps ## timestamp_key = "" # A JSON key (for a nested key, prepend the parent keys with underscores) to a valid timestamp timestamp_format = "" # A string with a valid timestamp format (see below for possible values) timestamp_timezone = "" # A string with with a valid timezone (see below for possible values) - disable_prepend_keys = false (or true, just not both) + + ### Configuration to define what JSON keys should be included and how (field/tag) ### + tags = [] # List of JSON keys (for a nested key, prepend the parent keys with underscores) to be a tag instead of a field, when adding a JSON key in this list you don't have to define it in the included_keys list included_keys = [] # List of JSON keys (for a nested key, 
prepend the parent keys with underscores) that should be only included in result excluded_keys = [] # List of JSON keys (for a nested key, prepend the parent keys with underscores) that shouldn't be included in result - tags = [] # List of JSON keys (for a nested key, prepend the parent keys with underscores) to be a tag instead of a field + # When a tag/field sub-table is defined, they will be the only fields/tags along with any keys defined in the included_keys list. + # If the resulting values aren't included in the object/array returned by the root object path, it won't be included. + # You can define as many tag/field sub-tables as you want. + [[inputs.file.json_v2.object.tag]] + path = "" # A string with valid GJSON path syntax to a non-array/non-object value + rename = "new name" # A string with a new name for the tag key + [[inputs.file.json_v2.object.field]] + path = "" # A string with valid GJSON path syntax to a non-array/non-object value + rename = "new name" # A string with a new name for the tag key + type = "int" # A string specifying the type (int,uint,float,string,bool) + + ### Configuration to modify the resulting line protocol ### + disable_prepend_keys = false (or true, just not both) [inputs.file.json_v2.object.renames] # A map of JSON keys (for a nested key, prepend the parent keys with underscores) with a new name for the tag key key = "new name" [inputs.file.json_v2.object.fields] # A map of JSON keys (for a nested key, prepend the parent keys with underscores) with a type (int,uint,float,string,bool) @@ -185,3 +201,6 @@ The type values you can set: * `string`, any data can be formatted as a string. * `float`, string values (with valid numbers) or integers can be converted to a float. * `bool`, the string values "true" or "false" (regardless of capitalization) or the integer values `0` or `1` can be turned to a bool. 
+ +## The JSON path formats: GJSON, prepended keys, and limited GJSON + diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index fa0946621cde4..22b4f89e02b79 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -20,8 +20,15 @@ type Parser struct { measurementName string - iterateObjects bool - currentSettings JSONObject + iterateObjects bool + currentSettings JSONObject + fieldPathResults []PathResult + tagPathResults []PathResult +} + +type PathResult struct { + result gjson.Result + DataSet } type Config struct { @@ -53,9 +60,12 @@ type JSONObject struct { IncludedKeys []string `toml:"included_keys"` // OPTIONAL ExcludedKeys []string `toml:"excluded_keys"` // OPTIONAL DisablePrependKeys bool `toml:"disable_prepend_keys"` // OPTIONAL + FieldPaths []DataSet // OPTIONAL + TagPaths []DataSet // OPTIONAL } type MetricNode struct { + ParentIndex int OutputName string SetName string Tag bool @@ -254,9 +264,10 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if val.IsObject() { if p.iterateObjects { n := MetricNode{ - SetName: result.SetName, - Metric: m, - Result: val, + ParentIndex: result.ParentIndex + val.Index, + SetName: result.SetName, + Metric: m, + Result: val, } r, err := p.combineObject(n) if err != nil { @@ -282,6 +293,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { m.AddTag(f.Key, f.Value) } n := MetricNode{ + ParentIndex: result.ParentIndex + val.Index, Tag: result.Tag, DesiredType: result.DesiredType, OutputName: result.OutputName, @@ -314,6 +326,19 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { switch result.Value().(type) { case nil: // Ignore JSON values that are set as null default: + fieldDataSet := existsInPathResults(result.ParentIndex, p.fieldPathResults) + tagDataSet := existsInPathResults(result.ParentIndex, p.tagPathResults) + if fieldDataSet == nil && tagDataSet == nil { + return 
results, nil + } + outputName := result.OutputName + if fieldDataSet == nil && tagDataSet != nil { + result.Tag = true + if tagDataSet.Rename != "" { + outputName = tagDataSet.Rename + } + } + if result.Tag { result.DesiredType = "string" } @@ -322,9 +347,9 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { return nil, err } if result.Tag { - result.Metric.AddTag(result.OutputName, v.(string)) + result.Metric.AddTag(outputName, v.(string)) } else { - result.Metric.AddField(result.OutputName, v) + result.Metric.AddField(outputName, v) } } } @@ -335,22 +360,79 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { return results, nil } +func existsInPathResults(index int, indexList []PathResult) *DataSet { + for _, f := range indexList { + if f.result.Index == index { + return &f.DataSet + } + } + return nil +} + // processObjects will iterate over all 'object' configs and create metrics for each func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf.Metric, error) { p.iterateObjects = true var t []telegraf.Metric for _, c := range objects { p.currentSettings = c + if c.Path == "" { return nil, fmt.Errorf("GJSON path is required") } result := gjson.GetBytes(input, c.Path) + // hastag doesn't return index! 
idea: replace all hastags with index, and find lenght of array + for _, f := range c.FieldPaths { + var r PathResult + r.result = gjson.GetBytes(input, f.Path) + r.DataSet = f + p.fieldPathResults = append(p.fieldPathResults, r) + } + + for _, f := range c.TagPaths { + var r PathResult + r.result = gjson.GetBytes(input, f.Path) + r.DataSet = f + p.tagPathResults = append(p.tagPathResults, r) + } + + // resultTest := gjson.GetBytes(input, "root.station") + // subResult := gjson.GetBytes(input, "root.station.0.etd.0.estimate.0.minutes") + + // fmt.Println("key", subResult.Str, "val", subResult.Raw, "val index", subResult.Index) + // fmt.Println(string(input[subResult.Index : subResult.Index+len(subResult.Raw)])) + + // // fmt.Println(string(input[10 : 10+len(result.Raw)])) + // resultTest.ForEach(func(key, val gjson.Result) bool { + // // fmt.Println("key", key.Str, "val", val.Raw, "val index", val.Index) + // // fmt.Println(string(input[val.Index : val.Index+len(val.Raw)])) + // // if val.IsArray() { + // // val.ForEach(func(subkey, subval gjson.Result) bool { + // // fmt.Println("key", subkey.Str, "val", subval.Raw, "val index", subval.Index) + // // newIndex := subval.Index + val.Index + // // fmt.Println(newIndex) + // // fmt.Println(string(input[newIndex : newIndex+len(subval.Raw)])) + // if val.IsObject() { + // val.ForEach(func(subsubkey, subsubval gjson.Result) bool { + // fmt.Println("key", subsubkey.Str, "val", subsubval.Raw, "val index", subsubval.Index) + // newnewIndex := subsubval.Index + (val.Index + resultTest.Index) + // fmt.Println(newnewIndex) + // fmt.Println(string(input[newnewIndex : newnewIndex+len(subsubval.Raw)])) + // return true + // }) + // } + // // return true + // // }) + // // } + // return true + // }) + if result.Type == gjson.Null { return nil, fmt.Errorf("GJSON Path returned null") } rootObject := MetricNode{ + ParentIndex: result.Index, Metric: metric.New( p.measurementName, map[string]string{}, @@ -402,6 +484,7 @@ func (p 
*Parser) combineObject(result MetricNode) ([]telegraf.Metric, error) { } arrayNode := MetricNode{ + ParentIndex: result.ParentIndex + val.Index, DesiredType: result.DesiredType, Tag: result.Tag, OutputName: outputName, @@ -455,8 +538,8 @@ func (p *Parser) isIncluded(key string, val gjson.Result) bool { return true } // automatically adds tags to included_keys so it does NOT have to be repeated in the config - p.currentSettings.IncludedKeys = append(p.currentSettings.IncludedKeys, p.currentSettings.Tags...) - for _, i := range p.currentSettings.IncludedKeys { + allKeys := append(p.currentSettings.IncludedKeys, p.currentSettings.Tags...) + for _, i := range allKeys { if i == key { return true } diff --git a/plugins/parsers/json_v2/parser_test.go b/plugins/parsers/json_v2/parser_test.go index 7b34b83c0af8a..fd8f95a31afff 100644 --- a/plugins/parsers/json_v2/parser_test.go +++ b/plugins/parsers/json_v2/parser_test.go @@ -20,6 +20,7 @@ func TestData(t *testing.T) { name string test string }{ + // !!!! TEST JSON THAT ISN'T PRETTY FORMATTED !!!! 
{ name: "Test complex nesting", test: "complex_nesting", diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out new file mode 100644 index 0000000000000..a7db83863a63c --- /dev/null +++ b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out @@ -0,0 +1 @@ +file,from_station=COLM,to_station=ANTC,etd_estimate_direction=North etd_estimate_minutes=6i diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json new file mode 100644 index 0000000000000..45d0d5514ae76 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json @@ -0,0 +1,97 @@ +{ + "?xml": { + "@version": "1.0", + "@encoding": "utf-8" + }, + "root": { + "@id": "1", + "uri": { + "#cdata-section": "http://api.bart.gov/api/etd.aspx?cmd=etd&orig=COLM&dir=n&json=y" + }, + "date": "06/25/2021", + "time": "05:01:31 PM PDT", + "station": [ + { + "name": "Colma", + "abbr": "COLM", + "etd": [ + { + "destination": "Antioch", + "abbreviation": "ANTC", + "limited": "0", + "estimate": [ + { + "minutes": "6", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "36", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "51", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + } + ] + }, + { + "destination": "Richmond", + "abbreviation": "RICH", + "limited": "0", + "estimate": [ + { + "minutes": "12", + "platform": "2", + "direction": "North", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "26", + 
"platform": "2", + "direction": "North", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "41", + "platform": "2", + "direction": "North", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + } + ] + } + ] + } + ], + "message": "" + } +} diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf new file mode 100644 index 0000000000000..eb6f1b3e27cb3 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf @@ -0,0 +1,17 @@ +[[inputs.file]] + files = ["./testdata/test_subfieldtag_in_object/input.json"] + data_format = "json_v2" + [[inputs.file.json_v2]] + [[inputs.file.json_v2.object]] + path = "root.station" + [[inputs.file.json_v2.field]] + path = "root.station.0.etd.0.estimate.0.minutes" + type = "int" + [[inputs.file.json_v2.tag]] + path = "root.station.0.abbr" + rename = "from_station" + [[inputs.file.json_v2.tag]] + path = "root.station.0.etd.0.abbreviation" + rename = "to_station" + [[inputs.file.json_v2.tag]] + path = "root.station.0.etd.0.estimate.0.direction" From da1d8d84fb49fd0bd727dae97691a0ced06be314 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Sat, 26 Jun 2021 00:40:46 -0500 Subject: [PATCH 02/14] Simplify sub paths by indexing from root path --- plugins/parsers/json_v2/parser.go | 7 ++++--- .../testdata/test_subfieldtag_in_object/telegraf.conf | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index 22b4f89e02b79..0691a82eb2a26 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -382,16 +382,17 @@ func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf. result := gjson.GetBytes(input, c.Path) // hastag doesn't return index! 
idea: replace all hastags with index, and find lenght of array + scopedJSON := []byte(result.Raw) for _, f := range c.FieldPaths { var r PathResult - r.result = gjson.GetBytes(input, f.Path) + r.result = gjson.GetBytes(scopedJSON, f.Path) r.DataSet = f p.fieldPathResults = append(p.fieldPathResults, r) } for _, f := range c.TagPaths { var r PathResult - r.result = gjson.GetBytes(input, f.Path) + r.result = gjson.GetBytes(scopedJSON, f.Path) r.DataSet = f p.tagPathResults = append(p.tagPathResults, r) } @@ -432,7 +433,7 @@ func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf. } rootObject := MetricNode{ - ParentIndex: result.Index, + ParentIndex: 0, Metric: metric.New( p.measurementName, map[string]string{}, diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf index eb6f1b3e27cb3..b0ed43bf80c41 100644 --- a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf +++ b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf @@ -5,13 +5,13 @@ [[inputs.file.json_v2.object]] path = "root.station" [[inputs.file.json_v2.field]] - path = "root.station.0.etd.0.estimate.0.minutes" + path = "0.etd.0.estimate.0.minutes" type = "int" [[inputs.file.json_v2.tag]] - path = "root.station.0.abbr" + path = "0.abbr" rename = "from_station" [[inputs.file.json_v2.tag]] - path = "root.station.0.etd.0.abbreviation" + path = "0.etd.0.abbreviation" rename = "to_station" [[inputs.file.json_v2.tag]] - path = "root.station.0.etd.0.estimate.0.direction" + path = "0.etd.0.estimate.0.direction" From 2f68d6939fcb2d578c3d5c87f899dc118b0e7c86 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Tue, 29 Jun 2021 15:26:48 -0500 Subject: [PATCH 03/14] Cleanup --- plugins/parsers/json_v2/parser.go | 31 -------------------------- plugins/parsers/json_v2/parser_test.go | 1 - 2 files changed, 32 deletions(-) diff --git 
a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index 0691a82eb2a26..a5d2075378fcb 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -397,37 +397,6 @@ func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf. p.tagPathResults = append(p.tagPathResults, r) } - // resultTest := gjson.GetBytes(input, "root.station") - // subResult := gjson.GetBytes(input, "root.station.0.etd.0.estimate.0.minutes") - - // fmt.Println("key", subResult.Str, "val", subResult.Raw, "val index", subResult.Index) - // fmt.Println(string(input[subResult.Index : subResult.Index+len(subResult.Raw)])) - - // // fmt.Println(string(input[10 : 10+len(result.Raw)])) - // resultTest.ForEach(func(key, val gjson.Result) bool { - // // fmt.Println("key", key.Str, "val", val.Raw, "val index", val.Index) - // // fmt.Println(string(input[val.Index : val.Index+len(val.Raw)])) - // // if val.IsArray() { - // // val.ForEach(func(subkey, subval gjson.Result) bool { - // // fmt.Println("key", subkey.Str, "val", subval.Raw, "val index", subval.Index) - // // newIndex := subval.Index + val.Index - // // fmt.Println(newIndex) - // // fmt.Println(string(input[newIndex : newIndex+len(subval.Raw)])) - // if val.IsObject() { - // val.ForEach(func(subsubkey, subsubval gjson.Result) bool { - // fmt.Println("key", subsubkey.Str, "val", subsubval.Raw, "val index", subsubval.Index) - // newnewIndex := subsubval.Index + (val.Index + resultTest.Index) - // fmt.Println(newnewIndex) - // fmt.Println(string(input[newnewIndex : newnewIndex+len(subsubval.Raw)])) - // return true - // }) - // } - // // return true - // // }) - // // } - // return true - // }) - if result.Type == gjson.Null { return nil, fmt.Errorf("GJSON Path returned null") } diff --git a/plugins/parsers/json_v2/parser_test.go b/plugins/parsers/json_v2/parser_test.go index fd8f95a31afff..7b34b83c0af8a 100644 --- a/plugins/parsers/json_v2/parser_test.go +++ 
b/plugins/parsers/json_v2/parser_test.go @@ -20,7 +20,6 @@ func TestData(t *testing.T) { name string test string }{ - // !!!! TEST JSON THAT ISN'T PRETTY FORMATTED !!!! { name: "Test complex nesting", test: "complex_nesting", From d521e07296c0d7980166a6fcb3231d312dbff807 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Thu, 1 Jul 2021 15:59:08 -0500 Subject: [PATCH 04/14] Support hashtags --- go.sum | 4 ++ plugins/parsers/json_v2/parser.go | 40 ++++++++++++++----- .../test_subfieldtag_in_object/telegraf.conf | 8 ++-- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/go.sum b/go.sum index 4189b415723f0..06e51339f5e3c 100644 --- a/go.sum +++ b/go.sum @@ -1506,6 +1506,8 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/sspaink/gjson v1.8.1-0.20210701110044-7460ecfe6948 h1:zex3QSNZ1UZH4RReIi26TnMt+82mvuGwoIv+39cAW08= +github.com/sspaink/gjson v1.8.1-0.20210701110044-7460ecfe6948/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271 h1:WhxRHzgeVGETMlmVfqhRn8RIeeNoPr2Czh33I4Zdccw= @@ -1537,6 +1539,8 @@ github.com/testcontainers/testcontainers-go v0.11.1 h1:FiYsB83LSGbiawoV8TpAZGfcC github.com/testcontainers/testcontainers-go v0.11.1/go.mod h1:/V0UVq+1e7NWYoqTPog179clf0Qp9TOyp4EcXaEFQz8= github.com/tidwall/gjson v1.8.0 h1:Qt+orfosKn0rbNTZqHYDqBrmm3UDA4KRkv70fDzG+PQ= github.com/tidwall/gjson v1.8.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= 
+github.com/testcontainers/testcontainers-go v0.11.0 h1:HO5YOx2DYBHqcg4MzVWPj3FuHAv7USWVu94vCSsgiaM= +github.com/testcontainers/testcontainers-go v0.11.0/go.mod h1:HztBCODzuA+YpMXGK8amjO8j50jz2gcT0BOzSKUiYIs= github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index a5d2075378fcb..a1562b89ca5f9 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -20,7 +20,8 @@ type Parser struct { measurementName string - iterateObjects bool + iterateObjects bool + currentSettings JSONObject fieldPathResults []PathResult tagPathResults []PathResult @@ -326,16 +327,27 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { switch result.Value().(type) { case nil: // Ignore JSON values that are set as null default: - fieldDataSet := existsInPathResults(result.ParentIndex, p.fieldPathResults) - tagDataSet := existsInPathResults(result.ParentIndex, p.tagPathResults) - if fieldDataSet == nil && tagDataSet == nil { - return results, nil - } outputName := result.OutputName - if fieldDataSet == nil && tagDataSet != nil { - result.Tag = true - if tagDataSet.Rename != "" { - outputName = tagDataSet.Rename + desiredType := result.DesiredType + + if len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0 { + fieldDataSet := existsInPathResults(result.ParentIndex, p.fieldPathResults) + tagDataSet := existsInPathResults(result.ParentIndex, p.tagPathResults) + if fieldDataSet == nil && tagDataSet == nil { + return results, nil + } + if tagDataSet != nil { + result.Tag = true + if tagDataSet.Rename != "" { + outputName = tagDataSet.Rename + } + } + + if fieldDataSet != nil { + desiredType = fieldDataSet.Type + if fieldDataSet.Rename != 
"" { + outputName = fieldDataSet.Rename + } } } @@ -362,7 +374,13 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { func existsInPathResults(index int, indexList []PathResult) *DataSet { for _, f := range indexList { - if f.result.Index == index { + if f.result.Index == 0 { + for _, i := range f.result.HashtagIndexes { + if index == i { + return &f.DataSet + } + } + } else if f.result.Index == index { return &f.DataSet } } diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf index b0ed43bf80c41..634bd4248dfe2 100644 --- a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf +++ b/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf @@ -5,13 +5,13 @@ [[inputs.file.json_v2.object]] path = "root.station" [[inputs.file.json_v2.field]] - path = "0.etd.0.estimate.0.minutes" + path = "#.etd.0.estimate.0.minutes" type = "int" [[inputs.file.json_v2.tag]] - path = "0.abbr" + path = "#.abbr" rename = "from_station" [[inputs.file.json_v2.tag]] - path = "0.etd.0.abbreviation" + path = "#.etd.0.abbreviation" rename = "to_station" [[inputs.file.json_v2.tag]] - path = "0.etd.0.estimate.0.direction" + path = "#.etd.0.estimate.0.direction" From 755aea5fbddae56399ec116519e0a49009bb0730 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Fri, 2 Jul 2021 15:39:58 -0500 Subject: [PATCH 05/14] More tests and support arrays/objects --- config/config.go | 4 +- plugins/parsers/json_v2/README.md | 41 +++-- plugins/parsers/json_v2/parser.go | 143 ++++++++++-------- plugins/parsers/json_v2/parser_test.go | 8 + .../expected.out | 0 .../input.json | 0 .../telegraf.conf | 10 +- .../subfieldtag_in_object_2/expected.out | 4 + .../subfieldtag_in_object_2/input.json | 10 ++ .../subfieldtag_in_object_2/telegraf.conf | 16 ++ 10 files changed, 155 insertions(+), 81 deletions(-) rename 
plugins/parsers/json_v2/testdata/{test_subfieldtag_in_object => subfieldtag_in_object}/expected.out (100%) rename plugins/parsers/json_v2/testdata/{test_subfieldtag_in_object => subfieldtag_in_object}/input.json (100%) rename plugins/parsers/json_v2/testdata/{test_subfieldtag_in_object => subfieldtag_in_object}/telegraf.conf (63%) create mode 100644 plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/expected.out create mode 100644 plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/input.json create mode 100644 plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/telegraf.conf diff --git a/config/config.go b/config/config.go index 85f7f083c394c..76aa494c4ca43 100644 --- a/config/config.go +++ b/config/config.go @@ -1439,8 +1439,8 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config, c.getFieldStringMap(objectConfig, "renames", &o.Renames) c.getFieldStringMap(objectConfig, "fields", &o.Fields) - o.FieldPaths = getFieldSubtable(c, metricConfig) - o.TagPaths = getTagSubtable(c, metricConfig) + o.FieldPaths = getFieldSubtable(c, objectConfig) + o.TagPaths = getTagSubtable(c, objectConfig) mc.JSONObjects = append(mc.JSONObjects, o) } diff --git a/plugins/parsers/json_v2/README.md b/plugins/parsers/json_v2/README.md index fee4093e89837..f07aa2f406eea 100644 --- a/plugins/parsers/json_v2/README.md +++ b/plugins/parsers/json_v2/README.md @@ -41,10 +41,10 @@ You configure this parser by describing the metric you want by defining the fiel # If the resulting values aren't included in the object/array returned by the root object path, it won't be included. # You can define as many tag/field sub-tables as you want. 
[[inputs.file.json_v2.object.tag]] - path = "" # A string with valid GJSON path syntax to a non-array/non-object value + path = "" # A string with valid GJSON path syntax, can include arrays and objects rename = "new name" # A string with a new name for the tag key [[inputs.file.json_v2.object.field]] - path = "" # A string with valid GJSON path syntax to a non-array/non-object value + path = "" # A string with valid GJSON path syntax, can include arrays and objects rename = "new name" # A string with a new name for the tag key type = "int" # A string specifying the type (int,uint,float,string,bool) @@ -55,7 +55,9 @@ You configure this parser by describing the metric you want by defining the fiel [inputs.file.json_v2.object.fields] # A map of JSON keys (for a nested key, prepend the parent keys with underscores) with a type (int,uint,float,string,bool) key = "int" ``` + --- + ### root config options * **measurement_name (OPTIONAL)**: Will set the measurement name to the provided string. @@ -86,26 +88,34 @@ The notable difference between `field` and `tag`, is that `tag` values will alwa #### **field** -* **path (REQUIRED)**: You must define the path query that gathers the object with [GJSON Path Syntax](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md). +Using this field configuration you can gather non-array/non-object values. Note this acts as a global field when used with the `object` configuration: if you gather an array of metrics using `object` then the field gathered will be added to each resulting metric without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. + +* **path (REQUIRED)**: A string with valid GJSON path syntax to a non-array/non-object value * **name (OPTIONAL)**: You can define a string value to set the field name. If not defined it will use the trailing word from the provided query. 
* **type (OPTIONAL)**: You can define a string value to set the desired type (float, int, uint, string, bool). If not defined it won't enforce a type and default to using the original type defined in the JSON (bool, float, or string). #### **tag** -* **path (REQUIRED)**: You must define the path query that gathers the object with [GJSON Path Syntax](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md). +Using this tag configuration you can gather non-array/non-object values. Note this acts as a global tag when used with the `object` configuration: if you gather an array of metrics using `object` then the tag gathered will be added to each resulting metric without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. + +* **path (REQUIRED)**: A string with valid GJSON path syntax to a non-array/non-object value * **name (OPTIONAL)**: You can define a string value to set the field name. If not defined it will use the trailing word from the provided query. For good examples in using `field` and `tag` you can reference the following example configs: -* [fields_and_tags](testdata/fields_and_tags/telegraf.conf) --- + ### object -With the configuration section `object`, you can gather metrics from [JSON objects](https://www.w3schools.com/js/js_json_objects.asp). +With the configuration section `object`, you can gather metrics from [JSON objects](https://www.w3schools.com/js/js_json_objects.asp). This is defined in TOML as an array table using double brackets. 
+ -The following keys can be set for `object`: +#### The following keys can be set for `object` * **path (REQUIRED)**: You must define the path query that gathers the object with [GJSON Path Syntax](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md) + +*Keys to define what JSON keys should be used as timestamps:* + * **timestamp_key(OPTIONAL)**: You can define a json key (for a nested key, prepend the parent keys with underscores) for the value to be set as the timestamp from the JSON input. * **timestamp_format (OPTIONAL, but REQUIRED when timestamp_query is defined**: Must be set to `unix`, `unix_ms`, `unix_us`, `unix_ns`, or the Go "reference time" which is defined to be the specific time: @@ -113,12 +123,20 @@ the Go "reference time" which is defined to be the specific time: * **timestamp_timezone (OPTIONAL, but REQUIRES timestamp_query**: This option should be set to a [Unix TZ value](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), such as `America/New_York`, to `Local` to utilize the system timezone, or to `UTC`. Defaults to `UTC` -* **disable_prepend_keys (OPTIONAL)**: Set to true to prevent resulting nested data to contain the parent key prepended to its key **NOTE**: duplicate names can overwrite each other when this is enabled + +*Configuration to define what JSON keys should be included and how (field/tag):* + * **included_keys (OPTIONAL)**: You can define a list of key's that should be the only data included in the metric, by default it will include everything. 
* **excluded_keys (OPTIONAL)**: You can define json keys to be excluded in the metric, for a nested key, prepend the parent keys with underscores * **tags (OPTIONAL)**: You can define json keys to be set as tags instead of fields, if you define a key that is an array or object then all nested values will become a tag -* **renames (OPTIONAL)**: A table matching the json key with the desired name (oppossed to defaulting to using the key), use names that include the prepended keys of its parent keys for nested results -* **fields (OPTIONAL)**: A table matching the json key with the desired type (int,string,bool,float), if you define a key that is an array or object then all nested values will become that type +* **field (OPTIONAL, defined in TOML as an array table using double brackets)**: Identical to the [field](#field) table you can define, but with two key differences. The path supports arrays and objects and is defined under the object table and therefore will adhere to how the JSON is structured. You want to use this if you want the field/tag to be added as it would if it were in the included_key list, but then use the GJSON path syntax. +* **tag (OPTIONAL, defined in TOML as an array table using double brackets)**: Identical to the [tag](#tag) table you can define, but with two key differences. The path supports arrays and objects and is defined under the object table and therefore will adhere to how the JSON is structured. You want to use this if you want the field/tag to be added as it would if it were in the included_key list, but then use the GJSON path syntax. 
+ +*Configuration to modify the resulting line protocol:* + +* **disable_prepend_keys (OPTIONAL)**: Set to true to prevent resulting nested data to contain the parent key prepended to its key **NOTE**: duplicate names can overwrite each other when this is enabled +* **renames (OPTIONAL, defined in TOML as a table using single bracket)**: A table matching the json key with the desired name (opposed to defaulting to using the key), use names that include the prepended keys of its parent keys for nested results +* **fields (OPTIONAL, defined in TOML as a table using single bracket)**: A table matching the json key with the desired type (int,string,bool,float), if you define a key that is an array or object then all nested values will become that type ## Arrays and Objects @@ -201,6 +219,3 @@ The type values you can set: * `string`, any data can be formatted as a string. * `float`, string values (with valid numbers) or integers can be converted to a float. * `bool`, the string values "true" or "false" (regardless of capitalization) or the integer values `0` or `1` can be turned to a bool.
- -## The JSON path formats: GJSON, prepended keys, and limited GJSON - diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index a1562b89ca5f9..b05cd39e6ea40 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -13,6 +13,7 @@ import ( ) type Parser struct { + InputJSON []byte Configs []Config DefaultTags map[string]string Log telegraf.Logger @@ -22,13 +23,13 @@ type Parser struct { iterateObjects bool - currentSettings JSONObject - fieldPathResults []PathResult - tagPathResults []PathResult + currentSettings JSONObject + pathResults []PathResult } type PathResult struct { result gjson.Result + tag bool DataSet } @@ -66,19 +67,21 @@ type JSONObject struct { } type MetricNode struct { - ParentIndex int - OutputName string - SetName string - Tag bool - DesiredType string // Can be "int", "uint", "float", "bool", "string" + ParentIndex int + OutputName string + SetName string + Tag bool + DesiredType string // Can be "int", "uint", "float", "bool", "string" + IncludeCollection *PathResult // If set to true, it should be auto included Metric telegraf.Metric gjson.Result } func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { + p.InputJSON = input // Only valid JSON is supported - if !gjson.Valid(string(input)) { + if !gjson.Valid(string(p.InputJSON)) { return nil, fmt.Errorf("Invalid JSON provided, unable to parse") } @@ -88,7 +91,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { // Measurement name configuration p.measurementName = c.MeasurementName if c.MeasurementNamePath != "" { - result := gjson.GetBytes(input, c.MeasurementNamePath) + result := gjson.GetBytes(p.InputJSON, c.MeasurementNamePath) if !result.IsArray() && !result.IsObject() { p.measurementName = result.String() } @@ -97,7 +100,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { // Timestamp configuration p.Timestamp = time.Now() if c.TimestampPath != "" { - result := 
gjson.GetBytes(input, c.TimestampPath) + result := gjson.GetBytes(p.InputJSON, c.TimestampPath) if !result.IsArray() && !result.IsObject() { if c.TimestampFormat == "" { err := fmt.Errorf("use of 'timestamp_query' requires 'timestamp_format'") @@ -112,17 +115,17 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { } } - fields, err := p.processMetric(c.Fields, input, false) + fields, err := p.processMetric(c.Fields, false) if err != nil { return nil, err } - tags, err := p.processMetric(c.Tags, input, true) + tags, err := p.processMetric(c.Tags, true) if err != nil { return nil, err } - objects, err := p.processObjects(c.JSONObjects, input) + objects, err := p.processObjects(c.JSONObjects) if err != nil { return nil, err } @@ -148,7 +151,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { // processMetric will iterate over all 'field' or 'tag' configs and create metrics for each // A field/tag can either be a single value or an array of values, each resulting in its own metric // For multiple configs, a set of metrics is created from the cartesian product of each separate config -func (p *Parser) processMetric(data []DataSet, input []byte, tag bool) ([]telegraf.Metric, error) { +func (p *Parser) processMetric(data []DataSet, tag bool) ([]telegraf.Metric, error) { if len(data) == 0 { return nil, nil } @@ -160,7 +163,7 @@ func (p *Parser) processMetric(data []DataSet, input []byte, tag bool) ([]telegr if c.Path == "" { return nil, fmt.Errorf("GJSON path is required") } - result := gjson.GetBytes(input, c.Path) + result := gjson.GetBytes(p.InputJSON, c.Path) if result.IsObject() { p.Log.Debugf("Found object in the path: %s, ignoring it please use 'object' to gather metrics from objects", c.Path) @@ -244,6 +247,9 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { p.Log.Debugf("Found object in query ignoring it please use 'object' to gather metrics from objects") return results, nil } + if 
result.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { + result.IncludeCollection = p.checkIfIncludedCollection(result.Index, result.Raw) + } r, err := p.combineObject(result) if err != nil { return nil, err @@ -254,6 +260,9 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if result.IsArray() { var err error + if result.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { + result.IncludeCollection = p.checkIfIncludedCollection(result.Index, result.Raw) + } result.ForEach(func(_, val gjson.Result) bool { m := metric.New( p.measurementName, @@ -261,14 +270,14 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { map[string]interface{}{}, p.Timestamp, ) - if val.IsObject() { if p.iterateObjects { - n := MetricNode{ - ParentIndex: result.ParentIndex + val.Index, - SetName: result.SetName, - Metric: m, - Result: val, + n := result + n.ParentIndex += val.Index + n.Metric = m + n.Result = val + if n.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { + n.IncludeCollection = p.checkIfIncludedCollection(n.Index, n.Raw) } r, err := p.combineObject(n) if err != nil { @@ -293,14 +302,12 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { for _, f := range result.Metric.TagList() { m.AddTag(f.Key, f.Value) } - n := MetricNode{ - ParentIndex: result.ParentIndex + val.Index, - Tag: result.Tag, - DesiredType: result.DesiredType, - OutputName: result.OutputName, - SetName: result.SetName, - Metric: m, - Result: val, + n := result + n.ParentIndex += val.Index + n.Metric = m + n.Result = val + if n.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { + n.IncludeCollection = p.checkIfIncludedCollection(n.Index, n.Raw) } r, err := p.expandArray(n) if err != nil { @@ -331,28 
+338,30 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { desiredType := result.DesiredType if len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0 { - fieldDataSet := existsInPathResults(result.ParentIndex, p.fieldPathResults) - tagDataSet := existsInPathResults(result.ParentIndex, p.tagPathResults) - if fieldDataSet == nil && tagDataSet == nil { + var pathResult *PathResult + if result.IncludeCollection != nil { + pathResult = result.IncludeCollection + } else { + pathResult = p.existsInpathResults(result.ParentIndex, result.Raw) + } + if pathResult == nil { return results, nil } - if tagDataSet != nil { + if pathResult.tag { result.Tag = true - if tagDataSet.Rename != "" { - outputName = tagDataSet.Rename - } } - if fieldDataSet != nil { - desiredType = fieldDataSet.Type - if fieldDataSet.Rename != "" { - outputName = fieldDataSet.Rename - } + if !pathResult.tag { + desiredType = pathResult.Type + } + + if pathResult.Rename != "" { + outputName = pathResult.Rename } } if result.Tag { - result.DesiredType = "string" + desiredType = "string" } v, err := p.convertType(result.Result, result.DesiredType, result.SetName) if err != nil { @@ -372,23 +381,38 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { return results, nil } -func existsInPathResults(index int, indexList []PathResult) *DataSet { - for _, f := range indexList { +func (p *Parser) existsInpathResults(index int, raw string) *PathResult { + for _, f := range p.pathResults { if f.result.Index == 0 { for _, i := range f.result.HashtagIndexes { - if index == i { - return &f.DataSet + if i == index { + return &f } } } else if f.result.Index == index { - return &f.DataSet + return &f + } + } + return nil +} + +func (p *Parser) checkIfIncludedCollection(index int, raw string) *PathResult { + for _, f := range p.pathResults { + if f.result.Index == 0 { + for _, i := range f.result.HashtagIndexes { + if 
string(p.InputJSON[i:i+len(raw)]) == raw { + return &f + } + } + } else if f.result.Index == index { + return &f } } return nil } // processObjects will iterate over all 'object' configs and create metrics for each -func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf.Metric, error) { +func (p *Parser) processObjects(objects []JSONObject) ([]telegraf.Metric, error) { p.iterateObjects = true var t []telegraf.Metric for _, c := range objects { @@ -397,7 +421,7 @@ func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf. if c.Path == "" { return nil, fmt.Errorf("GJSON path is required") } - result := gjson.GetBytes(input, c.Path) + result := gjson.GetBytes(p.InputJSON, c.Path) // hastag doesn't return index! idea: replace all hastags with index, and find lenght of array scopedJSON := []byte(result.Raw) @@ -405,14 +429,15 @@ func (p *Parser) processObjects(objects []JSONObject, input []byte) ([]telegraf. var r PathResult r.result = gjson.GetBytes(scopedJSON, f.Path) r.DataSet = f - p.fieldPathResults = append(p.fieldPathResults, r) + p.pathResults = append(p.pathResults, r) } for _, f := range c.TagPaths { var r PathResult r.result = gjson.GetBytes(scopedJSON, f.Path) r.DataSet = f - p.tagPathResults = append(p.tagPathResults, r) + r.tag = true + p.pathResults = append(p.pathResults, r) } if result.Type == gjson.Null { @@ -471,15 +496,11 @@ func (p *Parser) combineObject(result MetricNode) ([]telegraf.Metric, error) { } } - arrayNode := MetricNode{ - ParentIndex: result.ParentIndex + val.Index, - DesiredType: result.DesiredType, - Tag: result.Tag, - OutputName: outputName, - SetName: setName, - Metric: result.Metric, - Result: val, - } + arrayNode := result + arrayNode.ParentIndex += val.Index + arrayNode.OutputName = outputName + arrayNode.SetName = setName + arrayNode.Result = val for k, t := range p.currentSettings.Fields { if setName == k { diff --git a/plugins/parsers/json_v2/parser_test.go 
b/plugins/parsers/json_v2/parser_test.go index 7b34b83c0af8a..83420a9c0c985 100644 --- a/plugins/parsers/json_v2/parser_test.go +++ b/plugins/parsers/json_v2/parser_test.go @@ -28,6 +28,14 @@ func TestData(t *testing.T) { name: "Test having an array of objects", test: "array_of_objects", }, + { + name: "A second test when selecting with sub field and tags", + test: "subfieldtag_in_object_2", + }, + { + name: "Test selecting with sub field and tags", + test: "subfieldtag_in_object", + }, { name: "Test using just fields and tags", test: "fields_and_tags", diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out b/plugins/parsers/json_v2/testdata/subfieldtag_in_object/expected.out similarity index 100% rename from plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/expected.out rename to plugins/parsers/json_v2/testdata/subfieldtag_in_object/expected.out diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json b/plugins/parsers/json_v2/testdata/subfieldtag_in_object/input.json similarity index 100% rename from plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/input.json rename to plugins/parsers/json_v2/testdata/subfieldtag_in_object/input.json diff --git a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf b/plugins/parsers/json_v2/testdata/subfieldtag_in_object/telegraf.conf similarity index 63% rename from plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf rename to plugins/parsers/json_v2/testdata/subfieldtag_in_object/telegraf.conf index 634bd4248dfe2..7a8a283d77c3d 100644 --- a/plugins/parsers/json_v2/testdata/test_subfieldtag_in_object/telegraf.conf +++ b/plugins/parsers/json_v2/testdata/subfieldtag_in_object/telegraf.conf @@ -1,17 +1,17 @@ [[inputs.file]] - files = ["./testdata/test_subfieldtag_in_object/input.json"] + files = ["./testdata/subfieldtag_in_object/input.json"] data_format = "json_v2" [[inputs.file.json_v2]] 
[[inputs.file.json_v2.object]] path = "root.station" - [[inputs.file.json_v2.field]] + [[inputs.file.json_v2.object.field]] path = "#.etd.0.estimate.0.minutes" type = "int" - [[inputs.file.json_v2.tag]] + [[inputs.file.json_v2.object.tag]] path = "#.abbr" rename = "from_station" - [[inputs.file.json_v2.tag]] + [[inputs.file.json_v2.object.tag]] path = "#.etd.0.abbreviation" rename = "to_station" - [[inputs.file.json_v2.tag]] + [[inputs.file.json_v2.object.tag]] path = "#.etd.0.estimate.0.direction" diff --git a/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/expected.out b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/expected.out new file mode 100644 index 0000000000000..89748967a1ee9 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/expected.out @@ -0,0 +1,4 @@ +file,data=3 cnt=23i,format=0i +file,data=7 cnt=23i,format=0i +file,data=10 cnt=23i,format=0i +file,data=23 cnt=23i,format=0i diff --git a/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/input.json b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/input.json new file mode 100644 index 0000000000000..62b768eae05a7 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/input.json @@ -0,0 +1,10 @@ +{ + "cnt": 23, + "data": [ + 3, + 7, + 10, + 23 + ], + "format": 0 +} diff --git a/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/telegraf.conf b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/telegraf.conf new file mode 100644 index 0000000000000..60d7d18da43d0 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/subfieldtag_in_object_2/telegraf.conf @@ -0,0 +1,16 @@ +# Example taken from: https://github.com/influxdata/telegraf/issues/5940 + +[[inputs.file]] + files = ["./testdata/subfieldtag_in_object_2/input.json"] + data_format = "json_v2" + [[inputs.file.json_v2]] + [[inputs.file.json_v2.object]] + path = "@this" + [[inputs.file.json_v2.object.tag]] + path = "data" + [[inputs.file.json_v2.object.field]] + 
path = "cnt" + type = "int" + [[inputs.file.json_v2.object.field]] + path = "format" + type = "int" From 139942480405a0ff2bea0a53bb8583bac6dec169 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Fri, 2 Jul 2021 15:45:59 -0500 Subject: [PATCH 06/14] Clean up README --- plugins/parsers/json_v2/README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/plugins/parsers/json_v2/README.md b/plugins/parsers/json_v2/README.md index f07aa2f406eea..d1e2e9c407255 100644 --- a/plugins/parsers/json_v2/README.md +++ b/plugins/parsers/json_v2/README.md @@ -1,10 +1,10 @@ # JSON Parser - Version 2 -This parser takes valid JSON input and turns it into metrics. The query syntax supported is [GJSON Path Syntax](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md), you can go to this playground to test out your GJSON path here: https://gjson.dev/. You can find multiple examples under the `testdata` folder. +This parser takes valid JSON input and turns it into line protocol. The query syntax supported is [GJSON Path Syntax](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md), you can go to this playground to test out your GJSON path here: https://gjson.dev/. You can find multiple examples under the `testdata` folder. ## Configuration -You configure this parser by describing the metric you want by defining the fields and tags from the input. The configuration is divided into config sub-tables called `field`, `tag`, and `object`. In the example below you can see all the possible configuration keys you can define for each config table. In the sections that follow these configuration keys are defined in more detail. +You configure this parser by describing the line protocol you want by defining the fields and tags from the input. The configuration is divided into config sub-tables called `field`, `tag`, and `object`. In the example below you can see all the possible configuration keys you can define for each config table. 
In the sections that follow these configuration keys are defined in more detail. **Example configuration:** @@ -74,7 +74,7 @@ such as `America/New_York`, to `Local` to utilize the system timezone, or to `UT ### `field` and `tag` config options -`field` and `tag` represent the elements of [line protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/), which is used to define a `metric`. You can use the `field` and `tag` config tables to gather a single value or an array of values that all share the same type and name. With this you can add a field or tag to a metric from data stored anywhere in your JSON. If you define the GJSON path to return a single value then you will get a single resutling metric that contains the field/tag. If you define the GJSON path to return an array of values, then each field/tag will be put into a separate metric (you use the # character to retrieve JSON arrays, find examples [here](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md#arrays)). +`field` and `tag` represent the elements of [line protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/). You can use the `field` and `tag` config tables to gather a single value or an array of values that all share the same type and name. With this you can add a field or tag to a line protocol from data stored anywhere in your JSON. If you define the GJSON path to return a single value then you will get a single resulting line protocol that contains the field/tag. If you define the GJSON path to return an array of values, then each field/tag will be put into a separate line protocol (you use the # character to retrieve JSON arrays, find examples [here](https://github.com/tidwall/gjson/blob/v1.7.5/SYNTAX.md#arrays)). Note that objects are handled separately, therefore if you provide a path that returns a object it will be ignored.
You will need use the `object` config table to parse objects, because `field` and `tag` doesn't handle relationships between data. Each `field` and `tag` you define is handled as a separate data point. @@ -88,7 +88,7 @@ The notable difference between `field` and `tag`, is that `tag` values will alwa #### **field** -Using this field configuration you can gather a non-array/non-object values. Note this acts as a global field when used with the `object` configuration, if you gather an array of metrics using `object` then the field gathered will be added to each resulting metric without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. +Using this field configuration you can gather a non-array/non-object values. Note this acts as a global field when used with the `object` configuration, if you gather an array of values using `object` then the field gathered will be added to each resulting line protocol without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. * **path (REQUIRED)**: A string with valid GJSON path syntax to a non-array/non-object value * **name (OPTIONAL)**: You can define a string value to set the field name. If not defined it will use the trailing word from the provided query. @@ -96,7 +96,8 @@ Using this field configuration you can gather a non-array/non-object values. Not #### **tag** -Using this tag configuration you can gather a non-array/non-object values. Note this acts as a global tag when used with the `object` configuration, if you gather an array of metrics using `object` then the tag gathered will be added to each resulting metric without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. +Using this tag configuration you can gather a non-array/non-object values. 
Note this acts as a global tag when used with the `object` configuration, if you gather an array of values using `object` then the tag gathered will be added to each resulting line protocol without acknowledging its location in the original JSON. This is defined in TOML as an array table using double brackets. + * **path (REQUIRED)**: A string with valid GJSON path syntax to a non-array/non-object value * **name (OPTIONAL)**: You can define a string value to set the field name. If not defined it will use the trailing word from the provided query. @@ -107,8 +108,7 @@ For good examples in using `field` and `tag` you can reference the following exa ### object -With the configuration section `object`, you can gather metrics from [JSON objects](https://www.w3schools.com/js/js_json_objects.asp). This is defined in TOML as an array table using double brackets. - +With the configuration section `object`, you can gather values from [JSON objects](https://www.w3schools.com/js/js_json_objects.asp). This is defined in TOML as an array table using double brackets. #### The following keys can be set for `object` @@ -126,8 +126,8 @@ such as `America/New_York`, to `Local` to utilize the system timezone, or to `UT *Configuration to define what JSON keys should be included and how (field/tag):* -* **included_keys (OPTIONAL)**: You can define a list of key's that should be the only data included in the metric, by default it will include everything. -* **excluded_keys (OPTIONAL)**: You can define json keys to be excluded in the metric, for a nested key, prepend the parent keys with underscores +* **included_keys (OPTIONAL)**: You can define a list of key's that should be the only data included in the line protocol, by default it will include everything. 
+* **excluded_keys (OPTIONAL)**: You can define json keys to be excluded in the line protocol, for a nested key, prepend the parent keys with underscores * **tags (OPTIONAL)**: You can define json keys to be set as tags instead of fields, if you define a key that is an array or object then all nested values will become a tag * **field (OPTIONAL, defined in TOML as an array table using double brackets)**: Identical to the [field](#field) table you can define, but with two key differences. The path supports arrays and objects and is defined under the object table and therefore will adhere to how the JSON is structured. You want to use this if you want the field/tag to be added as it would if it were in the included_key list, but then use the GJSON path syntax. * **tag (OPTIONAL, defined in TOML as an array table using double brackets)**: Identical to the [tag](#tag) table you can define, but with two key differences. The path supports arrays and objects and is defined under the object table and therefore will adhere to how the JSON is structured. You want to use this if you want the field/tag to be added as it would if it were in the included_key list, but then use the GJSON path syntax. @@ -142,11 +142,11 @@ such as `America/New_York`, to `Local` to utilize the system timezone, or to `UT The following describes the high-level approach when parsing arrays and objects: -**Array**: Every element in an array is treated as a *separate* metric +**Array**: Every element in an array is treated as a *separate* line protocol -**Object**: Every key/value in a object is treated as a *single* metric +**Object**: Every key/value in a object is treated as a *single* line protocol -When handling nested arrays and objects, these above rules continue to apply as the parser creates metrics. When an object has multiple array's as values, the array's will become separate metrics containing only non-array values from the obejct. 
Below you can see an example of this behavior, with an input json containing an array of book objects that has a nested array of characters. +When handling nested arrays and objects, these above rules continue to apply as the parser creates line protocol. When an object has multiple arrays as values, the arrays will become separate line protocol containing only non-array values from the object. Below you can see an example of this behavior, with an input json containing an array of book objects that has a nested array of characters. Example JSON: @@ -191,7 +191,7 @@ Example configuration: disable_prepend_keys = true ``` -Expected metrics: +Expected line protocol: ``` file,title=The\ Lord\ Of\ The\ Rings author="Tolkien",chapters="A Long-expected Party" @@ -207,7 +207,7 @@ You can find more complicated examples under the folder `testdata`. ## Types -For each field you have the option to define the types for each metric. The following rules are in place for this configuration: +For each field you have the option to define the types. The following rules are in place for this configuration: * If a type is explicitly defined, the parser will enforce this type and convert the data to the defined type if possible. If the type can't be converted then the parser will fail.
* If a type isn't defined, the parser will use the default type defined in the JSON (int, float, string) From 0d8d15277a89129d402511a386a88252c7203468 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Fri, 2 Jul 2021 19:22:14 -0500 Subject: [PATCH 07/14] Remove old function --- plugins/parsers/json_v2/parser.go | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index b05cd39e6ea40..8af47608f2252 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -248,7 +248,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { return results, nil } if result.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { - result.IncludeCollection = p.checkIfIncludedCollection(result.Index, result.Raw) + result.IncludeCollection = p.existsInpathResults(result.Index, result.Raw) } r, err := p.combineObject(result) if err != nil { @@ -261,7 +261,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if result.IsArray() { var err error if result.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { - result.IncludeCollection = p.checkIfIncludedCollection(result.Index, result.Raw) + result.IncludeCollection = p.existsInpathResults(result.Index, result.Raw) } result.ForEach(func(_, val gjson.Result) bool { m := metric.New( @@ -277,7 +277,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { n.Metric = m n.Result = val if n.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { - n.IncludeCollection = p.checkIfIncludedCollection(n.Index, n.Raw) + n.IncludeCollection = p.existsInpathResults(n.Index, n.Raw) } r, err := p.combineObject(n) if err != nil { @@ -307,7 +307,7 @@ func (p *Parser) 
expandArray(result MetricNode) ([]telegraf.Metric, error) { n.Metric = m n.Result = val if n.IncludeCollection == nil && (len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0) { - n.IncludeCollection = p.checkIfIncludedCollection(n.Index, n.Raw) + n.IncludeCollection = p.existsInpathResults(n.Index, n.Raw) } r, err := p.expandArray(n) if err != nil { @@ -396,21 +396,6 @@ func (p *Parser) existsInpathResults(index int, raw string) *PathResult { return nil } -func (p *Parser) checkIfIncludedCollection(index int, raw string) *PathResult { - for _, f := range p.pathResults { - if f.result.Index == 0 { - for _, i := range f.result.HashtagIndexes { - if string(p.InputJSON[i:i+len(raw)]) == raw { - return &f - } - } - } else if f.result.Index == index { - return &f - } - } - return nil -} - // processObjects will iterate over all 'object' configs and create metrics for each func (p *Parser) processObjects(objects []JSONObject) ([]telegraf.Metric, error) { p.iterateObjects = true From 97ace2e9d1ebae3055d588e8b7528e2e002ae5e9 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Fri, 2 Jul 2021 20:13:59 -0500 Subject: [PATCH 08/14] Add test --- plugins/parsers/json_v2/parser_test.go | 4 + .../testdata/multiple_json_input/expected.out | 2 + .../testdata/multiple_json_input/input_1.json | 87 ++++++++++++ .../testdata/multiple_json_input/input_2.json | 134 ++++++++++++++++++ .../multiple_json_input/telegraf.conf | 18 +++ 5 files changed, 245 insertions(+) create mode 100644 plugins/parsers/json_v2/testdata/multiple_json_input/expected.out create mode 100644 plugins/parsers/json_v2/testdata/multiple_json_input/input_1.json create mode 100644 plugins/parsers/json_v2/testdata/multiple_json_input/input_2.json create mode 100644 plugins/parsers/json_v2/testdata/multiple_json_input/telegraf.conf diff --git a/plugins/parsers/json_v2/parser_test.go b/plugins/parsers/json_v2/parser_test.go index 83420a9c0c985..4dc12c7ab3a1f 100644 --- 
a/plugins/parsers/json_v2/parser_test.go +++ b/plugins/parsers/json_v2/parser_test.go @@ -28,6 +28,10 @@ func TestData(t *testing.T) { name: "Test having an array of objects", test: "array_of_objects", }, + { + name: "test", + test: "multiple_json_input", + }, { name: "A second test when selecting with sub field and tags", test: "subfieldtag_in_object_2", diff --git a/plugins/parsers/json_v2/testdata/multiple_json_input/expected.out b/plugins/parsers/json_v2/testdata/multiple_json_input/expected.out new file mode 100644 index 0000000000000..f3fa9f0d8571c --- /dev/null +++ b/plugins/parsers/json_v2/testdata/multiple_json_input/expected.out @@ -0,0 +1,2 @@ +file,from_station=COLM,to_station=ANTC,etd_estimate_direction=North minutes=2i +file,from_station=POWL,to_station=DALY,etd_estimate_direction=South minutes=6i diff --git a/plugins/parsers/json_v2/testdata/multiple_json_input/input_1.json b/plugins/parsers/json_v2/testdata/multiple_json_input/input_1.json new file mode 100644 index 0000000000000..f60cd59f91247 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/multiple_json_input/input_1.json @@ -0,0 +1,87 @@ +{ + "?xml": { + "@version": "1.0", + "@encoding": "utf-8" + }, + "root": { + "@id": "1", + "uri": { + "#cdata-section": "http://api.bart.gov/api/etd.aspx?cmd=etd&orig=COLM&dir=n&json=y" + }, + "date": "07/02/2021", + "time": "06:05:47 PM PDT", + "station": [ + { + "name": "Colma", + "abbr": "COLM", + "etd": [ + { + "destination": "Antioch", + "abbreviation": "ANTC", + "limited": "0", + "estimate": [ + { + "minutes": "2", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "16", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "31", + "platform": "2", + "direction": "North", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", 
+ "bikeflag": "1", + "delay": "0" + } + ] + }, + { + "destination": "Richmond", + "abbreviation": "RICH", + "limited": "0", + "estimate": [ + { + "minutes": "22", + "platform": "2", + "direction": "North", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "52", + "platform": "2", + "direction": "North", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + } + ] + } + ] + } + ], + "message": "" + } +} diff --git a/plugins/parsers/json_v2/testdata/multiple_json_input/input_2.json b/plugins/parsers/json_v2/testdata/multiple_json_input/input_2.json new file mode 100644 index 0000000000000..e75e84a093b37 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/multiple_json_input/input_2.json @@ -0,0 +1,134 @@ +{ + "?xml": { + "@version": "1.0", + "@encoding": "utf-8" + }, + "root": { + "@id": "1", + "uri": { + "#cdata-section": "http://api.bart.gov/api/etd.aspx?cmd=etd&orig=POWL&dir=s&json=y" + }, + "date": "07/02/2021", + "time": "06:06:01 PM PDT", + "station": [ + { + "name": "Powell St.", + "abbr": "POWL", + "etd": [ + { + "destination": "Daly City", + "abbreviation": "DALY", + "limited": "0", + "estimate": [ + { + "minutes": "6", + "platform": "1", + "direction": "South", + "length": "10", + "color": "GREEN", + "hexcolor": "#339933", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "26", + "platform": "1", + "direction": "South", + "length": "9", + "color": "BLUE", + "hexcolor": "#0099cc", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "36", + "platform": "1", + "direction": "South", + "length": "10", + "color": "GREEN", + "hexcolor": "#339933", + "bikeflag": "1", + "delay": "0" + } + ] + }, + { + "destination": "Millbrae", + "abbreviation": "MLBR", + "limited": "0", + "estimate": [ + { + "minutes": "19", + "platform": "1", + "direction": "South", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + 
}, + { + "minutes": "49", + "platform": "1", + "direction": "South", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "79", + "platform": "1", + "direction": "South", + "length": "10", + "color": "RED", + "hexcolor": "#ff0000", + "bikeflag": "1", + "delay": "0" + } + ] + }, + { + "destination": "SF Airport", + "abbreviation": "SFIA", + "limited": "0", + "estimate": [ + { + "minutes": "7", + "platform": "1", + "direction": "South", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "37", + "platform": "1", + "direction": "South", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + }, + { + "minutes": "67", + "platform": "1", + "direction": "South", + "length": "10", + "color": "YELLOW", + "hexcolor": "#ffff33", + "bikeflag": "1", + "delay": "0" + } + ] + } + ] + } + ], + "message": "" + } +} diff --git a/plugins/parsers/json_v2/testdata/multiple_json_input/telegraf.conf b/plugins/parsers/json_v2/testdata/multiple_json_input/telegraf.conf new file mode 100644 index 0000000000000..96c8ede181a54 --- /dev/null +++ b/plugins/parsers/json_v2/testdata/multiple_json_input/telegraf.conf @@ -0,0 +1,18 @@ +[[inputs.file]] + files = ["./testdata/multiple_json_input/input_1.json", "./testdata/multiple_json_input/input_2.json"] + data_format = "json_v2" + [[inputs.file.json_v2]] + [[inputs.file.json_v2.object]] + path = "root.station" + [[inputs.file.json_v2.object.tag]] + path="#.abbr" + rename = "from_station" + [[inputs.file.json_v2.object.field]] + path = "#.etd.0.estimate.0.minutes" + rename = "minutes" + type = "int" + [[inputs.file.json_v2.object.tag]] + path = "#.etd.0.abbreviation" + rename = "to_station" + [[inputs.file.json_v2.object.tag]] + path = "#.etd.0.estimate.0.direction" From 531a94c5d08b24adc71363a175ccb1132187385c Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Wed, 21 
Jul 2021 13:03:59 -0500 Subject: [PATCH 09/14] Add pr 9504 --- .../json_v2/testdata/multiple_arrays_in_object/expected.out | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/parsers/json_v2/testdata/multiple_arrays_in_object/expected.out b/plugins/parsers/json_v2/testdata/multiple_arrays_in_object/expected.out index 2948da1720f64..04cd0635a5497 100644 --- a/plugins/parsers/json_v2/testdata/multiple_arrays_in_object/expected.out +++ b/plugins/parsers/json_v2/testdata/multiple_arrays_in_object/expected.out @@ -6,4 +6,3 @@ file,title=The\ Lord\ Of\ The\ Rings author="Tolkien",chapters="The Shadow of th file,title=The\ Lord\ Of\ The\ Rings author="Tolkien",chapters="The Shadow of the Past",name="Bilbo",species="hobbit",random=2 file,title=The\ Lord\ Of\ The\ Rings author="Tolkien",chapters="The Shadow of the Past",name="Frodo",species="hobbit",random=1 file,title=The\ Lord\ Of\ The\ Rings author="Tolkien",chapters="The Shadow of the Past",name="Frodo",species="hobbit",random=2 - From 994f44e79f0630b56e31d4db09bf0b889b06243f Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Wed, 21 Jul 2021 13:08:32 -0500 Subject: [PATCH 10/14] rebase --- plugins/parsers/json_v2/parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index 8af47608f2252..a1693b93ad09e 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -363,7 +363,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if result.Tag { desiredType = "string" } - v, err := p.convertType(result.Result, result.DesiredType, result.SetName) + v, err := p.convertType(result.Result, desiredType, result.SetName) if err != nil { return nil, err } From 79e09b97d463fb69a227265f6bb43c58b72f5182 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Thu, 2 Sep 2021 17:43:47 -0500 Subject: [PATCH 11/14] feat: use latest gjson --- go.sum | 4 ++-- 
plugins/parsers/json_v2/parser.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.sum b/go.sum index 06e51339f5e3c..0a87acab21227 100644 --- a/go.sum +++ b/go.sum @@ -1506,8 +1506,6 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/sspaink/gjson v1.8.1-0.20210701110044-7460ecfe6948 h1:zex3QSNZ1UZH4RReIi26TnMt+82mvuGwoIv+39cAW08= -github.com/sspaink/gjson v1.8.1-0.20210701110044-7460ecfe6948/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271 h1:WhxRHzgeVGETMlmVfqhRn8RIeeNoPr2Czh33I4Zdccw= @@ -1541,6 +1539,8 @@ github.com/tidwall/gjson v1.8.0 h1:Qt+orfosKn0rbNTZqHYDqBrmm3UDA4KRkv70fDzG+PQ= github.com/tidwall/gjson v1.8.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/testcontainers/testcontainers-go v0.11.0 h1:HO5YOx2DYBHqcg4MzVWPj3FuHAv7USWVu94vCSsgiaM= github.com/testcontainers/testcontainers-go v0.11.0/go.mod h1:HztBCODzuA+YpMXGK8amjO8j50jz2gcT0BOzSKUiYIs= +github.com/tidwall/gjson v1.9.0 h1:+Od7AE26jAaMgVC31cQV/Ope5iKXulNMflrlB7k+F9E= +github.com/tidwall/gjson v1.9.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= diff --git a/plugins/parsers/json_v2/parser.go 
b/plugins/parsers/json_v2/parser.go index a1693b93ad09e..248f18c44213e 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -384,7 +384,7 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { func (p *Parser) existsInpathResults(index int, raw string) *PathResult { for _, f := range p.pathResults { if f.result.Index == 0 { - for _, i := range f.result.HashtagIndexes { + for _, i := range f.result.Indexes { if i == index { return &f } From e62d5364526ed49106fd9922212253888a4941d9 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Thu, 2 Sep 2021 17:49:51 -0500 Subject: [PATCH 12/14] feat: gjson v1.9.0 --- go.mod | 2 +- go.sum | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/go.mod b/go.mod index dc8b762d1e6d1..0999e764200ca 100644 --- a/go.mod +++ b/go.mod @@ -246,7 +246,7 @@ require ( github.com/stretchr/testify v1.7.0 github.com/tbrandon/mbserver v0.0.0-20170611213546-993e1772cc62 github.com/testcontainers/testcontainers-go v0.11.1 - github.com/tidwall/gjson v1.8.0 + github.com/tidwall/gjson v1.9.0 github.com/tidwall/match v1.0.3 // indirect github.com/tidwall/pretty v1.1.0 // indirect github.com/tinylib/msgp v1.1.6 diff --git a/go.sum b/go.sum index 0a87acab21227..6b60e06efb308 100644 --- a/go.sum +++ b/go.sum @@ -1535,10 +1535,6 @@ github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ github.com/tedsuo/ifrit v0.0.0-20180802180643-bea94bb476cc/go.mod h1:eyZnKCc955uh98WQvzOm0dgAeLnf2O0Rz0LPoC5ze+0= github.com/testcontainers/testcontainers-go v0.11.1 h1:FiYsB83LSGbiawoV8TpAZGfcCUbtaeeg1SXqEKUxh08= github.com/testcontainers/testcontainers-go v0.11.1/go.mod h1:/V0UVq+1e7NWYoqTPog179clf0Qp9TOyp4EcXaEFQz8= -github.com/tidwall/gjson v1.8.0 h1:Qt+orfosKn0rbNTZqHYDqBrmm3UDA4KRkv70fDzG+PQ= -github.com/tidwall/gjson v1.8.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= -github.com/testcontainers/testcontainers-go v0.11.0 
h1:HO5YOx2DYBHqcg4MzVWPj3FuHAv7USWVu94vCSsgiaM= -github.com/testcontainers/testcontainers-go v0.11.0/go.mod h1:HztBCODzuA+YpMXGK8amjO8j50jz2gcT0BOzSKUiYIs= github.com/tidwall/gjson v1.9.0 h1:+Od7AE26jAaMgVC31cQV/Ope5iKXulNMflrlB7k+F9E= github.com/tidwall/gjson v1.9.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= From 12d47485482b62327e9bc301a5c1efe1b670fcb4 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Thu, 16 Sep 2021 17:50:55 -0500 Subject: [PATCH 13/14] fix: test name --- plugins/parsers/json_v2/parser_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/parsers/json_v2/parser_test.go b/plugins/parsers/json_v2/parser_test.go index 4dc12c7ab3a1f..3ef08856190ac 100644 --- a/plugins/parsers/json_v2/parser_test.go +++ b/plugins/parsers/json_v2/parser_test.go @@ -29,7 +29,7 @@ func TestData(t *testing.T) { test: "array_of_objects", }, { - name: "test", + name: "Test having multiple JSON inputs", test: "multiple_json_input", }, { From 7b91a3026c5f423bc585aa7c2b1d90e4bdb593c7 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink Date: Thu, 16 Sep 2021 18:24:45 -0500 Subject: [PATCH 14/14] fix: improve comments --- plugins/parsers/json_v2/parser.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/plugins/parsers/json_v2/parser.go b/plugins/parsers/json_v2/parser.go index 248f18c44213e..f4f84c562e781 100644 --- a/plugins/parsers/json_v2/parser.go +++ b/plugins/parsers/json_v2/parser.go @@ -67,12 +67,16 @@ type JSONObject struct { } type MetricNode struct { - ParentIndex int - OutputName string - SetName string - Tag bool - DesiredType string // Can be "int", "uint", "float", "bool", "string" - IncludeCollection *PathResult // If set to true, it should be auto included + ParentIndex int + OutputName string + SetName string + Tag bool + DesiredType string // Can be "int", "uint", "float", "bool", "string" + /* + 
IncludeCollection is only used when processing objects and is responsible for containing the gjson results + found by the gjson paths provided in the FieldPaths and TagPaths configs. + */ + IncludeCollection *PathResult Metric telegraf.Metric gjson.Result @@ -339,9 +343,11 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if len(p.currentSettings.FieldPaths) > 0 || len(p.currentSettings.TagPaths) > 0 { var pathResult *PathResult + // When IncludeCollection isn't nil, that means the current result is included in the collection. if result.IncludeCollection != nil { pathResult = result.IncludeCollection } else { + // Verify that the result should be included based on the results of fieldpaths and tag paths pathResult = p.existsInpathResults(result.ParentIndex, result.Raw) } if pathResult == nil { @@ -350,11 +356,9 @@ func (p *Parser) expandArray(result MetricNode) ([]telegraf.Metric, error) { if pathResult.tag { result.Tag = true } - if !pathResult.tag { desiredType = pathResult.Type } - if pathResult.Rename != "" { outputName = pathResult.Rename } @@ -408,7 +412,6 @@ func (p *Parser) processObjects(objects []JSONObject) ([]telegraf.Metric, error) } result := gjson.GetBytes(p.InputJSON, c.Path) - // hastag doesn't return index! idea: replace all hastags with index, and find lenght of array scopedJSON := []byte(result.Raw) for _, f := range c.FieldPaths { var r PathResult