From dbc476349bdd70e53e6f3844ac50212b18517dad Mon Sep 17 00:00:00 2001 From: ruflin Date: Tue, 19 Dec 2017 13:23:13 +1100 Subject: [PATCH 1/3] include_fields processor does not work with dotted keys The include_fields processor does not work in case the key contains a dot. See tests added here for this case. There are different solutions here: * Modify `GetValue` in common.MapStr to also look for keys with dots inside instead of directly walking the tree * Modifying jolokia fetcher to convert dot keys to objects The first change would be possible with adding ``` --- a/libbeat/common/mapstr.go +++ b/libbeat/common/mapstr.go @@ -296,6 +296,10 @@ func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { var err error keyParts := strings.Split(key, ".") + if v, exists := data[key]; exists { + return v, nil + } + ``` But that does not fully solve the problem. As in the processor the following code is used, the fields which are added after filtering to the filtered fields, will have a nesting without the docs (see Put method) ``` for _, field := range f.Fields { v, err := event.GetValue(field) if err == nil { _, err = filtered.Put(field, v) } // Ignore ErrKeyNotFound errors if err != nil && errors.Cause(err) != common.ErrKeyNotFound { errs = append(errs, err.Error()) } } ``` This Put would also cause other issues as we see in the event below. If it's convert to an object `classes` and the namespaces `classes.loaded` would conflict because one is an object and the other one isn't. I would have expected that this already causes an issue on the Elasticsearch side before filtering. ``` "jolokia":{ "metrics":{ "Metrics":{ "atomikos.nbTransactions":0.000000, "classes":18857.000000, "classes.loaded":19127.000000, "classes.unloaded":270.000000, "counter.servo.discoveryclient-httpclient_createnew":13542.000000, "counter.servo.discoveryclient-httpclient_delete":13534.000000, "uptime":406037194.000000 }, "java":{ "Threading":{ "DaemonThreadCount":37.000000, "ThreadCount":57.000000 } } } } ``` An alternative solution to the above is replacing the `.` in the names with `_` as we do for example for docker labels and other places with a similar issue. The downside of this is that filtering on fields is not straight forward as the names change between fetching from jolokia and in ES. See https://discuss.elastic.co/t/include-fields-problem/109809 --- libbeat/common/mapstr.go | 11 ++++ .../processors/actions/include_fields_test.go | 59 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 libbeat/processors/actions/include_fields_test.go diff --git a/libbeat/common/mapstr.go b/libbeat/common/mapstr.go index af49e9cb67ee..9d0313bf0f67 100644 --- a/libbeat/common/mapstr.go +++ b/libbeat/common/mapstr.go @@ -322,6 +322,17 @@ func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { var err error keyParts := strings.Split(key, ".") + // Only check for full key if key contains dots + if len(keyParts) > 1 { + // Converted dotted key in object to nested object + // This is needed for processors to support dotted keys + // The reason it needs to be readded is to also support deletion op type properly + if v, exists := data[key]; exists { + delete(data, key) + data.Put(key, v) + } + } + // Walk maps until reaching a leaf object. m := data for i, k := range keyParts[0 : len(keyParts)-1] { diff --git a/libbeat/processors/actions/include_fields_test.go b/libbeat/processors/actions/include_fields_test.go new file mode 100644 index 000000000000..5c581ab5d29c --- /dev/null +++ b/libbeat/processors/actions/include_fields_test.go @@ -0,0 +1,59 @@ +package actions + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/libbeat/beat" + "github.com/elastic/beats/libbeat/common" +) + +func TestIncludeFields(t *testing.T) { + + var tests = []struct { + Fields []string + Input common.MapStr + Output common.MapStr + }{ + { + Fields: []string{"test"}, + Input: common.MapStr{ + "hello": "world", + "test": 17, + }, + Output: common.MapStr{ + "test": 17, + }, + }, + { + Fields: []string{"test", "a.b"}, + Input: common.MapStr{ + "a.b": "b", + "a.c": "c", + "test": 17, + }, + Output: common.MapStr{ + "test": 17, + "a": common.MapStr{ + "b": "b", + }, + }, + }, + } + + for _, test := range tests { + p := includeFields{ + Fields: test.Fields, + } + + event := &beat.Event{ + Fields: test.Input, + } + + newEvent, err := p.Run(event) + assert.NoError(t, err) + + assert.Equal(t, test.Output, newEvent.Fields) + } +} From 87ede81d7c53141b05d2df44fa33e7b6815e73c7 Mon Sep 17 00:00:00 2001 From: ruflin Date: Mon, 19 Feb 2018 12:43:41 +1100 Subject: [PATCH 2/3] some refactoring Benchmark old to new walkMap Recursive WalkMap ``` BenchmarkWalkMap/Get-4 500000 3032 ns/op BenchmarkWalkMap/Put-4 300000 4131 ns/op BenchmarkWalkMap/HasKey-4 300000 4080 ns/op BenchmarkWalkMap/Delete-4 300000 4353 ns/op ``` Non recursive WalkMap ``` BenchmarkWalkMap/Get-4 500000 2890 ns/op BenchmarkWalkMap/Put-4 300000 4426 ns/op BenchmarkWalkMap/HasKey-4 300000 4025 ns/op BenchmarkWalkMap/Delete-4 300000 4568 ns/op ``` I did multiple runs and there was no clear winner. They seem to be very similar in execution time. --- libbeat/common/mapstr.go | 68 +++++++++++++++++++---- libbeat/common/mapstr_test.go | 100 ++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 10 deletions(-) diff --git a/libbeat/common/mapstr.go b/libbeat/common/mapstr.go index 9d0313bf0f67..dfac0408ddc5 100644 --- a/libbeat/common/mapstr.go +++ b/libbeat/common/mapstr.go @@ -318,20 +318,20 @@ func tryToMapStr(v interface{}) (MapStr, bool) { // walkMap walks the data MapStr to arrive at the value specified by the key. // The key is expressed in dot-notation (eg. x.y.z). When the key is found then // the given mapStrOperation is invoked. -func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { +func walkMap2(key string, data MapStr, op mapStrOperation) (interface{}, error) { var err error keyParts := strings.Split(key, ".") // Only check for full key if key contains dots - if len(keyParts) > 1 { - // Converted dotted key in object to nested object - // This is needed for processors to support dotted keys - // The reason it needs to be readded is to also support deletion op type properly - if v, exists := data[key]; exists { - delete(data, key) - data.Put(key, v) - } - } + //if len(keyParts) > 1 { + // // Converted dotted key in object to nested object + // // This is needed for processors to support dotted keys + // // The reason it needs to be readded is to also support deletion op type properly + // if v, exists := data[key]; exists { + // delete(data, key) + // data.Put(key, v) + // } + //} // Walk maps until reaching a leaf object. m := data @@ -341,6 +341,7 @@ func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { if op.CreateMissingKeys { newMap := MapStr{} m[k] = newMap + // Makes m the same as m[k] for the next iteration m = newMap continue } @@ -362,6 +363,53 @@ func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { return v, nil } +func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { + + keyParts := strings.SplitN(key, ".", 2) + _, exists := data[key] + + // If key matches or last keyParts or directly + if len(keyParts) == 1 || exists { + // Execute the mapStrOperator on the leaf object. + v, err := op.Do(keyParts[len(keyParts)-1], data) + if err != nil { + return nil, errors.Wrapf(err, "key=%v", key) + } + return v, nil + } + + k := keyParts[0] + _, keyPartExists := data[k] + if !keyPartExists { + if op.CreateMissingKeys { + newMap := MapStr{} + data[k] = newMap + // Makes m the same as m[k] for the next iteration + //data = newMap + } else { + return nil, errors.Wrapf(ErrKeyNotFound, "key=%v", strings.Join(keyParts[0:1], ".")) + } + } + + data, err := toMapStr(data[k]) + if err != nil { + return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:1], ".")) + } + + return walkMap2(keyParts[1], data, op) + + //// Walk maps until reaching a leaf object. + //m := data + //k := keyParts[0] + // + //m, err = toMapStr(v) + //if err != nil { + // return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:i+1], ".")) + //} + // + //return v, nil +} + // mapStrOperation types // These are static mapStrOperation types that store no state and are reusable. diff --git a/libbeat/common/mapstr_test.go b/libbeat/common/mapstr_test.go index ec45b1a5a78b..37d07ff03d98 100644 --- a/libbeat/common/mapstr_test.go +++ b/libbeat/common/mapstr_test.go @@ -60,6 +60,16 @@ func TestMapStrDeepUpdate(t *testing.T) { MapStr{"a": 1}, MapStr{"a": 1}, }, + { + MapStr{"a.b": 1}, + MapStr{"a": 1}, + MapStr{"a": 1, "a.b": 1}, + }, + { + MapStr{"a": 1}, + MapStr{"a.b": 1}, + MapStr{"a": 1, "a.b": 1}, + }, } for i, test := range tests { @@ -173,7 +183,9 @@ func TestHasKey(t *testing.T) { "c31": 1, "c32": 2, }, + "c4.f": 19, }, + "d.f": 1, } hasKey, err := m.HasKey("c.c2") @@ -191,6 +203,14 @@ func TestHasKey(t *testing.T) { hasKey, err = m.HasKey("dd") assert.Equal(nil, err) assert.Equal(false, hasKey) + + //hasKey, err = m.HasKey("d.f") + //assert.Equal(nil, err) + //assert.Equal(true, hasKey) + + /*hasKey, err = m.HasKey("c.c4.f") + assert.Equal(nil, err) + assert.Equal(true, hasKey)*/ } func TestMapStrPut(t *testing.T) { @@ -569,3 +589,83 @@ func BenchmarkMapStrLogging(b *testing.B) { logger.Infow("test", "mapstr", m) } } + +func BenchmarkWalkMap(b *testing.B) { + + b.Run("Get", func(b *testing.B) { + m := MapStr{ + "test": 15, + "hello": MapStr{ + "world": MapStr{ + "ok": "test", + }, + }, + "elastic": MapStr{ + "for": "search", + }, + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + m.GetValue("test.world.ok") + } + }) + + b.Run("Put", func(b *testing.B) { + + for i := 0; i < b.N; i++ { + m := MapStr{ + "test": 15, + "hello": MapStr{ + "world": MapStr{ + "ok": "test", + }, + }, + "elastic": MapStr{ + "for": "search", + }, + } + + m.Put("test.world.tt", 17) + } + }) + + b.Run("HasKey", func(b *testing.B) { + b.ResetTimer() + + for i := 0; i < b.N; i++ { + m := MapStr{ + "test": 15, + "hello": MapStr{ + "world": MapStr{ + "ok": "test", + }, + }, + "elastic": MapStr{ + "for": "search", + }, + } + m.HasKey("test.world.tt") + } + }) + + b.Run("Delete", func(b *testing.B) { + b.ResetTimer() + + for i := 0; i < b.N; i++ { + m := MapStr{ + "test": 15, + "hello": MapStr{ + "world": MapStr{ + "ok": "test", + }, + }, + "elastic": MapStr{ + "for": "search", + }, + } + m.Put("test.world.tt", 17) + } + }) + +} From f29de3459b6e51c657b0f77e20732a6f9ec04008 Mon Sep 17 00:00:00 2001 From: ruflin Date: Mon, 19 Feb 2018 13:01:31 +1100 Subject: [PATCH 3/3] more refactoring, add tests --- libbeat/common/mapstr.go | 90 ++++++++++------------------------------ 1 file changed, 21 insertions(+), 69 deletions(-) diff --git a/libbeat/common/mapstr.go b/libbeat/common/mapstr.go index dfac0408ddc5..26ca40089e44 100644 --- a/libbeat/common/mapstr.go +++ b/libbeat/common/mapstr.go @@ -318,96 +318,48 @@ func tryToMapStr(v interface{}) (MapStr, bool) { // walkMap walks the data MapStr to arrive at the value specified by the key. // The key is expressed in dot-notation (eg. x.y.z). When the key is found then // the given mapStrOperation is invoked. -func walkMap2(key string, data MapStr, op mapStrOperation) (interface{}, error) { - var err error - keyParts := strings.Split(key, ".") - - // Only check for full key if key contains dots - //if len(keyParts) > 1 { - // // Converted dotted key in object to nested object - // // This is needed for processors to support dotted keys - // // The reason it needs to be readded is to also support deletion op type properly - // if v, exists := data[key]; exists { - // delete(data, key) - // data.Put(key, v) - // } - //} - - // Walk maps until reaching a leaf object. - m := data - for i, k := range keyParts[0 : len(keyParts)-1] { - v, exists := m[k] - if !exists { - if op.CreateMissingKeys { - newMap := MapStr{} - m[k] = newMap - // Makes m the same as m[k] for the next iteration - m = newMap - continue - } - return nil, errors.Wrapf(ErrKeyNotFound, "key=%v", strings.Join(keyParts[0:i+1], ".")) - } - - m, err = toMapStr(v) - if err != nil { - return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:i+1], ".")) - } - } - - // Execute the mapStrOperator on the leaf object. - v, err := op.Do(keyParts[len(keyParts)-1], m) - if err != nil { - return nil, errors.Wrapf(err, "key=%v", key) - } - - return v, nil -} - -func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { +func walkMapRecursive(key string, data MapStr, op mapStrOperation) (interface{}, error) { + // Splits up the key in two parts: full key and first part before the dot keyParts := strings.SplitN(key, ".", 2) _, exists := data[key] - // If key matches or last keyParts or directly + // If leave node or key exists directly if len(keyParts) == 1 || exists { // Execute the mapStrOperator on the leaf object. - v, err := op.Do(keyParts[len(keyParts)-1], data) + v, err := op.Do(key, data) if err != nil { - return nil, errors.Wrapf(err, "key=%v", key) + return nil, err } return v, nil } + // Checks if first part of the key exists k := keyParts[0] - _, keyPartExists := data[k] - if !keyPartExists { + _, keyExist := data[k] + if !keyExist { if op.CreateMissingKeys { - newMap := MapStr{} - data[k] = newMap - // Makes m the same as m[k] for the next iteration - //data = newMap + data[k] = MapStr{} } else { - return nil, errors.Wrapf(ErrKeyNotFound, "key=%v", strings.Join(keyParts[0:1], ".")) + return nil, ErrKeyNotFound } } data, err := toMapStr(data[k]) if err != nil { - return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:1], ".")) + return nil, err } - return walkMap2(keyParts[1], data, op) - - //// Walk maps until reaching a leaf object. - //m := data - //k := keyParts[0] - // - //m, err = toMapStr(v) - //if err != nil { - // return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:i+1], ".")) - //} - // - //return v, nil + return walkMapRecursive(keyParts[1], data, op) +} + +func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) { + v, err := walkMapRecursive(key, data, op) + if err != nil { + // Add key to error + err = errors.Wrapf(err, "key=%v", key) + } + return v, err } // mapStrOperation types