Skip to content

Commit

Permalink
Rework MapStr.walkMap to support dotted keys (#6406)
Browse files Browse the repository at this point in the history
Currently if a dotted key is passed to walkmap it is split up in it's part and the map is traversed for each part. In case an event has a dotted key inside walkMap does not return the expected value. For the Event `{"a.b": 1}` the method `GetValue("a.b")` has previously returned `ErrKeyNotExists` but now returns 0.

With this change all processors which rely on Get/Put functionalities of MapStr can now also work with events containing dotted keys.

**Changes**

* The walkMap function was rewritten to a recursive function. This made adding the new functionality easier.
* Add tests for GetValue to make possible usages more clear
* Add Benchmark tests for walkMap use cases

**Limitations**

When using `walkMap` with a Key, either the full key has to be dotted or the last part. `GetValue("a.b.c")` on an event `{"a.b":{"c":17}}` does not work as only the first part of the key or the full key are checked against the map.

Benchmark old to new walkMap:

```
go test  -bench=BenchmarkWalkMap -run=BenchmarkWalkMap
```

Recursive
```
BenchmarkWalkMap/Get-4           1000000              1334 ns/op             432 B/op          7 allocs/op
BenchmarkWalkMap/Put-4           1000000              1273 ns/op            1120 B/op         10 allocs/op
BenchmarkWalkMap/HasKey-4        2000000               690 ns/op             160 B/op          5 allocs/op
BenchmarkWalkMap/HasKeyFirst-4  50000000                28.8 ns/op             0 B/op          0 allocs/op
BenchmarkWalkMap/Delete-4        1000000              1431 ns/op            1120 B/op         10 allocs/op
```

Iterative
```
BenchmarkWalkMap/Get-4            500000              2577 ns/op             824 B/op         13 allocs/op
BenchmarkWalkMap/Put-4           1000000              1052 ns/op            1072 B/op          8 allocs/op
BenchmarkWalkMap/HasKey-4        5000000               381 ns/op              96 B/op          2 allocs/op
BenchmarkWalkMap/HasKeyFirst-4  20000000                89.6 ns/op            16 B/op          1 allocs/op
BenchmarkWalkMap/Delete-4        1000000              1083 ns/op            1072 B/op          8 allocs/op
```

The benchmark show a difference in the code before and after. The `GetValue` function has become quite a bit more efficient. The `HasKey` is now less efficient. I still need to investigate in detail where the differences come from but the assumption is it comes from the additional functionality.

This change was initially triggered by #5916 but is split out into a separate PR because it has usage in multiple places.
  • Loading branch information
ruflin authored and andrewkroh committed Feb 20, 2018
1 parent 1d7e325 commit a4bddd8
Show file tree
Hide file tree
Showing 3 changed files with 262 additions and 25 deletions.
63 changes: 38 additions & 25 deletions libbeat/common/mapstr.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,40 +315,53 @@ func tryToMapStr(v interface{}) (MapStr, bool) {
}
}

// walkMap walks the data MapStr to arrive at the value specified by the key.
// walkMapRecursive walks the data MapStr to arrive at the value specified by the key.
// The key is expressed in dot-notation (eg. x.y.z). When the key is found then
// the given mapStrOperation is invoked.
func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) {
var err error
keyParts := strings.Split(key, ".")

// Walk maps until reaching a leaf object.
m := data
for i, k := range keyParts[0 : len(keyParts)-1] {
v, exists := m[k]
if !exists {
if op.CreateMissingKeys {
newMap := MapStr{}
m[k] = newMap
m = newMap
continue
}
return nil, errors.Wrapf(ErrKeyNotFound, "key=%v", strings.Join(keyParts[0:i+1], "."))
}
func walkMapRecursive(key string, data MapStr, op mapStrOperation) (interface{}, error) {

// Splits up the key in two parts: full key and first part before the dot
_, exists := data[key]
// If leave node or key exists directly
if exists {
// Execute the mapStrOperator on the leaf object.
return op.Do(key, data)
}

keyParts := strings.SplitN(key, ".", 2)
// If leave node or key exists directly
if len(keyParts) == 1 {
// Execute the mapStrOperator on the leaf object.
return op.Do(key, data)
}

m, err = toMapStr(v)
if err != nil {
return nil, errors.Wrapf(err, "key=%v", strings.Join(keyParts[0:i+1], "."))
// Checks if first part of the key exists
k := keyParts[0]
d, keyExist := data[k]
if !keyExist {
if op.CreateMissingKeys {
d = MapStr{}
data[k] = d
} else {
return nil, ErrKeyNotFound
}
}

// Execute the mapStrOperator on the leaf object.
v, err := op.Do(keyParts[len(keyParts)-1], m)
v, err := toMapStr(d)
if err != nil {
return nil, errors.Wrapf(err, "key=%v", key)
return nil, err
}

return v, nil
return walkMapRecursive(keyParts[1], v, op)
}

func walkMap(key string, data MapStr, op mapStrOperation) (interface{}, error) {
v, err := walkMapRecursive(key, data, op)
if err != nil {
// Add key to error
err = errors.Wrapf(err, "key=%v", key)
}
return v, err
}

// mapStrOperation types
Expand Down
165 changes: 165 additions & 0 deletions libbeat/common/mapstr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ func TestMapStrDeepUpdate(t *testing.T) {
MapStr{"a": 1},
MapStr{"a": 1},
},
{
MapStr{"a.b": 1},
MapStr{"a": 1},
MapStr{"a": 1, "a.b": 1},
},
{
MapStr{"a": 1},
MapStr{"a.b": 1},
MapStr{"a": 1, "a.b": 1},
},
}

for i, test := range tests {
Expand Down Expand Up @@ -173,7 +183,9 @@ func TestHasKey(t *testing.T) {
"c31": 1,
"c32": 2,
},
"c4.f": 19,
},
"d.f": 1,
}

hasKey, err := m.HasKey("c.c2")
Expand All @@ -191,6 +203,14 @@ func TestHasKey(t *testing.T) {
hasKey, err = m.HasKey("dd")
assert.Equal(nil, err)
assert.Equal(false, hasKey)

hasKey, err = m.HasKey("d.f")
assert.Equal(nil, err)
assert.Equal(true, hasKey)

hasKey, err = m.HasKey("c.c4.f")
assert.Equal(nil, err)
assert.Equal(true, hasKey)
}

func TestMapStrPut(t *testing.T) {
Expand Down Expand Up @@ -226,6 +246,76 @@ func TestMapStrPut(t *testing.T) {
assert.Equal(t, MapStr{"subMap": MapStr{"newMap": MapStr{"a": 1}}}, m)
}

func TestMapStrGetValue(t *testing.T) {

tests := []struct {
input MapStr
key string
output interface{}
error bool
}{
{
MapStr{"a": 1},
"a",
1,
false,
},
{
MapStr{"a": MapStr{"b": 1}},
"a",
MapStr{"b": 1},
false,
},
{
MapStr{"a": MapStr{"b": 1}},
"a.b",
1,
false,
},
{
MapStr{"a": MapStr{"b.c": 1}},
"a",
MapStr{"b.c": 1},
false,
},
{
MapStr{"a": MapStr{"b.c": 1}},
"a.b",
nil,
true,
},
{
MapStr{"a.b": MapStr{"c": 1}},
"a.b",
MapStr{"c": 1},
false,
},
{
MapStr{"a.b": MapStr{"c": 1}},
"a.b.c",
nil,
true,
},
{
MapStr{"a": MapStr{"b.c": 1}},
"a.b.c",
1,
false,
},
}

for _, test := range tests {
v, err := test.input.GetValue(test.key)
if test.error {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
assert.Equal(t, test.output, v)

}
}

func TestClone(t *testing.T) {
assert := assert.New(t)

Expand Down Expand Up @@ -569,3 +659,78 @@ func BenchmarkMapStrLogging(b *testing.B) {
logger.Infow("test", "mapstr", m)
}
}

func BenchmarkWalkMap(b *testing.B) {

globalM := MapStr{
"hello": MapStr{
"world": MapStr{
"ok": "test",
},
},
}

b.Run("Get", func(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
globalM.GetValue("test.world.ok")
}
})

b.Run("Put", func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
m := MapStr{
"hello": MapStr{
"world": MapStr{
"ok": "test",
},
},
}

m.Put("hello.world.new", 17)
}
})

b.Run("PutMissing", func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
m := MapStr{}

m.Put("a.b.c", 17)
}
})

b.Run("HasKey", func(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
globalM.HasKey("hello.world.ok")
globalM.HasKey("hello.world.no_ok")
}
})

b.Run("HasKeyFirst", func(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
globalM.HasKey("hello")
}
})

b.Run("Delete", func(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
m := MapStr{
"hello": MapStr{
"world": MapStr{
"ok": "test",
},
},
}
m.Put("hello.world.test", 17)
}
})
}
59 changes: 59 additions & 0 deletions libbeat/processors/actions/include_fields_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package actions

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/elastic/beats/libbeat/beat"
"github.com/elastic/beats/libbeat/common"
)

func TestIncludeFields(t *testing.T) {

var tests = []struct {
Fields []string
Input common.MapStr
Output common.MapStr
}{
{
Fields: []string{"test"},
Input: common.MapStr{
"hello": "world",
"test": 17,
},
Output: common.MapStr{
"test": 17,
},
},
{
Fields: []string{"test", "a.b"},
Input: common.MapStr{
"a.b": "b",
"a.c": "c",
"test": 17,
},
Output: common.MapStr{
"test": 17,
"a": common.MapStr{
"b": "b",
},
},
},
}

for _, test := range tests {
p := includeFields{
Fields: test.Fields,
}

event := &beat.Event{
Fields: test.Input,
}

newEvent, err := p.Run(event)
assert.NoError(t, err)

assert.Equal(t, test.Output, newEvent.Fields)
}
}

0 comments on commit a4bddd8

Please sign in to comment.