Skip to content

Commit

Permalink
Add strings processor (influxdata#4476)
Browse files Browse the repository at this point in the history
  • Loading branch information
ada-foss authored and rgitzel committed Oct 17, 2018
1 parent 75997d5 commit c9215e2
Show file tree
Hide file tree
Showing 4 changed files with 766 additions and 0 deletions.
1 change: 1 addition & 0 deletions plugins/processors/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/processors/parser"
_ "github.com/influxdata/telegraf/plugins/processors/printer"
_ "github.com/influxdata/telegraf/plugins/processors/regex"
_ "github.com/influxdata/telegraf/plugins/processors/strings"
_ "github.com/influxdata/telegraf/plugins/processors/rename"
_ "github.com/influxdata/telegraf/plugins/processors/topk"
)
83 changes: 83 additions & 0 deletions plugins/processors/strings/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Strings Processor Plugin

The `strings` plugin maps certain Go string functions onto measurement, tag, and field values. Values can be modified in place or stored in another key.

Implemented functions are:
- lowercase
- uppercase
- trim
- trim_left
- trim_right
- trim_prefix
- trim_suffix

Please note that in this implementation these are processed in the order that they appear above.

Specify the `measurement`, `tag` or `field` that you want processed in each section and optionally a `dest` if you want the result stored in a new tag or field. You can specify lots of transformations on data with a single strings processor.

### Configuration:

```toml
[[processors.strings]]
# [[processors.strings.uppercase]]
# tag = "method"

# [[processors.strings.lowercase]]
# field = "uri_stem"
# dest = "uri_stem_normalised"

## Trim leading and trailing whitespace using the default cutset
# [[processors.strings.trim]]
# field = "message"

# [[processors.strings.trim_left]]
# field = "message"
# cutset = "\t"

# [[processors.strings.trim_right]]
# field = "message"
# cutset = "\r\n"

# [[processors.strings.trim_prefix]]
# field = "my_value"
# prefix = "my_"

# [[processors.strings.trim_suffix]]
# field = "read_count"
# suffix = "_count"
```

#### Trim, TrimLeft, TrimRight

The `trim`, `trim_left`, and `trim_right` functions take an optional parameter: `cutset`. This value is a string containing the characters to remove from the value.

#### TrimPrefix, TrimSuffix

The `trim_prefix` and `trim_suffix` functions remove the given `prefix` or `suffix`
respectively from the string.

### Example
**Config**
```toml
[[processors.strings]]
[[processors.strings.lowercase]]
tag = "uri_stem"

[[processors.strings.trim_prefix]]
tag = "uri_stem"
prefix = "/api/"

[[processors.strings.uppercase]]
field = "cs-host"
dest = "cs-host_normalised"
```

**Input**
```
iis_log,method=get,uri_stem=/API/HealthCheck cs-host="MIXEDCASE_host",referrer="-",ident="-",http_version=1.1,agent="UserAgent",resp_bytes=270i 1519652321000000000
```

**Output**
```
iis_log,method=get,uri_stem=healthcheck cs-host="MIXEDCASE_host",cs-host_normalised="MIXEDCASE_HOST",referrer="-",ident="-",http_version=1.1,agent="UserAgent",resp_bytes=270i 1519652321000000000
```
199 changes: 199 additions & 0 deletions plugins/processors/strings/strings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
package strings

import (
"strings"
"unicode"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/processors"
)

// Strings is a processor that applies string transformations to metric
// measurements, tags, and fields. Each exported field holds the converters
// configured under the TOML table named in its struct tag.
type Strings struct {
Lowercase []converter `toml:"lowercase"`
Uppercase []converter `toml:"uppercase"`
Trim []converter `toml:"trim"`
TrimLeft []converter `toml:"trim_left"`
TrimRight []converter `toml:"trim_right"`
TrimPrefix []converter `toml:"trim_prefix"`
TrimSuffix []converter `toml:"trim_suffix"`

// converters is the flattened list of every configured converter with its
// transformation function attached; it is populated by initOnce.
converters []converter
// init records whether initOnce has already built converters.
init bool
}

// ConvertFunc transforms an input string and returns the result.
type ConvertFunc func(s string) string

// converter describes a single transformation: which part of a metric to
// read, where to write the result, and the function-specific parameters.
type converter struct {
// Field is the name of the field to transform; only string field values
// are converted.
Field string
// Tag is the name of the tag to transform.
Tag string
// Measurement is the metric name to transform; only metrics whose name
// equals this value are renamed.
Measurement string
// Dest is the optional tag/field key that receives the result. When empty
// the source tag or field is overwritten.
Dest string
// Cutset is the set of characters removed by trim, trim_left, and
// trim_right. When empty, whitespace is trimmed instead.
Cutset string
// Suffix is the suffix removed by trim_suffix.
Suffix string
// Prefix is the prefix removed by trim_prefix.
Prefix string

// fn is the transformation applied to the selected value; it is assigned
// in initOnce.
fn ConvertFunc
}

// sampleConfig is the example TOML configuration returned by SampleConfig.
const sampleConfig = `
## Convert a tag value to uppercase
# [[processors.strings.uppercase]]
# tag = "method"
## Convert a field value to lowercase and store in a new field
# [[processors.strings.lowercase]]
# field = "uri_stem"
# dest = "uri_stem_normalised"
## Trim leading and trailing whitespace using the default cutset
# [[processors.strings.trim]]
# field = "message"
## Trim leading characters in cutset
# [[processors.strings.trim_left]]
# field = "message"
# cutset = "\t"
## Trim trailing characters in cutset
# [[processors.strings.trim_right]]
# field = "message"
# cutset = "\r\n"
## Trim the given prefix from the field
# [[processors.strings.trim_prefix]]
# field = "my_value"
# prefix = "my_"
## Trim the given suffix from the field
# [[processors.strings.trim_suffix]]
# field = "read_count"
# suffix = "_count"
`

// SampleConfig returns the example TOML configuration for this processor.
func (s *Strings) SampleConfig() string {
return sampleConfig
}

// Description returns a one-line summary of what the processor does.
func (s *Strings) Description() string {
return "Perform string processing on tags, fields, and measurements"
}

// convertTag applies c.fn to the value of the tag named c.Tag and stores the
// result under c.Dest, or back under c.Tag when no destination is configured.
// Metrics that do not carry the tag are left untouched.
func (c *converter) convertTag(metric telegraf.Metric) {
	value, found := metric.GetTag(c.Tag)
	if !found {
		return
	}

	// Default to an in-place update unless an explicit destination was given.
	target := c.Dest
	if target == "" {
		target = c.Tag
	}
	metric.AddTag(target, c.fn(value))
}

// convertField applies c.fn to the value of the field named c.Field and
// stores the result under c.Dest, or back under c.Field when no destination
// is configured. Missing fields and fields whose value is not a string are
// left untouched.
func (c *converter) convertField(metric telegraf.Metric) {
	raw, found := metric.GetField(c.Field)
	if !found {
		return
	}

	// Only string values can be transformed; anything else is skipped.
	text, isString := raw.(string)
	if !isString {
		return
	}

	target := c.Dest
	if target == "" {
		target = c.Field
	}
	metric.AddField(target, c.fn(text))
}

// convertMeasurement renames the metric by applying c.fn to its name, but
// only when the current name equals c.Measurement.
func (c *converter) convertMeasurement(metric telegraf.Metric) {
	if metric.Name() == c.Measurement {
		renamed := c.fn(metric.Name())
		metric.SetName(renamed)
	}
}

// convert runs the converter against every target it is configured for, in
// the order field, tag, measurement. A target whose key is empty is skipped.
func (c *converter) convert(metric telegraf.Metric) {
	wantField := c.Field != ""
	wantTag := c.Tag != ""
	wantMeasurement := c.Measurement != ""

	if wantField {
		c.convertField(metric)
	}
	if wantTag {
		c.convertTag(metric)
	}
	if wantMeasurement {
		c.convertMeasurement(metric)
	}
}

// initOnce builds s.converters from the configured sections the first time it
// is called; later calls return immediately.
//
// Converters are appended in a fixed order: lowercase, uppercase, trim,
// trim_left, trim_right, trim_prefix, trim_suffix. Within a section the
// configured order is kept.
//
// Each closure captures its own copy of the cutset/prefix/suffix rather than
// the range variable. Before Go 1.22 the range variable is shared by all
// iterations, so capturing it made every converter in a section use the
// parameters of the section's last entry.
func (s *Strings) initOnce() {
	if s.init {
		return
	}

	total := len(s.Lowercase) + len(s.Uppercase) + len(s.Trim) +
		len(s.TrimLeft) + len(s.TrimRight) + len(s.TrimPrefix) + len(s.TrimSuffix)
	s.converters = make([]converter, 0, total)

	for _, c := range s.Lowercase {
		c.fn = strings.ToLower
		s.converters = append(s.converters, c)
	}
	for _, c := range s.Uppercase {
		c.fn = strings.ToUpper
		s.converters = append(s.converters, c)
	}
	for _, c := range s.Trim {
		// An empty cutset means "trim whitespace".
		if cutset := c.Cutset; cutset != "" {
			c.fn = func(v string) string { return strings.Trim(v, cutset) }
		} else {
			c.fn = func(v string) string { return strings.TrimFunc(v, unicode.IsSpace) }
		}
		s.converters = append(s.converters, c)
	}
	for _, c := range s.TrimLeft {
		if cutset := c.Cutset; cutset != "" {
			c.fn = func(v string) string { return strings.TrimLeft(v, cutset) }
		} else {
			c.fn = func(v string) string { return strings.TrimLeftFunc(v, unicode.IsSpace) }
		}
		s.converters = append(s.converters, c)
	}
	for _, c := range s.TrimRight {
		if cutset := c.Cutset; cutset != "" {
			c.fn = func(v string) string { return strings.TrimRight(v, cutset) }
		} else {
			c.fn = func(v string) string { return strings.TrimRightFunc(v, unicode.IsSpace) }
		}
		s.converters = append(s.converters, c)
	}
	for _, c := range s.TrimPrefix {
		prefix := c.Prefix
		c.fn = func(v string) string { return strings.TrimPrefix(v, prefix) }
		s.converters = append(s.converters, c)
	}
	for _, c := range s.TrimSuffix {
		suffix := c.Suffix
		c.fn = func(v string) string { return strings.TrimSuffix(v, suffix) }
		s.converters = append(s.converters, c)
	}

	s.init = true
}

// Apply runs every configured converter against every metric in the batch and
// returns the same slice. Metrics are modified in place. The converter list is
// built on the first call.
func (s *Strings) Apply(in ...telegraf.Metric) []telegraf.Metric {
	s.initOnce()

	for i := range in {
		for j := range s.converters {
			s.converters[j].convert(in[i])
		}
	}

	return in
}

func init() {
processors.Add("strings", func() telegraf.Processor {
return &Strings{}
})
}
Loading

0 comments on commit c9215e2

Please sign in to comment.