diff --git a/cmd/otelcontribcol/components.go b/cmd/otelcontribcol/components.go index ead94268a377..5d0421659628 100644 --- a/cmd/otelcontribcol/components.go +++ b/cmd/otelcontribcol/components.go @@ -36,6 +36,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/stackdriverexporter" "github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/k8sobserver" "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sprocessor" + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricstransformprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/carbonreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/collectdreceiver" @@ -120,6 +121,7 @@ func components() (config.Factories, error) { processors := []component.ProcessorFactoryBase{ &k8sprocessor.Factory{}, resourcedetectionprocessor.NewFactory(), + &metricstransformprocessor.Factory{}, } for _, pr := range factories.Processors { processors = append(processors, pr) diff --git a/go.mod b/go.mod index 953e0e80e252..8dedf6fc77d4 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/k8sobserver v0.0.0 github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sprocessor v0.0.0 github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0 + github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricstransformprocessor v0.0.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/carbonreceiver v0.0.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/collectdreceiver v0.0.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/jaegerlegacyreceiver v0.0.0 @@ -112,4 +113,6 @@ replace 
github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sp replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor => ./processor/resourcedetectionprocessor/ +replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricstransformprocessor => ./processor/metricstransformprocessor/ + replace k8s.io/client-go => k8s.io/client-go v0.0.0-20190620085101-78d2af792bab diff --git a/processor/metricstransformprocessor/Makefile b/processor/metricstransformprocessor/Makefile new file mode 100644 index 000000000000..ded7a36092dc --- /dev/null +++ b/processor/metricstransformprocessor/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/processor/metricstransformprocessor/README.md b/processor/metricstransformprocessor/README.md new file mode 100644 index 000000000000..282f968342ba --- /dev/null +++ b/processor/metricstransformprocessor/README.md @@ -0,0 +1,99 @@ +# Metrics Transform Processor **(UNDER DEVELOPMENT - NOT READY FOR USE)** +Supported pipeline types: metrics +- This ONLY supports renames/aggregations **within individual metrics**. It does not do any aggregation across batches, so it is not suitable for aggregating metrics from multiple sources (e.g. multiple nodes or clients). At this point, it is only for aggregating metrics from a single source that groups its metrics for a particular time period into a single batch (e.g. host metrics from the VM the collector is running on). +- Rename Collisions will result in a no operation on the metrics data + - e.g. If want to rename a metric or label to `new_name` while there is already a metric or label called `new_name`, this operation will not take any effect. There will also be an error logged + +## Description +The metrics transform processor can be used to rename metrics, labels, or label values. It can also be used to perform aggregations on metrics across labels or label values. + +## Capabilities +- Rename metrics (e.g. 
rename `cpu/usage` to `cpu/usage_time`) +- Rename labels (e.g. rename `cpu` to `core`) +- Rename label values (e.g. rename `done` to `complete`) +- Aggregate across label sets (e.g. only want the label `usage`, but don’t care about the labels `core` and `cpu`) + - Aggregation_type: sum, average, max +- Aggregate across label values (e.g. want `memory{slab}`, but don’t care about `memory{slab_reclaimable}` & `memory{slab_unreclaimable}`) + - Aggregation_type: sum, average, max + +## Configuration +```yaml +# transforms is a list of transformations with each element transforming a metric selected by metric name +transforms: + # metric_name is used to match with the metric to operate on. This implementation doesn’t utilize the filtermetric’s MatchProperties struct because it doesn’t match well with what this processor needs at this phase. All that is needed for this processor at this stage is a single name string that can be used to match with selected metrics. The list of metric names and the match type in the filtermetric’s MatchProperties struct are unnecessary. Also, based on the issue about improving filtering configuration, it seems like this struct is likely to be modified slightly. + - metric_name: + + # action specifies if the operations are performed on the current copy of the metric or on a newly created metric that will be inserted + action: {update, insert} + + # new_name is used to rename metrics (e.g. rename cpu/usage to cpu/usage_time). If action is insert, new_name is required + new_name: + + # operations contains a list of operations that will be performed on the selected metrics. Each operation block is a key-value pair, where the key can be any arbitrary string set by the users for readability, and the value is a struct with fields required for operations. The action field is important for the processor to identify exactly which operation to perform + operations: + + # update_label action can be used to update the name of a label or the values of this label (e.g. 
rename label `cpu` to `core`) + - action: update_label + label: + new_label: + value_actions: + - value: + new_value: + + # aggregate_labels action aggregates metrics across labels (e.g. only want the label `usage`, but don’t care about the labels `core` and `cpu`) + - action: aggregate_labels + # label_set contains a list of labels that will remain after the aggregation. The excluded labels will be aggregated in the way specified by aggregation_type. + label_set: [labels...] + aggregation_type: {sum, average, max} + + # aggregate_label_values action aggregates labels across label values (e.g. want memory{slab}, but don’t care about memory{slab_reclaimable} & memory{slab_unreclaimable}) + - action: aggregate_label_values + label: