From a660b56ab3f6a52c7aa87f9d87eafa387af1c76c Mon Sep 17 00:00:00 2001 From: Pablo Baeyens Date: Tue, 1 Sep 2020 15:26:14 +0200 Subject: [PATCH] Add support for summary and distribution metric types (#65) * Add support for summary metric type * Add support for distribution metrics * Refactor metrics construction - Drop name in Metrics (now they act as Metric values) - Refactor constructor so that errors happen at compile-time * Report Summary total sum and count values Snapshot values are not filled in by OpenTelemetry * Report p00 and p100 as `.min` and `.max` This is more similar to what we do for our own non-additive type * Keep hostname if it has not been overridden --- exporter/datadogexporter/config.go | 7 + exporter/datadogexporter/config_test.go | 9 +- exporter/datadogexporter/dogstatsd.go | 12 +- exporter/datadogexporter/example/config.yaml | 13 ++ exporter/datadogexporter/factory.go | 3 +- exporter/datadogexporter/factory_test.go | 3 +- exporter/datadogexporter/go.mod | 1 + exporter/datadogexporter/metrics.go | 187 ++++++++++------- exporter/datadogexporter/metrics_test.go | 188 ++++++++++++------ exporter/datadogexporter/testdata/config.yaml | 1 + 10 files changed, 281 insertions(+), 143 deletions(-) diff --git a/exporter/datadogexporter/config.go b/exporter/datadogexporter/config.go index 696efc50b2e6..7b95c7cc8d20 100644 --- a/exporter/datadogexporter/config.go +++ b/exporter/datadogexporter/config.go @@ -70,6 +70,13 @@ type MetricsConfig struct { // Mode is the metrics sending mode: either 'dogstatsd' or 'agentless' Mode string `mapstructure:"mode"` + // Percentiles states whether to report percentiles for summary metrics, + // including the minimum and maximum + Percentiles bool `mapstructure:"report_percentiles"` + + // Buckets states whether to report buckets from distribution metrics + Buckets bool `mapstructure:"report_buckets"` + // DogStatsD defines the DogStatsD configuration options. DogStatsD DogStatsDConfig `mapstructure:"dogstatsd"` diff --git a/exporter/datadogexporter/config_test.go b/exporter/datadogexporter/config_test.go index 2e860dcfb793..e1c3bd2ec4af 100644 --- a/exporter/datadogexporter/config_test.go +++ b/exporter/datadogexporter/config_test.go @@ -61,8 +61,9 @@ func TestLoadConfig(t *testing.T) { }, Metrics: MetricsConfig{ - Mode: AgentlessMode, - Namespace: "opentelemetry", + Mode: AgentlessMode, + Namespace: "opentelemetry", + Percentiles: false, DogStatsD: DogStatsDConfig{ Endpoint: "127.0.0.1:8125", @@ -89,8 +90,8 @@ func TestLoadConfig(t *testing.T) { API: APIConfig{Site: "datadoghq.com"}, Metrics: MetricsConfig{ - Mode: DogStatsDMode, - + Mode: DogStatsDMode, + Percentiles: true, DogStatsD: DogStatsDConfig{ Endpoint: "127.0.0.1:8125", Telemetry: true, diff --git a/exporter/datadogexporter/dogstatsd.go b/exporter/datadogexporter/dogstatsd.go index 682077361900..78a6f4a3a7a6 100644 --- a/exporter/datadogexporter/dogstatsd.go +++ b/exporter/datadogexporter/dogstatsd.go @@ -62,11 +62,17 @@ func (exp *dogStatsDExporter) PushMetricsData(_ context.Context, md pdata.Metric for name, data := range metrics { for _, metric := range data { + + tags := metric.GetTags() + + // Send the hostname if it has not been overridden + if exp.GetConfig().Hostname == "" && metric.GetHost() != "" { + tags = append(tags, fmt.Sprintf("host:%s", metric.GetHost())) + } + switch metric.GetType() { - case Count: - err = exp.client.Count(name, metric.GetValue().(int64), metric.GetTags(), metric.GetRate()) case Gauge: - err = exp.client.Gauge(name, metric.GetValue().(float64), metric.GetTags(), metric.GetRate()) + err = exp.client.Gauge(name, metric.GetValue(), tags, metric.GetRate()) } if err != nil { diff --git a/exporter/datadogexporter/example/config.yaml b/exporter/datadogexporter/example/config.yaml index 40f841ee4a7e..11d575b844ad 100644 --- a/exporter/datadogexporter/example/config.yaml +++ b/exporter/datadogexporter/example/config.yaml @@ -74,6 +74,19 @@ exporters: # # namespace: "" + ## @param report_percentiles - boolean - optional - default: true + ## Whether to report percentiles (including minimum and maximum) + ## for summary metric types. + ## Disable this to reduce the number of custom metrics. + # + # report_percentiles: true + + ## @param report_buckets - boolean - optional - default: false + ## Whether to report bucket counts for distribution metric types. + ## Enabling this will increase the number of custom metrics. + # + # report_buckets: false + ## @param dogstatsd - custom object - optional ## DogStatSD mode specific configuration. # diff --git a/exporter/datadogexporter/factory.go b/exporter/datadogexporter/factory.go index 0d816521b0ea..9d8a89f422e5 100644 --- a/exporter/datadogexporter/factory.go +++ b/exporter/datadogexporter/factory.go @@ -59,7 +59,8 @@ func createDefaultConfig() configmodels.Exporter { }, Metrics: MetricsConfig{ - Mode: DogStatsDMode, + Mode: DogStatsDMode, + Percentiles: true, DogStatsD: DogStatsDConfig{ Endpoint: "127.0.0.1:8125", diff --git a/exporter/datadogexporter/factory_test.go b/exporter/datadogexporter/factory_test.go index 42202fc73a1b..ed9fb80b67ba 100644 --- a/exporter/datadogexporter/factory_test.go +++ b/exporter/datadogexporter/factory_test.go @@ -36,7 +36,8 @@ func TestCreateDefaultConfig(t *testing.T) { assert.Equal(t, cfg, &Config{ API: APIConfig{Site: "datadoghq.com"}, Metrics: MetricsConfig{ - Mode: DogStatsDMode, + Mode: DogStatsDMode, + Percentiles: true, DogStatsD: DogStatsDConfig{ Endpoint: "127.0.0.1:8125", Telemetry: true, diff --git a/exporter/datadogexporter/go.mod b/exporter/datadogexporter/go.mod index 135ed09b20d0..4d58c64b778a 100644 --- a/exporter/datadogexporter/go.mod +++ b/exporter/datadogexporter/go.mod @@ -8,4 +8,5 @@ require ( github.com/stretchr/testify v1.6.1 go.opentelemetry.io/collector v0.9.0 go.uber.org/zap v1.15.0 + google.golang.org/protobuf v1.25.0 ) diff --git a/exporter/datadogexporter/metrics.go b/exporter/datadogexporter/metrics.go index ab68e40420e9..8fb4898e641c 100644 --- a/exporter/datadogexporter/metrics.go +++ b/exporter/datadogexporter/metrics.go @@ -17,6 +17,7 @@ package datadogexporter import ( "context" "fmt" + "math" v1 "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1" "go.opentelemetry.io/collector/consumer/pdata" @@ -47,82 +48,45 @@ func newMetricsExporter(logger *zap.Logger, cfg *Config) (MetricsExporter, error type MetricType int const ( - Count MetricType = iota - Gauge + Gauge MetricType = iota ) -type Metric struct { +type MetricValue struct { hostname string - name string metricType MetricType - fvalue float64 - ivalue int64 + value float64 tags []string rate float64 } -func (m *Metric) GetHost() string { +func (m *MetricValue) GetHost() string { return m.hostname } -func (m *Metric) GetName() string { - return m.name -} - -func (m *Metric) GetType() MetricType { +func (m *MetricValue) GetType() MetricType { return m.metricType } -func (m *Metric) GetValue() interface{} { - switch m.metricType { - case Count: - return m.ivalue - case Gauge: - return m.fvalue - } - return nil +func (m *MetricValue) GetValue() float64 { + return m.value } -func (m *Metric) GetRate() float64 { +func (m *MetricValue) GetRate() float64 { return m.rate } -func (m *Metric) GetTags() []string { +func (m *MetricValue) GetTags() []string { return m.tags } -func NewMetric(hostname, name string, metricType MetricType, value interface{}, tags []string, rate float64) (*Metric, error) { - switch metricType { - case Count: - ivalue, ok := value.(int64) - if !ok { - return nil, fmt.Errorf("Incorrect value type for count metric '%s'", name) - } - return &Metric{ - hostname: hostname, - name: name, - metricType: Count, - ivalue: ivalue, - rate: rate, - tags: tags, - }, nil - - case Gauge: - fvalue, ok := value.(float64) - if !ok { - return nil, fmt.Errorf("Incorrect value type for count metric '%s'", name) - } - return &Metric{ - hostname: hostname, - name: name, - metricType: Gauge, - fvalue: fvalue, - rate: rate, - tags: tags, - }, nil +func NewGauge(hostname string, value float64, tags []string) MetricValue { + return MetricValue{ + hostname: hostname, + metricType: Gauge, + value: value, + rate: 1, + tags: tags, } - - return nil, fmt.Errorf("Unrecognized Metric type for metric '%s'", name) } type OpenCensusKind int @@ -134,12 +98,12 @@ const ( Summary ) -func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]*Metric, int, error) { +func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]MetricValue, int, error) { // Transform it into OpenCensus format data := pdatautil.MetricsToMetricsData(md) // Mapping from metrics name to data - metrics := map[string][]*Metric{} + metrics := map[string][]MetricValue{} logger := exp.GetLogger() @@ -202,30 +166,105 @@ func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]*Metric, in } for _, point := range timeseries.GetPoints() { - // We assume the sampling rate is 1. - const defaultRate float64 = 1 - switch kind { - case Int64, Double: - var value float64 - if kind == Int64 { - value = float64(point.GetInt64Value()) - } else { - value = point.GetDoubleValue() + case Int64: + metrics[metricName] = append(metrics[metricName], + NewGauge(hostname, float64(point.GetInt64Value()), tags), + ) + case Double: + metrics[metricName] = append(metrics[metricName], + NewGauge(hostname, point.GetDoubleValue(), tags), + ) + case Distribution: + // A Distribution metric has: + // - The count of values in the population + // - The sum of values in the population + // - The sum of squared deviations + // - A number of buckets, each of them having + // - the bounds that define the bucket + // - the count of the number of items in that bucket + // - a sample value from each bucket + // + // We follow the implementation on `opencensus-go-exporter-datadog`: + // we report the first three values and the buckets count can also + // be reported (opt-in), but bounds are ignored. + + dist := point.GetDistributionValue() + + distMetrics := map[string]float64{ + "count": float64(dist.GetCount()), + "sum": dist.GetSum(), + "squared_dev_sum": dist.GetSumOfSquaredDeviation(), } - newVal, err := NewMetric(hostname, metricName, Gauge, value, tags, defaultRate) - if err != nil { - logger.Error("Error when creating Datadog metric, continuing...", nameField, zap.Error(err)) - continue + for suffix, value := range distMetrics { + fullName := fmt.Sprintf("%s.%s", metricName, suffix) + metrics[fullName] = append(metrics[fullName], + NewGauge(hostname, value, tags), + ) + } + + if exp.GetConfig().Metrics.Buckets { + // We have a single metric, 'count_per_bucket', which is tagged with the bucket id. See: + // https://github.com/DataDog/opencensus-go-exporter-datadog/blob/c3b47f1c6dcf1c47b59c32e8dbb7df5f78162daa/stats.go#L99-L104 + fullName := fmt.Sprintf("%s.count_per_bucket", metricName) + + for idx, bucket := range dist.GetBuckets() { + bucketTags := append(tags, fmt.Sprintf("bucket_idx:%d", idx)) + metrics[fullName] = append(metrics[fullName], + NewGauge(hostname, float64(bucket.GetCount()), bucketTags), + ) + } } - metrics[metricName] = append(metrics[metricName], newVal) - case Distribution: - logger.Warn("Ignoring distribution metric, not implemented yet", nameField) - continue case Summary: - logger.Warn("Ignoring summary metric, not implemented yet", nameField) - continue + // A Summary metric has: + // - The total sum so far + // - The total count so far + // - A snapshot with + // - the sum in the current snapshot + // - the count in the current snapshot + // - a series of percentiles + // + // By default we report the sum and count as gauges and percentiles. + // Percentiles are opt-out + + // Report count if available + if count := point.GetSummaryValue().GetCount(); count != nil { + fullName := fmt.Sprintf("%s.count", metricName) + metrics[fullName] = append(metrics[fullName], + NewGauge(hostname, float64(count.GetValue()), tags), + ) + } + + // Report sum if available + if sum := point.GetSummaryValue().GetSum(); sum != nil { + fullName := fmt.Sprintf("%s.sum", metricName) + metrics[fullName] = append(metrics[fullName], + NewGauge(hostname, sum.GetValue(), tags), + ) + } + + if exp.GetConfig().Metrics.Percentiles { + snapshot := point.GetSummaryValue().GetSnapshot() + for _, pair := range snapshot.GetPercentileValues() { + var fullName string + if perc := pair.GetPercentile(); perc == 0 { + // p0 is the minimum + fullName = fmt.Sprintf("%s.min", metricName) + } else if perc == 100 { + // p100 is the maximum + fullName = fmt.Sprintf("%s.max", metricName) + } else { + // Round to the nearest digit + fullName = fmt.Sprintf("%s.p%02d", metricName, int(math.Round(perc))) + } + + metrics[fullName] = append(metrics[fullName], + NewGauge(hostname, pair.GetValue(), tags), + ) + } + } + } } } diff --git a/exporter/datadogexporter/metrics_test.go b/exporter/datadogexporter/metrics_test.go index 02840b67fd2c..7c90875948f1 100644 --- a/exporter/datadogexporter/metrics_test.go +++ b/exporter/datadogexporter/metrics_test.go @@ -24,7 +24,6 @@ import ( v1agent "github.com/census-instrumentation/opencensus-proto/gen-go/agent/common/v1" v1 "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/consumer/consumerdata" "go.opentelemetry.io/collector/consumer/pdata" "go.opentelemetry.io/collector/consumer/pdatautil" @@ -61,44 +60,18 @@ func (m *MockMetricsExporter) GetRetrySettings() exporterhelper.RetrySettings { func TestMetricValue(t *testing.T) { var ( hostname string = "unknown" - name string = "metric.name" value float64 = math.Pi tags []string = []string{"tool:opentelemetry", "version:0.1.0"} rate float64 = 1 ) - metric, err := NewMetric( - hostname, - name, - Gauge, - value, - tags, - rate, - ) - - assert.Nil(t, err) - assert.NotNil(t, metric) + metric := NewGauge(hostname, value, tags) assert.Equal(t, hostname, metric.GetHost()) - assert.Equal(t, name, metric.GetName()) assert.Equal(t, Gauge, metric.GetType()) assert.Equal(t, value, metric.GetValue()) assert.Equal(t, tags, metric.GetTags()) assert.Equal(t, rate, metric.GetRate()) - - // Fail when using incorrect type - nilMetric, err := NewMetric( - hostname, - name, - Count, - value, - tags, - rate, - ) - - require.Error(t, err) - assert.Nil(t, nilMetric) - } var ( @@ -137,27 +110,21 @@ func TestMapNumericMetric(t *testing.T) { assert.Equal(t, 0, droppedTimeSeries) assert.Nil(t, err) assert.Equal(t, - map[string][]*Metric{ - "cumulative.float64.test": { - { - hostname: "unknown", - name: "cumulative.float64.test", - metricType: Gauge, - fvalue: math.Pi, - rate: 1, - tags: testTags[:], - }, - }, - "gauge.float64.test": { - { - hostname: "unknown", - name: "gauge.float64.test", - metricType: Gauge, - fvalue: math.Pi, - rate: 1, - tags: testTags[:], - }, - }, + map[string][]MetricValue{ + "cumulative.float64.test": {{ + hostname: "unknown", + metricType: Gauge, + value: math.Pi, + rate: 1, + tags: testTags[:], + }}, + "gauge.float64.test": {{ + hostname: "unknown", + metricType: Gauge, + value: math.Pi, + rate: 1, + tags: testTags[:], + }}, }, metrics, ) @@ -168,29 +135,130 @@ func TestMapDistributionMetric(t *testing.T) { ts := time.Now() md := NewMetricsData([]*v1.Metric{ metricstest.GaugeDist("dist.test", testKeys[:], - metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{}, []int64{}))), - metricstest.CumulativeDist("cumulative.dist.test", testKeys[:], - metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{}, []int64{}))), + metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{0.1, 0.2}, []int64{100, 200}))), }) - metrics, _, err := MapMetrics(mockExporter, md) + metrics, _, err := MapMetrics( + &MockMetricsExporter{cfg: &Config{Metrics: MetricsConfig{Buckets: true}}}, + md, + ) - // Right now they are silently ignored assert.Nil(t, err) - assert.Equal(t, map[string][]*Metric{}, metrics) + assert.Equal(t, map[string][]MetricValue{ + "dist.test.count": {{ + hostname: "unknown", + metricType: Gauge, + value: 300, + rate: 1, + tags: testTags[:], + }}, + "dist.test.sum": {{ + hostname: "unknown", + metricType: Gauge, + // The sum value is approximated by the metricstest + // package using lower bounds and counts: + // sum = ∑ bound(i-1) * count(i) + // = 0*100 + 0.1*200 + // = 20 + value: 20, + rate: 1, + tags: testTags[:], + }}, + "dist.test.squared_dev_sum": {{ + hostname: "unknown", + metricType: Gauge, + // The sum of squared deviations is set to 0 by the + // metricstest package + value: 0, + rate: 1, + tags: testTags[:], + }}, + "dist.test.count_per_bucket": { + { + hostname: "unknown", + metricType: Gauge, + value: 100, + rate: 1, + tags: append(testTags[:], "bucket_idx:0"), + }, + + { + hostname: "unknown", + metricType: Gauge, + value: 200, + rate: 1, + tags: append(testTags[:], "bucket_idx:1"), + }, + }, + }, + metrics, + ) } func TestMapSummaryMetric(t *testing.T) { ts := time.Now() + summaryPoint := metricstest.Timeseries( + ts, + testValues[:], + metricstest.SummPt(ts, 5, 23, []float64{0, 50.1, 95, 100}, []float64{1, 22, 100, 300}), + ) + md := NewMetricsData([]*v1.Metric{ - metricstest.Summary("summary.test", testKeys[:], - metricstest.Timeseries(ts, testValues[:], metricstest.SummPt(ts, 2, 10, []float64{}, []float64{}))), + metricstest.Summary("summary.test", testKeys[:], summaryPoint), }) - metrics, _, err := MapMetrics(mockExporter, md) + metrics, _, err := MapMetrics( + // Enable percentiles for test + &MockMetricsExporter{cfg: &Config{Metrics: MetricsConfig{Percentiles: true}}}, + md, + ) - // Right now they are silently ignored assert.Nil(t, err) - assert.Equal(t, map[string][]*Metric{}, metrics) + assert.Equal(t, map[string][]MetricValue{ + "summary.test.sum": {{ + hostname: "unknown", + metricType: Gauge, + value: 23, + rate: 1, + tags: testTags[:], + }}, + "summary.test.count": {{ + hostname: "unknown", + metricType: Gauge, + value: 5, + rate: 1, + tags: testTags[:], + }}, + "summary.test.p50": {{ + hostname: "unknown", + metricType: Gauge, + value: 22, + rate: 1, + tags: testTags[:], + }}, + "summary.test.p95": {{ + hostname: "unknown", + metricType: Gauge, + value: 100, + rate: 1, + tags: testTags[:], + }}, + "summary.test.min": {{ + hostname: "unknown", + metricType: Gauge, + value: 1, + rate: 1, + tags: testTags[:], + }}, + "summary.test.max": {{ + hostname: "unknown", + metricType: Gauge, + value: 300, + rate: 1, + tags: testTags[:], + }}, + }, + metrics, + ) } diff --git a/exporter/datadogexporter/testdata/config.yaml b/exporter/datadogexporter/testdata/config.yaml index b7731e0e0cdf..d6da965fc968 100644 --- a/exporter/datadogexporter/testdata/config.yaml +++ b/exporter/datadogexporter/testdata/config.yaml @@ -21,6 +21,7 @@ exporters: metrics: mode: agentless namespace: opentelemetry + report_percentiles: false datadog/dogstatsd: metrics: