From a660b56ab3f6a52c7aa87f9d87eafa387af1c76c Mon Sep 17 00:00:00 2001
From: Pablo Baeyens <pablo.baeyens@datadoghq.com>
Date: Tue, 1 Sep 2020 15:26:14 +0200
Subject: [PATCH] Add support for summary and distribution metric types (#65)

* Add support for summary metric type

* Add support for distribution metrics

* Refactor metrics construction
- Drop name in Metrics (now they act as Metric values)
- Refactor constructor so that errors happen at compile-time

* Report Summary total sum and count values
Snapshot values are not filled in by OpenTelemetry

* Report p00 and p100 as `.min` and `.max`
This is more similar to what we do for our own non-additive type

* Keep hostname if it has not been overridden
---
 exporter/datadogexporter/config.go            |   7 +
 exporter/datadogexporter/config_test.go       |   9 +-
 exporter/datadogexporter/dogstatsd.go         |  12 +-
 exporter/datadogexporter/example/config.yaml  |  13 ++
 exporter/datadogexporter/factory.go           |   3 +-
 exporter/datadogexporter/factory_test.go      |   3 +-
 exporter/datadogexporter/go.mod               |   1 +
 exporter/datadogexporter/metrics.go           | 187 ++++++++++-------
 exporter/datadogexporter/metrics_test.go      | 188 ++++++++++++------
 exporter/datadogexporter/testdata/config.yaml |   1 +
 10 files changed, 281 insertions(+), 143 deletions(-)

diff --git a/exporter/datadogexporter/config.go b/exporter/datadogexporter/config.go
index 696efc50b2e6..7b95c7cc8d20 100644
--- a/exporter/datadogexporter/config.go
+++ b/exporter/datadogexporter/config.go
@@ -70,6 +70,13 @@ type MetricsConfig struct {
 	// Mode is the metrics sending mode: either 'dogstatsd' or 'agentless'
 	Mode string `mapstructure:"mode"`
 
+	// Percentiles states whether to report percentiles for summary metrics,
+	// including the minimum and maximum
+	Percentiles bool `mapstructure:"report_percentiles"`
+
+	// Buckets states whether to report buckets from distribution metrics
+	Buckets bool `mapstructure:"report_buckets"`
+
 	// DogStatsD defines the DogStatsD configuration options.
 	DogStatsD DogStatsDConfig `mapstructure:"dogstatsd"`
 
diff --git a/exporter/datadogexporter/config_test.go b/exporter/datadogexporter/config_test.go
index 2e860dcfb793..e1c3bd2ec4af 100644
--- a/exporter/datadogexporter/config_test.go
+++ b/exporter/datadogexporter/config_test.go
@@ -61,8 +61,9 @@ func TestLoadConfig(t *testing.T) {
 		},
 
 		Metrics: MetricsConfig{
-			Mode:      AgentlessMode,
-			Namespace: "opentelemetry",
+			Mode:        AgentlessMode,
+			Namespace:   "opentelemetry",
+			Percentiles: false,
 
 			DogStatsD: DogStatsDConfig{
 				Endpoint:  "127.0.0.1:8125",
@@ -89,8 +90,8 @@ func TestLoadConfig(t *testing.T) {
 		API:        APIConfig{Site: "datadoghq.com"},
 
 		Metrics: MetricsConfig{
-			Mode: DogStatsDMode,
-
+			Mode:        DogStatsDMode,
+			Percentiles: true,
 			DogStatsD: DogStatsDConfig{
 				Endpoint:  "127.0.0.1:8125",
 				Telemetry: true,
diff --git a/exporter/datadogexporter/dogstatsd.go b/exporter/datadogexporter/dogstatsd.go
index 682077361900..78a6f4a3a7a6 100644
--- a/exporter/datadogexporter/dogstatsd.go
+++ b/exporter/datadogexporter/dogstatsd.go
@@ -62,11 +62,17 @@ func (exp *dogStatsDExporter) PushMetricsData(_ context.Context, md pdata.Metric
 
 	for name, data := range metrics {
 		for _, metric := range data {
+
+			tags := metric.GetTags()
+
+			// Send the hostname if it has not been overridden
+			if exp.GetConfig().Hostname == "" && metric.GetHost() != "" {
+				tags = append(tags, fmt.Sprintf("host:%s", metric.GetHost()))
+			}
+
 			switch metric.GetType() {
-			case Count:
-				err = exp.client.Count(name, metric.GetValue().(int64), metric.GetTags(), metric.GetRate())
 			case Gauge:
-				err = exp.client.Gauge(name, metric.GetValue().(float64), metric.GetTags(), metric.GetRate())
+				err = exp.client.Gauge(name, metric.GetValue(), tags, metric.GetRate())
 			}
 
 			if err != nil {
diff --git a/exporter/datadogexporter/example/config.yaml b/exporter/datadogexporter/example/config.yaml
index 40f841ee4a7e..11d575b844ad 100644
--- a/exporter/datadogexporter/example/config.yaml
+++ b/exporter/datadogexporter/example/config.yaml
@@ -74,6 +74,19 @@ exporters:
       #
       # namespace: ""
 
+      ## @param report_percentiles - boolean - optional - default: true
+      ## Whether to report percentiles (including minimum and maximum)
+      ## for summary metric types.
+      ## Disable this to reduce the number of custom metrics.
+      #
+      # report_percentiles: true
+
+      ## @param report_buckets - boolean - optional - default: false
+      ## Whether to report bucket counts for distribution metric types.
+      ## Enabling this will increase the number of custom metrics.
+      #
+      # report_buckets: false
+
       ## @param dogstatsd - custom object - optional
       ## DogStatSD mode specific configuration.
       #
diff --git a/exporter/datadogexporter/factory.go b/exporter/datadogexporter/factory.go
index 0d816521b0ea..9d8a89f422e5 100644
--- a/exporter/datadogexporter/factory.go
+++ b/exporter/datadogexporter/factory.go
@@ -59,7 +59,8 @@ func createDefaultConfig() configmodels.Exporter {
 		},
 
 		Metrics: MetricsConfig{
-			Mode: DogStatsDMode,
+			Mode:        DogStatsDMode,
+			Percentiles: true,
 
 			DogStatsD: DogStatsDConfig{
 				Endpoint:  "127.0.0.1:8125",
diff --git a/exporter/datadogexporter/factory_test.go b/exporter/datadogexporter/factory_test.go
index 42202fc73a1b..ed9fb80b67ba 100644
--- a/exporter/datadogexporter/factory_test.go
+++ b/exporter/datadogexporter/factory_test.go
@@ -36,7 +36,8 @@ func TestCreateDefaultConfig(t *testing.T) {
 	assert.Equal(t, cfg, &Config{
 		API: APIConfig{Site: "datadoghq.com"},
 		Metrics: MetricsConfig{
-			Mode: DogStatsDMode,
+			Mode:        DogStatsDMode,
+			Percentiles: true,
 			DogStatsD: DogStatsDConfig{
 				Endpoint:  "127.0.0.1:8125",
 				Telemetry: true,
diff --git a/exporter/datadogexporter/go.mod b/exporter/datadogexporter/go.mod
index 135ed09b20d0..4d58c64b778a 100644
--- a/exporter/datadogexporter/go.mod
+++ b/exporter/datadogexporter/go.mod
@@ -8,4 +8,5 @@ require (
 	github.com/stretchr/testify v1.6.1
 	go.opentelemetry.io/collector v0.9.0
 	go.uber.org/zap v1.15.0
+	google.golang.org/protobuf v1.25.0
 )
diff --git a/exporter/datadogexporter/metrics.go b/exporter/datadogexporter/metrics.go
index ab68e40420e9..8fb4898e641c 100644
--- a/exporter/datadogexporter/metrics.go
+++ b/exporter/datadogexporter/metrics.go
@@ -17,6 +17,7 @@ package datadogexporter
 import (
 	"context"
 	"fmt"
+	"math"
 
 	v1 "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1"
 	"go.opentelemetry.io/collector/consumer/pdata"
@@ -47,82 +48,45 @@ func newMetricsExporter(logger *zap.Logger, cfg *Config) (MetricsExporter, error
 type MetricType int
 
 const (
-	Count MetricType = iota
-	Gauge
+	Gauge MetricType = iota
 )
 
-type Metric struct {
+type MetricValue struct {
 	hostname   string
-	name       string
 	metricType MetricType
-	fvalue     float64
-	ivalue     int64
+	value      float64
 	tags       []string
 	rate       float64
 }
 
-func (m *Metric) GetHost() string {
+func (m *MetricValue) GetHost() string {
 	return m.hostname
 }
 
-func (m *Metric) GetName() string {
-	return m.name
-}
-
-func (m *Metric) GetType() MetricType {
+func (m *MetricValue) GetType() MetricType {
 	return m.metricType
 }
 
-func (m *Metric) GetValue() interface{} {
-	switch m.metricType {
-	case Count:
-		return m.ivalue
-	case Gauge:
-		return m.fvalue
-	}
-	return nil
+func (m *MetricValue) GetValue() float64 {
+	return m.value
 }
 
-func (m *Metric) GetRate() float64 {
+func (m *MetricValue) GetRate() float64 {
 	return m.rate
 }
 
-func (m *Metric) GetTags() []string {
+func (m *MetricValue) GetTags() []string {
 	return m.tags
 }
 
-func NewMetric(hostname, name string, metricType MetricType, value interface{}, tags []string, rate float64) (*Metric, error) {
-	switch metricType {
-	case Count:
-		ivalue, ok := value.(int64)
-		if !ok {
-			return nil, fmt.Errorf("Incorrect value type for count metric '%s'", name)
-		}
-		return &Metric{
-			hostname:   hostname,
-			name:       name,
-			metricType: Count,
-			ivalue:     ivalue,
-			rate:       rate,
-			tags:       tags,
-		}, nil
-
-	case Gauge:
-		fvalue, ok := value.(float64)
-		if !ok {
-			return nil, fmt.Errorf("Incorrect value type for count metric '%s'", name)
-		}
-		return &Metric{
-			hostname:   hostname,
-			name:       name,
-			metricType: Gauge,
-			fvalue:     fvalue,
-			rate:       rate,
-			tags:       tags,
-		}, nil
+func NewGauge(hostname string, value float64, tags []string) MetricValue {
+	return MetricValue{
+		hostname:   hostname,
+		metricType: Gauge,
+		value:      value,
+		rate:       1,
+		tags:       tags,
 	}
-
-	return nil, fmt.Errorf("Unrecognized Metric type for metric '%s'", name)
 }
 
 type OpenCensusKind int
@@ -134,12 +98,12 @@ const (
 	Summary
 )
 
-func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]*Metric, int, error) {
+func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]MetricValue, int, error) {
 	// Transform it into OpenCensus format
 	data := pdatautil.MetricsToMetricsData(md)
 
 	// Mapping from metrics name to data
-	metrics := map[string][]*Metric{}
+	metrics := map[string][]MetricValue{}
 
 	logger := exp.GetLogger()
 
@@ -202,30 +166,105 @@ func MapMetrics(exp MetricsExporter, md pdata.Metrics) (map[string][]*Metric, in
 				}
 
 				for _, point := range timeseries.GetPoints() {
-					// We assume the sampling rate is 1.
-					const defaultRate float64 = 1
-
 					switch kind {
-					case Int64, Double:
-						var value float64
-						if kind == Int64 {
-							value = float64(point.GetInt64Value())
-						} else {
-							value = point.GetDoubleValue()
+					case Int64:
+						metrics[metricName] = append(metrics[metricName],
+							NewGauge(hostname, float64(point.GetInt64Value()), tags),
+						)
+					case Double:
+						metrics[metricName] = append(metrics[metricName],
+							NewGauge(hostname, point.GetDoubleValue(), tags),
+						)
+					case Distribution:
+						// A Distribution metric has:
+						// - The count of values in the population
+						// - The sum of values in the population
+						// - The sum of squared deviations
+						// - A number of buckets, each of them having
+						//    - the bounds that define the bucket
+						//    - the count of the number of items in that bucket
+						//    - a sample value from each bucket
+						//
+						// We follow the implementation on `opencensus-go-exporter-datadog`:
+						// we report the first three values and the buckets count can also
+						// be reported (opt-in), but bounds are ignored.
+
+						dist := point.GetDistributionValue()
+
+						distMetrics := map[string]float64{
+							"count":           float64(dist.GetCount()),
+							"sum":             dist.GetSum(),
+							"squared_dev_sum": dist.GetSumOfSquaredDeviation(),
 						}
 
-						newVal, err := NewMetric(hostname, metricName, Gauge, value, tags, defaultRate)
-						if err != nil {
-							logger.Error("Error when creating Datadog metric, continuing...", nameField, zap.Error(err))
-							continue
+						for suffix, value := range distMetrics {
+							fullName := fmt.Sprintf("%s.%s", metricName, suffix)
+							metrics[fullName] = append(metrics[fullName],
+								NewGauge(hostname, value, tags),
+							)
+						}
+
+						if exp.GetConfig().Metrics.Buckets {
+							// We have a single metric, 'count_per_bucket', which is tagged with the bucket id. See:
+							// https://github.com/DataDog/opencensus-go-exporter-datadog/blob/c3b47f1c6dcf1c47b59c32e8dbb7df5f78162daa/stats.go#L99-L104
+							fullName := fmt.Sprintf("%s.count_per_bucket", metricName)
+
+							for idx, bucket := range dist.GetBuckets() {
+								bucketTags := append(tags, fmt.Sprintf("bucket_idx:%d", idx))
+								metrics[fullName] = append(metrics[fullName],
+									NewGauge(hostname, float64(bucket.GetCount()), bucketTags),
+								)
+							}
 						}
-						metrics[metricName] = append(metrics[metricName], newVal)
-					case Distribution:
-						logger.Warn("Ignoring distribution metric, not implemented yet", nameField)
-						continue
 					case Summary:
-						logger.Warn("Ignoring summary metric, not implemented yet", nameField)
-						continue
+						// A Summary metric has:
+						// - The total sum so far
+						// - The total count so far
+						// - A snapshot with
+						//   - the sum in the current snapshot
+						//   - the count in the current snapshot
+						//   - a series of percentiles
+						//
+						// By default we report the sum and count as gauges and percentiles.
+						// Percentiles are opt-out
+
+						// Report count if available
+						if count := point.GetSummaryValue().GetCount(); count != nil {
+							fullName := fmt.Sprintf("%s.count", metricName)
+							metrics[fullName] = append(metrics[fullName],
+								NewGauge(hostname, float64(count.GetValue()), tags),
+							)
+						}
+
+						// Report sum if available
+						if sum := point.GetSummaryValue().GetSum(); sum != nil {
+							fullName := fmt.Sprintf("%s.sum", metricName)
+							metrics[fullName] = append(metrics[fullName],
+								NewGauge(hostname, sum.GetValue(), tags),
+							)
+						}
+
+						if exp.GetConfig().Metrics.Percentiles {
+							snapshot := point.GetSummaryValue().GetSnapshot()
+							for _, pair := range snapshot.GetPercentileValues() {
+								var fullName string
+								if perc := pair.GetPercentile(); perc == 0 {
+									// p0 is the minimum
+									fullName = fmt.Sprintf("%s.min", metricName)
+								} else if perc == 100 {
+									// p100 is the maximum
+									fullName = fmt.Sprintf("%s.max", metricName)
+								} else {
+									// Round to the nearest digit
+									fullName = fmt.Sprintf("%s.p%02d", metricName, int(math.Round(perc)))
+								}
+
+								metrics[fullName] = append(metrics[fullName],
+									NewGauge(hostname, pair.GetValue(), tags),
+								)
+							}
+						}
+
 					}
 				}
 			}
diff --git a/exporter/datadogexporter/metrics_test.go b/exporter/datadogexporter/metrics_test.go
index 02840b67fd2c..7c90875948f1 100644
--- a/exporter/datadogexporter/metrics_test.go
+++ b/exporter/datadogexporter/metrics_test.go
@@ -24,7 +24,6 @@ import (
 	v1agent "github.com/census-instrumentation/opencensus-proto/gen-go/agent/common/v1"
 	v1 "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1"
 	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
 	"go.opentelemetry.io/collector/consumer/consumerdata"
 	"go.opentelemetry.io/collector/consumer/pdata"
 	"go.opentelemetry.io/collector/consumer/pdatautil"
@@ -61,44 +60,18 @@ func (m *MockMetricsExporter) GetRetrySettings() exporterhelper.RetrySettings {
 func TestMetricValue(t *testing.T) {
 	var (
 		hostname string   = "unknown"
-		name     string   = "metric.name"
 		value    float64  = math.Pi
 		tags     []string = []string{"tool:opentelemetry", "version:0.1.0"}
 		rate     float64  = 1
 	)
 
-	metric, err := NewMetric(
-		hostname,
-		name,
-		Gauge,
-		value,
-		tags,
-		rate,
-	)
-
-	assert.Nil(t, err)
-	assert.NotNil(t, metric)
+	metric := NewGauge(hostname, value, tags)
 
 	assert.Equal(t, hostname, metric.GetHost())
-	assert.Equal(t, name, metric.GetName())
 	assert.Equal(t, Gauge, metric.GetType())
 	assert.Equal(t, value, metric.GetValue())
 	assert.Equal(t, tags, metric.GetTags())
 	assert.Equal(t, rate, metric.GetRate())
-
-	// Fail when using incorrect type
-	nilMetric, err := NewMetric(
-		hostname,
-		name,
-		Count,
-		value,
-		tags,
-		rate,
-	)
-
-	require.Error(t, err)
-	assert.Nil(t, nilMetric)
-
 }
 
 var (
@@ -137,27 +110,21 @@ func TestMapNumericMetric(t *testing.T) {
 	assert.Equal(t, 0, droppedTimeSeries)
 	assert.Nil(t, err)
 	assert.Equal(t,
-		map[string][]*Metric{
-			"cumulative.float64.test": {
-				{
-					hostname:   "unknown",
-					name:       "cumulative.float64.test",
-					metricType: Gauge,
-					fvalue:     math.Pi,
-					rate:       1,
-					tags:       testTags[:],
-				},
-			},
-			"gauge.float64.test": {
-				{
-					hostname:   "unknown",
-					name:       "gauge.float64.test",
-					metricType: Gauge,
-					fvalue:     math.Pi,
-					rate:       1,
-					tags:       testTags[:],
-				},
-			},
+		map[string][]MetricValue{
+			"cumulative.float64.test": {{
+				hostname:   "unknown",
+				metricType: Gauge,
+				value:      math.Pi,
+				rate:       1,
+				tags:       testTags[:],
+			}},
+			"gauge.float64.test": {{
+				hostname:   "unknown",
+				metricType: Gauge,
+				value:      math.Pi,
+				rate:       1,
+				tags:       testTags[:],
+			}},
 		},
 		metrics,
 	)
@@ -168,29 +135,130 @@ func TestMapDistributionMetric(t *testing.T) {
 	ts := time.Now()
 	md := NewMetricsData([]*v1.Metric{
 		metricstest.GaugeDist("dist.test", testKeys[:],
-			metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{}, []int64{}))),
-		metricstest.CumulativeDist("cumulative.dist.test", testKeys[:],
-			metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{}, []int64{}))),
+			metricstest.Timeseries(ts, testValues[:], metricstest.DistPt(ts, []float64{0.1, 0.2}, []int64{100, 200}))),
 	})
 
-	metrics, _, err := MapMetrics(mockExporter, md)
+	metrics, _, err := MapMetrics(
+		&MockMetricsExporter{cfg: &Config{Metrics: MetricsConfig{Buckets: true}}},
+		md,
+	)
 
-	// Right now they are silently ignored
 	assert.Nil(t, err)
-	assert.Equal(t, map[string][]*Metric{}, metrics)
+	assert.Equal(t, map[string][]MetricValue{
+		"dist.test.count": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      300,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"dist.test.sum": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			// The sum value is approximated by the metricstest
+			// package using lower bounds and counts:
+			// sum = ∑ bound(i-1) * count(i)
+			//     = 0*100 + 0.1*200
+			//     = 20
+			value: 20,
+			rate:  1,
+			tags:  testTags[:],
+		}},
+		"dist.test.squared_dev_sum": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			// The sum of squared deviations is set to 0 by the
+			// metricstest package
+			value: 0,
+			rate:  1,
+			tags:  testTags[:],
+		}},
+		"dist.test.count_per_bucket": {
+			{
+				hostname:   "unknown",
+				metricType: Gauge,
+				value:      100,
+				rate:       1,
+				tags:       append(testTags[:], "bucket_idx:0"),
+			},
+
+			{
+				hostname:   "unknown",
+				metricType: Gauge,
+				value:      200,
+				rate:       1,
+				tags:       append(testTags[:], "bucket_idx:1"),
+			},
+		},
+	},
+		metrics,
+	)
 }
 
 func TestMapSummaryMetric(t *testing.T) {
 	ts := time.Now()
 
+	summaryPoint := metricstest.Timeseries(
+		ts,
+		testValues[:],
+		metricstest.SummPt(ts, 5, 23, []float64{0, 50.1, 95, 100}, []float64{1, 22, 100, 300}),
+	)
+
 	md := NewMetricsData([]*v1.Metric{
-		metricstest.Summary("summary.test", testKeys[:],
-			metricstest.Timeseries(ts, testValues[:], metricstest.SummPt(ts, 2, 10, []float64{}, []float64{}))),
+		metricstest.Summary("summary.test", testKeys[:], summaryPoint),
 	})
 
-	metrics, _, err := MapMetrics(mockExporter, md)
+	metrics, _, err := MapMetrics(
+		// Enable percentiles for test
+		&MockMetricsExporter{cfg: &Config{Metrics: MetricsConfig{Percentiles: true}}},
+		md,
+	)
 
-	// Right now they are silently ignored
 	assert.Nil(t, err)
-	assert.Equal(t, map[string][]*Metric{}, metrics)
+	assert.Equal(t, map[string][]MetricValue{
+		"summary.test.sum": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      23,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"summary.test.count": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      5,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"summary.test.p50": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      22,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"summary.test.p95": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      100,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"summary.test.min": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      1,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+		"summary.test.max": {{
+			hostname:   "unknown",
+			metricType: Gauge,
+			value:      300,
+			rate:       1,
+			tags:       testTags[:],
+		}},
+	},
+		metrics,
+	)
 }
diff --git a/exporter/datadogexporter/testdata/config.yaml b/exporter/datadogexporter/testdata/config.yaml
index b7731e0e0cdf..d6da965fc968 100644
--- a/exporter/datadogexporter/testdata/config.yaml
+++ b/exporter/datadogexporter/testdata/config.yaml
@@ -21,6 +21,7 @@ exporters:
     metrics:
       mode: agentless
       namespace: opentelemetry
+      report_percentiles: false
 
   datadog/dogstatsd:
     metrics: