From d427ab70c1c71cd9b7160172bd1133dab2282d01 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 15 Nov 2017 13:21:06 -0800 Subject: [PATCH 1/2] Only publish metric when the task is running and dev mode publishes metrics --- client/task_runner.go | 8 ++++++++ command/agent/config.go | 3 +++ 2 files changed, 11 insertions(+) diff --git a/client/task_runner.go b/client/task_runner.go index f79bb87e774..a6d230a62b8 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -1891,6 +1891,14 @@ func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) { return } + // If the task is not running don't emit anything + r.runningLock.Lock() + running := r.running + r.runningLock.Unlock() + if !running { + return + } + if ru.ResourceUsage.MemoryStats != nil { r.setGaugeForMemory(ru) } diff --git a/command/agent/config.go b/command/agent/config.go index d4407acf086..08e8d2b7899 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -545,6 +545,9 @@ func DevConfig() *Config { conf.Client.GCDiskUsageThreshold = 99 conf.Client.GCInodeUsageThreshold = 99 conf.Client.GCMaxAllocs = 50 + conf.Telemetry.PrometheusMetrics = true + conf.Telemetry.PublishAllocationMetrics = true + conf.Telemetry.PublishNodeMetrics = true return conf } From 85b27621edf0690e7fcbe82f649e073e32b260f8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 16 Nov 2017 10:42:49 -0800 Subject: [PATCH 2/2] Vendor in fixed go-metrics --- vendor/github.com/armon/go-metrics/inmem.go | 4 +- .../armon/go-metrics/prometheus/prometheus.go | 96 ++++++++++++++++--- vendor/vendor.json | 20 ++-- 3 files changed, 96 insertions(+), 24 deletions(-) diff --git a/vendor/github.com/armon/go-metrics/inmem.go b/vendor/github.com/armon/go-metrics/inmem.go index cd177304213..8fe1de8023c 100644 --- a/vendor/github.com/armon/go-metrics/inmem.go +++ b/vendor/github.com/armon/go-metrics/inmem.go @@ -70,7 +70,7 @@ func NewIntervalMetrics(intv time.Time) *IntervalMetrics { // about a sample type AggregateSample struct { Count int // The count of emitted pairs - Rate float64 `json:"-"` // The count of emitted pairs per time unit (usually 1 second) + Rate float64 // The values rate per time unit (usually 1 second) Sum float64 // The sum of values SumSq float64 `json:"-"` // The sum of squared values Min float64 // Minimum value @@ -107,7 +107,7 @@ func (a *AggregateSample) Ingest(v float64, rateDenom float64) { if v > a.Max || a.Count == 1 { a.Max = v } - a.Rate = float64(a.Count) / rateDenom + a.Rate = float64(a.Sum) / rateDenom a.LastUpdated = time.Now() } diff --git a/vendor/github.com/armon/go-metrics/prometheus/prometheus.go b/vendor/github.com/armon/go-metrics/prometheus/prometheus.go index a647e59659b..a5b27d6d3e7 100644 --- a/vendor/github.com/armon/go-metrics/prometheus/prometheus.go +++ b/vendor/github.com/armon/go-metrics/prometheus/prometheus.go @@ -13,19 +13,91 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +var ( + // DefaultPrometheusOpts is the default set of options used when creating a + // PrometheusSink. + DefaultPrometheusOpts = PrometheusOpts{ + Expiration: 60 * time.Second, + } +) + +// PrometheusOpts is used to configure the Prometheus Sink +type PrometheusOpts struct { + // Expiration is the duration a metric is valid for, after which it will be + // untracked. If the value is zero, a metric is never expired. + Expiration time.Duration +} + type PrometheusSink struct { - mu sync.Mutex - gauges map[string]prometheus.Gauge - summaries map[string]prometheus.Summary - counters map[string]prometheus.Counter + mu sync.Mutex + gauges map[string]prometheus.Gauge + summaries map[string]prometheus.Summary + counters map[string]prometheus.Counter + updates map[string]time.Time + expiration time.Duration } +// NewPrometheusSink creates a new PrometheusSink using the default options. func NewPrometheusSink() (*PrometheusSink, error) { - return &PrometheusSink{ - gauges: make(map[string]prometheus.Gauge), - summaries: make(map[string]prometheus.Summary), - counters: make(map[string]prometheus.Counter), - }, nil + return NewPrometheusSinkFrom(DefaultPrometheusOpts) +} + +// NewPrometheusSinkFrom creates a new PrometheusSink using the passed options. +func NewPrometheusSinkFrom(opts PrometheusOpts) (*PrometheusSink, error) { + sink := &PrometheusSink{ + gauges: make(map[string]prometheus.Gauge), + summaries: make(map[string]prometheus.Summary), + counters: make(map[string]prometheus.Counter), + updates: make(map[string]time.Time), + expiration: opts.Expiration, + } + + return sink, prometheus.Register(sink) +} + +// Describe is needed to meet the Collector interface. +func (p *PrometheusSink) Describe(c chan<- *prometheus.Desc) { + // We must emit some description otherwise an error is returned. This + // description isn't shown to the user! + prometheus.NewGauge(prometheus.GaugeOpts{Name: "Dummy", Help: "Dummy"}).Describe(c) +} + +// Collect meets the collection interface and allows us to enforce our expiration +// logic to clean up ephemeral metrics if their value haven't been set for a +// duration exceeding our allowed expiration time. +func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) { + p.mu.Lock() + defer p.mu.Unlock() + + expire := p.expiration != 0 + now := time.Now() + for k, v := range p.gauges { + last := p.updates[k] + if expire && last.Add(p.expiration).Before(now) { + delete(p.updates, k) + delete(p.gauges, k) + } else { + v.Collect(c) + } + } + for k, v := range p.summaries { + last := p.updates[k] + if expire && last.Add(p.expiration).Before(now) { + delete(p.updates, k) + delete(p.summaries, k) + } else { + v.Collect(c) + } + } + for k, v := range p.counters { + last := p.updates[k] + if expire && last.Add(p.expiration).Before(now) { + delete(p.updates, k) + delete(p.counters, k) + } else { + v.Collect(c) + } + } } var forbiddenChars = regexp.MustCompile("[ .=\\-]") @@ -65,10 +137,10 @@ func (p *PrometheusSink) SetGaugeWithLabels(parts []string, val float32, labels Help: key, ConstLabels: prometheusLabels(labels), }) - prometheus.MustRegister(g) p.gauges[hash] = g } g.Set(float64(val)) + p.updates[hash] = time.Now() } func (p *PrometheusSink) AddSample(parts []string, val float32) { @@ -87,10 +159,10 @@ func (p *PrometheusSink) AddSampleWithLabels(parts []string, val float32, labels MaxAge: 10 * time.Second, ConstLabels: prometheusLabels(labels), }) - prometheus.MustRegister(g) p.summaries[hash] = g } g.Observe(float64(val)) + p.updates[hash] = time.Now() } // EmitKey is not implemented. Prometheus doesn’t offer a type for which an @@ -114,8 +186,8 @@ func (p *PrometheusSink) IncrCounterWithLabels(parts []string, val float32, labe Help: key, ConstLabels: prometheusLabels(labels), }) - prometheus.MustRegister(g) p.counters[hash] = g } g.Add(float64(val)) + p.updates[hash] = time.Now() } diff --git a/vendor/vendor.json b/vendor/vendor.json index d9151e33ca1..979051e5072 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -88,28 +88,28 @@ "revision": "bbbad097214e2918d8543d5201d12bfd7bca254d" }, { - "checksumSHA1": "0et4hA6AYqZCgYiY+c6Z17t3k3k=", + "checksumSHA1": "xp/2s4XclLL17DThGBI7jXZ4Crs=", "path": "github.com/armon/go-metrics", - "revision": "023a4bbe4bb9bfb23ee7e1afc8d0abad217641f3", - "revisionTime": "2017-08-09T01:16:44Z" + "revision": "6c3acc97c61d04290a8ba2e54640151f54c1546a", + "revisionTime": "2017-11-16T18:41:20Z" }, { "checksumSHA1": "xCsGGM9TKBogZDfSN536KtQdLko=", "path": "github.com/armon/go-metrics/circonus", - "revision": "023a4bbe4bb9bfb23ee7e1afc8d0abad217641f3", - "revisionTime": "2017-08-09T01:16:44Z" + "revision": "6c3acc97c61d04290a8ba2e54640151f54c1546a", + "revisionTime": "2017-11-16T18:41:20Z" }, { "checksumSHA1": "Dt0n1sSivvvdZQdzc4Hu/yOG+T0=", "path": "github.com/armon/go-metrics/datadog", - "revision": "023a4bbe4bb9bfb23ee7e1afc8d0abad217641f3", - "revisionTime": "2017-08-09T01:16:44Z" + "revision": "6c3acc97c61d04290a8ba2e54640151f54c1546a", + "revisionTime": "2017-11-16T18:41:20Z" }, { - "checksumSHA1": "NER1U5W8xgC+tAxVUuEckTffFsE=", + "checksumSHA1": "XfPPXw55zKziOWnZbkEGEJ96O9c=", "path": "github.com/armon/go-metrics/prometheus", - "revision": "0a12dc6f6b9da6da644031a1b9b5a85478c5ee27", - "revisionTime": "2017-09-13T18:48:37Z" + "revision": "6c3acc97c61d04290a8ba2e54640151f54c1546a", + "revisionTime": "2017-11-16T18:41:20Z" }, { "checksumSHA1": "gNO0JNpLzYOdInGeq7HqMZUzx9M=",