diff --git a/backend/prometheus.go b/backend/prometheus.go
index 7d10eb1..ce51b2b 100644
--- a/backend/prometheus.go
+++ b/backend/prometheus.go
@@ -23,14 +23,16 @@ var (
 // Note: these metrics are not unique to a cluster / queue, as these labels are added to the
 // value when it is set.
 type Prometheus struct {
-	totals map[string]*prometheus.GaugeVec
-	queues map[string]*prometheus.GaugeVec
+	totals    map[string]*prometheus.GaugeVec
+	queues    map[string]*prometheus.GaugeVec
+	oldQueues map[string]map[string]struct{} // cluster -> set of queues in cluster from last collect
 }
 
 func NewPrometheusBackend() *Prometheus {
 	return &Prometheus{
-		totals: make(map[string]*prometheus.GaugeVec),
-		queues: make(map[string]*prometheus.GaugeVec),
+		totals:    make(map[string]*prometheus.GaugeVec),
+		queues:    make(map[string]*prometheus.GaugeVec),
+		oldQueues: make(map[string]map[string]struct{}),
 	}
 }
 
@@ -46,38 +48,57 @@ func (p *Prometheus) Serve(path, addr string) {
 // Note: This is called once per agent token per interval
 func (p *Prometheus) Collect(r *collector.Result) error {
 	for name, value := range r.Totals {
-		labelNames := []string{"cluster"}
 		gauge, ok := p.totals[name]
 		if !ok { // first time this metric has been seen so create a new gauge
 			gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 				Name: fmt.Sprintf("buildkite_total_%s", camelToUnderscore(name)),
 				Help: fmt.Sprintf("Buildkite Total: %s", name),
-			}, labelNames)
+			}, []string{"cluster"})
 			prometheus.MustRegister(gauge)
 			p.totals[name] = gauge
 		}
 
 		// note that r.Cluster will be empty for unclustered agents, this label will be dropped by prometheus
-		gauge.WithLabelValues(r.Cluster).Set(float64(value))
+		gauge.With(prometheus.Labels{"cluster": r.Cluster}).Set(float64(value))
 	}
 
+	currentQueues := make(map[string]struct{})
+	oldQueues := p.oldQueues[r.Cluster]
 	for queue, counts := range r.Queues {
+		currentQueues[queue] = struct{}{}
+		delete(oldQueues, queue) // still current
+
 		for name, value := range counts {
 			gauge, ok := p.queues[name]
 			if !ok { // first time this metric has been seen so create a new gauge
-				labelNames := []string{"queue", "cluster"}
 				gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 					Name: fmt.Sprintf("buildkite_queues_%s", camelToUnderscore(name)),
 					Help: fmt.Sprintf("Buildkite Queues: %s", name),
-				}, labelNames)
+				}, []string{"queue", "cluster"})
 				prometheus.MustRegister(gauge)
 				p.queues[name] = gauge
 			}
 
 			// note that r.Cluster will be empty for unclustered agents, this label will be dropped by prometheus
-			gauge.WithLabelValues(queue, r.Cluster).Set(float64(value))
+			gauge.With(prometheus.Labels{
+				"cluster": r.Cluster,
+				"queue":   queue,
+			}).Set(float64(value))
+		}
+	}
+
+	// oldQueues contains queues that were in the previous collector result, but
+	// are no longer present.
+	// This is to prevent accumulating label values for deleted queues.
+	for queue := range oldQueues {
+		for _, gauge := range p.queues {
+			gauge.Delete(prometheus.Labels{
+				"cluster": r.Cluster,
+				"queue":   queue,
+			})
 		}
 	}
 
+	p.oldQueues[r.Cluster] = currentQueues
 	return nil
 }