Skip to content

Commit

Permalink
Merge pull request #305 from buildkite/remove-deleted-queues
Browse files Browse the repository at this point in the history
Delete Prometheus labels for deleted queues
  • Loading branch information
DrJosh9000 authored Sep 10, 2024
2 parents 5dd97e6 + 152e57a commit 9ad2cd1
Showing 1 changed file with 31 additions and 10 deletions.
41 changes: 31 additions & 10 deletions backend/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ var (
// Note: these metrics are not unique to a cluster / queue, as these labels are added to the
// value when it is set.
type Prometheus struct {
// NOTE(review): this is a rendered diff whose +/- markers were lost. The first
// totals/queues pair below is the pre-commit text (removed lines); presumably
// it differs from the post-commit pair only in gofmt column alignment.
totals map[string]*prometheus.GaugeVec
queues map[string]*prometheus.GaugeVec
// Post-commit fields (added lines).
// totals maps a total metric name to its gauge vector; entries are created
// the first time Collect sees that name.
totals map[string]*prometheus.GaugeVec
// queues maps a per-queue metric name to its gauge vector; entries are created
// the first time Collect sees that name.
queues map[string]*prometheus.GaugeVec
oldQueues map[string]map[string]struct{} // cluster -> set of queues in cluster from last collect
}

// NewPrometheusBackend returns a Prometheus backend whose maps are allocated
// and empty, so Collect can write to them without nil-map checks.
func NewPrometheusBackend() *Prometheus {
return &Prometheus{
// NOTE(review): rendered diff with the +/- markers lost. The next two lines
// are the pre-commit initialisers (removed lines).
totals: make(map[string]*prometheus.GaugeVec),
queues: make(map[string]*prometheus.GaugeVec),
// Post-commit initialisers (added lines); oldQueues is new in this commit.
totals: make(map[string]*prometheus.GaugeVec),
queues: make(map[string]*prometheus.GaugeVec),
oldQueues: make(map[string]map[string]struct{}),
}
}

Expand All @@ -46,38 +48,57 @@ func (p *Prometheus) Serve(path, addr string) {
// Collect copies one collector result into Prometheus gauges. Every entry of
// r.Totals is set on a gauge labelled with r.Cluster, and every per-queue count
// in r.Queues is set on a gauge labelled with the queue name and r.Cluster.
// Gauge vectors are created and registered the first time a metric name is
// seen. Queues that were recorded for r.Cluster by the previous call but are
// absent from r.Queues have their label sets deleted from every queue gauge.
// The only return statement returns nil.
//
// Note: This is called once per agent token per interval.
//
// NOTE(review): this text is a rendered diff whose +/- markers were lost, so
// pre-commit and post-commit lines appear side by side; they are tagged below.
// NOTE(review): oldQueues is keyed by cluster only. If two agent tokens report
// the same r.Cluster value (including the empty string for unclustered agents),
// each call would presumably delete queue labels set by the other - confirm
// against the callers.
func (p *Prometheus) Collect(r *collector.Result) error {
for name, value := range r.Totals {
// pre-commit (removed): label names were held in a local variable.
labelNames := []string{"cluster"}
gauge, ok := p.totals[name]
if !ok { // first time this metric has been seen so create a new gauge
gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: fmt.Sprintf("buildkite_total_%s", camelToUnderscore(name)),
Help: fmt.Sprintf("Buildkite Total: %s", name),
// pre-commit (removed):
}, labelNames)
// post-commit (added): label names are written inline.
}, []string{"cluster"})
prometheus.MustRegister(gauge)
p.totals[name] = gauge
}

// Note that r.Cluster will be empty for unclustered agents; this label will be dropped by Prometheus.
// pre-commit (removed): label values passed positionally.
gauge.WithLabelValues(r.Cluster).Set(float64(value))
// post-commit (added): label values passed by name.
gauge.With(prometheus.Labels{"cluster": r.Cluster}).Set(float64(value))
}

// post-commit (added): currentQueues collects the queue names present in this
// result; oldQueues starts as the set stored for this cluster by the previous
// call (nil if the cluster has not been seen before).
currentQueues := make(map[string]struct{})
oldQueues := p.oldQueues[r.Cluster]
for queue, counts := range r.Queues {
// post-commit (added): after this loop, oldQueues holds only queues that
// have disappeared. delete on a nil map is a no-op, so a first-time cluster
// is safe here.
currentQueues[queue] = struct{}{}
delete(oldQueues, queue) // still current

for name, value := range counts {
gauge, ok := p.queues[name]
if !ok { // first time this metric has been seen so create a new gauge
// pre-commit (removed):
labelNames := []string{"queue", "cluster"}
gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: fmt.Sprintf("buildkite_queues_%s", camelToUnderscore(name)),
Help: fmt.Sprintf("Buildkite Queues: %s", name),
// pre-commit (removed):
}, labelNames)
// post-commit (added): label names are written inline.
}, []string{"queue", "cluster"})
prometheus.MustRegister(gauge)
p.queues[name] = gauge
}

// Note that r.Cluster will be empty for unclustered agents; this label will be dropped by Prometheus.
// pre-commit (removed): label values passed positionally (queue, cluster).
gauge.WithLabelValues(queue, r.Cluster).Set(float64(value))
// post-commit (added): the same two labels passed by name, using the same
// keys as the Delete call below.
gauge.With(prometheus.Labels{
"cluster": r.Cluster,
"queue": queue,
}).Set(float64(value))
}
}

// post-commit (added), through the p.oldQueues assignment below.
// oldQueues contains queues that were in the previous collector result, but
// are no longer present.
// This is to prevent accumulating label values for deleted queues.
for queue := range oldQueues {
// Every queue gauge is asked to drop this (cluster, queue) pair; the value
// returned by Delete is not inspected.
for _, gauge := range p.queues {
gauge.Delete(prometheus.Labels{
"cluster": r.Cluster,
"queue": queue,
})
}
}
// Remember this result's queues so the next call for the same cluster can
// detect removals.
p.oldQueues[r.Cluster] = currentQueues

return nil
}
Expand Down

0 comments on commit 9ad2cd1

Please sign in to comment.