Skip to content

Commit

Permalink
Merge pull request #296 from buildkite/fix_more_selective_reset_of_ga…
Browse files Browse the repository at this point in the history
…uges

fix to remove reset in prometheus backend
  • Loading branch information
wolfeidau authored Jul 22, 2024
2 parents e34824b + 496346c commit e173cfd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
31 changes: 16 additions & 15 deletions backend/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@ var (
camel = regexp.MustCompile("(^[^A-Z0-9]*|[A-Z0-9]*)([A-Z0-9][^A-Z]+|$)")
)

// Prometheus this holds a list of prometheus gauges which have been created, one for each metric
// that we want to expose. These are created on the fly as we receive metrics from the agent.
//
// Note: these metrics are not unique to a cluster / queue, as these labels are added to the
// value when it is set.
type Prometheus struct {
totals map[string]*prometheus.GaugeVec
queues map[string]*prometheus.GaugeVec
pipelines map[string]*prometheus.GaugeVec
totals map[string]*prometheus.GaugeVec
queues map[string]*prometheus.GaugeVec
}

func NewPrometheusBackend() *Prometheus {
return &Prometheus{
totals: make(map[string]*prometheus.GaugeVec),
queues: make(map[string]*prometheus.GaugeVec),
pipelines: make(map[string]*prometheus.GaugeVec),
totals: make(map[string]*prometheus.GaugeVec),
queues: make(map[string]*prometheus.GaugeVec),
}
}

Expand All @@ -38,16 +41,14 @@ func (p *Prometheus) Serve(path, addr string) {
log.Fatal(http.ListenAndServe(addr, m))
}

// Collect receives a set of metrics from the agent and creates or updates the prometheus gauges
//
// Note: This is called once per agent token per interval
func (p *Prometheus) Collect(r *collector.Result) error {
// Clear the gauges to prevent stale values from persisting forever.
for _, gauge := range p.queues {
gauge.Reset()
}

for name, value := range r.Totals {
labelNames := []string{"cluster"}
gauge, ok := p.totals[name]
if !ok {
if !ok { // first time this metric has been seen so create a new gauge
gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: fmt.Sprintf("buildkite_total_%s", camelToUnderscore(name)),
Help: fmt.Sprintf("Buildkite Total: %s", name),
Expand All @@ -56,14 +57,14 @@ func (p *Prometheus) Collect(r *collector.Result) error {
p.totals[name] = gauge
}

// note that r.Cluster will be empty for unclustered agents, this label will be dropped by promethues
// note that r.Cluster will be empty for unclustered agents, this label will be dropped by prometheus
gauge.WithLabelValues(r.Cluster).Set(float64(value))
}

for queue, counts := range r.Queues {
for name, value := range counts {
gauge, ok := p.queues[name]
if !ok {
if !ok { // first time this metric has been seen so create a new gauge
labelNames := []string{"queue", "cluster"}
gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: fmt.Sprintf("buildkite_queues_%s", camelToUnderscore(name)),
Expand All @@ -73,7 +74,7 @@ func (p *Prometheus) Collect(r *collector.Result) error {
p.queues[name] = gauge
}

// note that r.Cluster will be empty for unclustered agents, this label will be dropped by promethues
// note that r.Cluster will be empty for unclustered agents, this label will be dropped by prometheus
gauge.WithLabelValues(queue, r.Cluster).Set(float64(value))
}
}
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
var metricsBackend backend.Backend

func main() {
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

var (
interval = flag.Duration("interval", 0, "Update metrics every interval, rather than once")
showVersion = flag.Bool("version", false, "Show the version")
Expand Down

0 comments on commit e173cfd

Please sign in to comment.