From 85fdfb44b8661fa83073ee41b5f7dfb88c26b038 Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Wed, 15 May 2019 21:22:29 +0200 Subject: [PATCH 1/4] Abort scrapes after configurable timeout --- exporter.go | 87 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/exporter.go b/exporter.go index 8197865a8..9a1404746 100644 --- a/exporter.go +++ b/exporter.go @@ -23,7 +23,8 @@ import ( // WmiCollector implements the prometheus.Collector interface. type WmiCollector struct { - collectors map[string]collector.Collector + maxScrapeDuration time.Duration + collectors map[string]collector.Collector } const ( @@ -45,6 +46,12 @@ var ( []string{"collector"}, nil, ) + scrapeTimeoutDesc = prometheus.NewDesc( + prometheus.BuildFQName(collector.Namespace, "exporter", "collector_timeout"), + "wmi_exporter: Whether the collector timed out.", + []string{"collector"}, + nil, + ) // This can be removed when client_golang exposes this on Windows // (See https://github.com/prometheus/client_golang/issues/376) @@ -65,8 +72,7 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) { } // Collect sends the collected metrics from each of the collectors to -// prometheus. Collect could be called several times concurrently -// and thus its run is protected by a single mutex. +// prometheus. func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { scrapeContext, err := collector.PrepareScrapeContext() if err != nil { @@ -74,12 +80,40 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { return } + remainingCollectors := make(map[string]bool) + for name := range coll.collectors { + remainingCollectors[name] = true + } + + metricsBuffer := make(chan prometheus.Metric) + allDone := make(chan struct{}) + stopped := false + go func() { + for { + select { + case m := <-metricsBuffer: + if !stopped { + ch <- m + } + case <-allDone: + return + } + } + }() + wg := sync.WaitGroup{} wg.Add(len(coll.collectors)) + go func() { + wg.Wait() + close(allDone) + close(metricsBuffer) + }() + for name, c := range coll.collectors { go func(name string, c collector.Collector) { - execute(name, c, scrapeContext, ch) + execute(name, c, scrapeContext, metricsBuffer) wg.Done() + delete(remainingCollectors, name) }(name, c) } @@ -88,7 +122,33 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { prometheus.CounterValue, startTime, ) - wg.Wait() + + select { + case <-allDone: + stopped = true + return + case <-time.After(coll.maxScrapeDuration): + stopped = true + remainingCollectorNames := make([]string, 0, len(remainingCollectors)) + for rc := range remainingCollectors { + remainingCollectorNames = append(remainingCollectorNames, rc) + } + log.Warn("Collection timed out, still waiting for ", remainingCollectorNames) + for name := range remainingCollectors { + ch <- prometheus.MustNewConstMetric( + scrapeSuccessDesc, + prometheus.GaugeValue, + 0.0, + name, + ) + ch <- prometheus.MustNewConstMetric( + scrapeTimeoutDesc, + prometheus.GaugeValue, + 1.0, + name, + ) + } + } } func filterAvailableCollectors(collectors string) string { @@ -127,6 +187,12 @@ func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, c success, name, ) + ch <- prometheus.MustNewConstMetric( + scrapeTimeoutDesc, + prometheus.GaugeValue, + 0.0, + name, + ) } func expandEnabledCollectors(enabled string) []string { @@ -198,6 +264,10 @@ func main() { "collectors.print", "If true, print available collectors and exit.", ).Bool() + maxScrapeDuration = kingpin.Flag( + "scrape.max-duration", + "Time after which collectors are aborted during a scrape", + ).Default("30s").Duration() ) log.AddFlags(kingpin.CommandLine) @@ -242,8 +312,11 @@ func main() { log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", ")) - nodeCollector := WmiCollector{collectors: collectors} - prometheus.MustRegister(nodeCollector) + exporter := WmiCollector{ + collectors: collectors, + maxScrapeDuration: *maxScrapeDuration, + } + prometheus.MustRegister(exporter) http.Handle(*metricsPath, promhttp.Handler()) http.HandleFunc("/health", healthCheck) From daa6f3d111ba34a9eb64cdfbe5160192b4566e1a Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Sat, 25 May 2019 14:43:36 +0200 Subject: [PATCH 2/4] Support releases with prerelease tags --- appveyor.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 101a4a7b4..232026053 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -40,12 +40,13 @@ after_build: return } $ErrorActionPreference = "Stop" + $BuildVersion = Get-Content VERSION # The MSI version is not semver compliant, so just take the numerical parts - $Version = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1' + $MSIVersion = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1' foreach($Arch in "amd64","386") { - Write-Verbose "Building wmi_exporter $Version msi for $Arch" - .\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$Version-$Arch.exe -Version $Version -Arch "$Arch" - Move-Item installer\Output\wmi_exporter-$Version-$Arch.msi output\$Arch\ + Write-Verbose "Building wmi_exporter $MSIVersion msi for $Arch" + .\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$BuildVersion-$Arch.exe -Version $MSIVersion -Arch "$Arch" + Move-Item installer\Output\wmi_exporter-$MSIVersion-$Arch.msi output\$Arch\ } - promu checksum output\ From 411954cf9dd900ede320a51a8e1cd3e5909e4a69 Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Sun, 23 Jun 2019 22:01:43 +0200 Subject: [PATCH 3/4] Create custom metrics endpoint to read timeout from request header --- exporter.go | 79 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/exporter.go b/exporter.go index 9a1404746..49ba149d0 100644 --- a/exporter.go +++ b/exporter.go @@ -5,7 +5,9 @@ package main import ( "fmt" "net/http" + "os" "sort" + "strconv" "strings" "sync" "time" @@ -52,6 +54,12 @@ var ( []string{"collector"}, nil, ) + snapshotDuration = prometheus.NewDesc( + prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"), + "Duration of perflib snapshot capture", + nil, + nil, + ) // This can be removed when client_golang exposes this on Windows // (See https://github.com/prometheus/client_golang/issues/376) @@ -74,7 +82,13 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) { // Collect sends the collected metrics from each of the collectors to // prometheus. func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { + t := time.Now() scrapeContext, err := collector.PrepareScrapeContext() + ch <- prometheus.MustNewConstMetric( + snapshotDuration, + prometheus.GaugeValue, + time.Since(t).Seconds(), + ) if err != nil { ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err)) return @@ -91,8 +105,8 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { go func() { for { select { - case m := <-metricsBuffer: - if !stopped { + case m, ok := <-metricsBuffer: + if ok && !stopped { ch <- m } case <-allDone: @@ -111,8 +125,8 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { for name, c := range coll.collectors { go func(name string, c collector.Collector) { + defer wg.Done() execute(name, c, scrapeContext, metricsBuffer) - wg.Done() delete(remainingCollectors, name) }(name, c) } @@ -229,10 +243,6 @@ func loadCollectors(list string) (map[string]collector.Collector, error) { return collectors, nil } -func init() { - prometheus.MustRegister(version.NewCollector("wmi_exporter")) -} - func initWbem() { // This initialization prevents a memory leak on WMF 5+. See // https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues @@ -264,10 +274,10 @@ func main() { "collectors.print", "If true, print available collectors and exit.", ).Bool() - maxScrapeDuration = kingpin.Flag( - "scrape.max-duration", - "Time after which collectors are aborted during a scrape", - ).Default("30s").Duration() + timeoutMargin = kingpin.Flag( + "scrape.timeout-margin", + "Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.", + ).Default("0.5").Float64() ) log.AddFlags(kingpin.CommandLine) @@ -312,13 +322,17 @@ func main() { log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", ")) - exporter := WmiCollector{ - collectors: collectors, - maxScrapeDuration: *maxScrapeDuration, + h := &metricsHandler{ + timeoutMargin: *timeoutMargin, + collectorFactory: func(timeout time.Duration) *WmiCollector { + return &WmiCollector{ + collectors: collectors, + maxScrapeDuration: timeout, + } + }, } - prometheus.MustRegister(exporter) - http.Handle(*metricsPath, promhttp.Handler()) + http.Handle(*metricsPath, h) http.HandleFunc("/health", healthCheck) http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently) @@ -382,3 +396,36 @@ loop: changes <- svc.Status{State: svc.StopPending} return } + +type metricsHandler struct { + timeoutMargin float64 + collectorFactory func(timeout time.Duration) *WmiCollector +} + +func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + const defaultTimeout = 10.0 + + var timeoutSeconds float64 + if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" { + var err error + timeoutSeconds, err = strconv.ParseFloat(v, 64) + if err != nil { + log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %d", v, defaultTimeout) + } + } + if timeoutSeconds == 0 { + timeoutSeconds = defaultTimeout + } + timeoutSeconds = timeoutSeconds - mh.timeoutMargin + + reg := prometheus.NewRegistry() + reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second)))) + reg.MustRegister( + prometheus.NewProcessCollector(os.Getpid(), ""), + prometheus.NewGoCollector(), + version.NewCollector("wmi_exporter"), + ) + + h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) + h.ServeHTTP(w, r) +} From c5f23b4e6420d0eb9a310d953f9e2c184f013f17 Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Mon, 24 Jun 2019 21:55:33 +0200 Subject: [PATCH 4/4] Fix float-format --- exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter.go b/exporter.go index 49ba149d0..fa88aa6f6 100644 --- a/exporter.go +++ b/exporter.go @@ -410,7 +410,7 @@ func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var err error timeoutSeconds, err = strconv.ParseFloat(v, 64) if err != nil { - log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %d", v, defaultTimeout) + log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout) } } if timeoutSeconds == 0 {