diff --git a/.run/all.run.xml b/.run/all.run.xml index 887b5de80..a6633cb0e 100644 --- a/.run/all.run.xml +++ b/.run/all.run.xml @@ -2,7 +2,7 @@ - + diff --git a/README.md b/README.md index a0c4036c0..9b85380cf 100644 --- a/README.md +++ b/README.md @@ -199,6 +199,14 @@ Windows Server 2012 and 2012R2 are supported as best-effort only, but not guaran The prometheus metrics will be exposed on [localhost:9182](http://localhost:9182) +### HTTP Endpoints + +windows_exporter provides the following HTTP endpoints: + +* `/metrics`: Exposes metrics in the [Prometheus text format](https://prometheus.io/docs/instrumenting/exposition_formats/). +* `/health`: Returns 200 OK when the exporter is running. +* `/debug/pprof/`: Exposes the [pprof](https://golang.org/pkg/net/http/pprof/) endpoints. Only available if `--debug.enabled` is set. + ## Examples ### Enable only service collector and specify a custom query diff --git a/cmd/windows_exporter/main.go b/cmd/windows_exporter/main.go index d0b596e9b..879176f9b 100644 --- a/cmd/windows_exporter/main.go +++ b/cmd/windows_exporter/main.go @@ -87,10 +87,6 @@ func run() int { "web.disable-exporter-metrics", "Exclude metrics about the exporter itself (promhttp_*, process_*, go_*).", ).Bool() - maxRequests = app.Flag( - "telemetry.max-requests", - "Maximum number of concurrent requests. 0 to disable.", - ).Default("5").Int() enabledCollectors = app.Flag( "collectors.enabled", "Comma-separated list of collectors to use. Use '[defaults]' as a placeholder for all the collectors enabled by default."). 
@@ -220,7 +216,6 @@ func run() int { mux.Handle("GET "+*metricsPath, httphandler.New(logger, collectors, &httphandler.Options{ DisableExporterMetrics: *disableExporterMetrics, TimeoutMargin: *timeoutMargin, - MaxRequests: *maxRequests, })) if *debugEnabled { diff --git a/internal/httphandler/httphandler.go b/internal/httphandler/httphandler.go index 299102910..4d706d5dd 100644 --- a/internal/httphandler/httphandler.go +++ b/internal/httphandler/httphandler.go @@ -49,7 +49,6 @@ type MetricsHTTPHandler struct { type Options struct { DisableExporterMetrics bool TimeoutMargin float64 - MaxRequests int } func New(logger *slog.Logger, metricCollectors *collector.MetricCollectors, options *Options) *MetricsHTTPHandler { @@ -57,7 +56,6 @@ func New(logger *slog.Logger, metricCollectors *collector.MetricCollectors, opti options = &Options{ DisableExporterMetrics: false, TimeoutMargin: 0.5, - MaxRequests: 5, } } @@ -65,7 +63,9 @@ func New(logger *slog.Logger, metricCollectors *collector.MetricCollectors, opti metricCollectors: metricCollectors, logger: logger, options: *options, - concurrencyCh: make(chan struct{}, options.MaxRequests), + + // We expose metrics directly from the memory region of the Win32 API, so we must not serve more than one request at a time. 
+ concurrencyCh: make(chan struct{}, 1), } if !options.DisableExporterMetrics { @@ -131,21 +131,11 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t if len(requestedCollectors) == 0 { metricCollectors = c.metricCollectors } else { - filteredCollectors := make(collector.Map) - - for _, name := range requestedCollectors { - metricCollector, ok := c.metricCollectors.Collectors[name] - if !ok { - return nil, fmt.Errorf("couldn't find collector %s", name) - } - - filteredCollectors[name] = metricCollector - } + var err error - metricCollectors = &collector.MetricCollectors{ - Collectors: filteredCollectors, - MISession: c.metricCollectors.MISession, - PerfCounterQuery: c.metricCollectors.PerfCounterQuery, + metricCollectors, err = c.metricCollectors.CloneWithCollectors(requestedCollectors) + if err != nil { + return nil, fmt.Errorf("couldn't clone metric collectors: %w", err) } } @@ -162,8 +152,10 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t promhttp.HandlerOpts{ ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError), ErrorHandling: promhttp.ContinueOnError, - MaxRequestsInFlight: c.options.MaxRequests, + MaxRequestsInFlight: 1, Registry: c.exporterMetricsRegistry, + EnableOpenMetrics: true, + ProcessStartTime: c.metricCollectors.GetStartTime(), }, ) @@ -178,7 +170,9 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t promhttp.HandlerOpts{ ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError), ErrorHandling: promhttp.ContinueOnError, - MaxRequestsInFlight: c.options.MaxRequests, + MaxRequestsInFlight: 1, + EnableOpenMetrics: true, + ProcessStartTime: c.metricCollectors.GetStartTime(), }, ) } @@ -187,10 +181,6 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t } func (c *MetricsHTTPHandler) withConcurrencyLimit(next http.HandlerFunc) http.HandlerFunc { - if c.options.MaxRequests <= 0 { - return next - } - return func(w 
http.ResponseWriter, r *http.Request) { select { case c.concurrencyCh <- struct{}{}: diff --git a/internal/perfdata/collector.go b/internal/perfdata/collector.go index ce6d892e0..f5b3a45ed 100644 --- a/internal/perfdata/collector.go +++ b/internal/perfdata/collector.go @@ -193,6 +193,13 @@ func (c *Collector) Collect() (CounterValues, error) { } func (c *Collector) collectRoutine() { + var ( + itemCount uint32 + bytesNeeded uint32 + ) + + buf := make([]byte, 1) + for range c.collectCh { if ret := PdhCollectQueryData(c.handle); ret != ErrorSuccess { c.counterValuesCh <- nil @@ -207,25 +214,24 @@ func (c *Collector) collectRoutine() { for _, counter := range c.counters { for _, instance := range counter.Instances { // Get the info with the current buffer size - var itemCount uint32 - - // Get the info with the current buffer size - bufLen := uint32(0) + bytesNeeded = uint32(cap(buf)) - ret := PdhGetRawCounterArray(instance, &bufLen, &itemCount, nil) - if ret != PdhMoreData { - return nil, fmt.Errorf("PdhGetRawCounterArray: %w", NewPdhError(ret)) - } + for { + ret := PdhGetRawCounterArray(instance, &bytesNeeded, &itemCount, &buf[0]) - buf := make([]byte, bufLen) + if ret == ErrorSuccess { + break + } - ret = PdhGetRawCounterArray(instance, &bufLen, &itemCount, &buf[0]) - if ret != ErrorSuccess { - if err := NewPdhError(ret); !isKnownCounterDataError(err) { + if err := NewPdhError(ret); ret != PdhMoreData && !isKnownCounterDataError(err) { return nil, fmt.Errorf("PdhGetRawCounterArray: %w", err) } - continue + if bytesNeeded <= uint32(cap(buf)) { + return nil, fmt.Errorf("PdhGetRawCounterArray reports buffer too small (%d), but buffer is large enough (%d): %w", uint32(cap(buf)), bytesNeeded, NewPdhError(ret)) + } + + buf = make([]byte, bytesNeeded) } items := unsafe.Slice((*PdhRawCounterItem)(unsafe.Pointer(&buf[0])), itemCount) diff --git a/internal/perfdata/collector_bench_test.go b/internal/perfdata/collector_bench_test.go index 9b719796d..b16e807b3 100644 --- 
a/internal/perfdata/collector_bench_test.go +++ b/internal/perfdata/collector_bench_test.go @@ -61,4 +61,6 @@ func BenchmarkTestCollector(b *testing.B) { } performanceData.Close() + + b.ReportAllocs() } diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index aee367c00..f542380bc 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -19,8 +19,10 @@ import ( "errors" "fmt" "log/slog" + "maps" "slices" "sync" + stdtime "time" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/windows_exporter/internal/collector/ad" @@ -145,21 +147,21 @@ func NewWithConfig(config Config) *MetricCollectors { // New To be called by the external libraries for collector initialization. func New(collectors Map) *MetricCollectors { return &MetricCollectors{ - Collectors: collectors, + collectors: collectors, } } // Enable removes all collectors that not enabledCollectors. func (c *MetricCollectors) Enable(enabledCollectors []string) error { for _, name := range enabledCollectors { - if _, ok := c.Collectors[name]; !ok { + if _, ok := c.collectors[name]; !ok { return fmt.Errorf("unknown collector %s", name) } } - for name := range c.Collectors { + for name := range c.collectors { if !slices.Contains(enabledCollectors, name) { - delete(c.Collectors, name) + delete(c.collectors, name) } } @@ -168,22 +170,24 @@ func (c *MetricCollectors) Enable(enabledCollectors []string) error { // Build To be called by the exporter for collector initialization. 
func (c *MetricCollectors) Build(logger *slog.Logger) error { + c.startTime = stdtime.Now() + err := c.initMI() if err != nil { return fmt.Errorf("error from initialize MI: %w", err) } wg := sync.WaitGroup{} - wg.Add(len(c.Collectors)) + wg.Add(len(c.collectors)) - errCh := make(chan error, len(c.Collectors)) - errs := make([]error, 0, len(c.Collectors)) + errCh := make(chan error, len(c.collectors)) + errs := make([]error, 0, len(c.collectors)) - for _, collector := range c.Collectors { + for _, collector := range c.collectors { go func() { defer wg.Done() - if err = collector.Build(logger, c.MISession); err != nil { + if err = collector.Build(logger, c.miSession); err != nil { errCh <- fmt.Errorf("error build collector %s: %w", collector.GetName(), err) } }() @@ -202,20 +206,20 @@ func (c *MetricCollectors) Build(logger *slog.Logger) error { // Close To be called by the exporter for collector cleanup. func (c *MetricCollectors) Close() error { - errs := make([]error, 0, len(c.Collectors)) + errs := make([]error, 0, len(c.collectors)) - for _, collector := range c.Collectors { + for _, collector := range c.collectors { if err := collector.Close(); err != nil { errs = append(errs, fmt.Errorf("error from close collector %s: %w", collector.GetName(), err)) } } - app, err := c.MISession.GetApplication() + app, err := c.miSession.GetApplication() if err != nil && !errors.Is(err, mi.ErrNotInitialized) { errs = append(errs, fmt.Errorf("error from get MI application: %w", err)) } - if err := c.MISession.Close(); err != nil && !errors.Is(err, mi.ErrNotInitialized) { + if err := c.miSession.Close(); err != nil && !errors.Is(err, mi.ErrNotInitialized) { errs = append(errs, fmt.Errorf("error from close MI session: %w", err)) } @@ -226,7 +230,7 @@ func (c *MetricCollectors) Close() error { return errors.Join(errs...) } -// Close To be called by the exporter for collector cleanup. +// initMI To be called by the exporter for collector initialization. 
func (c *MetricCollectors) initMI() error { app, err := mi.Application_Initialize() if err != nil { @@ -242,10 +246,29 @@ func (c *MetricCollectors) initMI() error { return fmt.Errorf("error from set locale: %w", err) } - c.MISession, err = app.NewSession(destinationOptions) + c.miSession, err = app.NewSession(destinationOptions) if err != nil { return fmt.Errorf("error from create NewSession: %w", err) } return nil } + +// CloneWithCollectors returns a copy of c restricted to the named collectors; the copy shares c's MI session and start time. +func (c *MetricCollectors) CloneWithCollectors(collectors []string) (*MetricCollectors, error) { + metricCollectors := &MetricCollectors{ + collectors: maps.Clone(c.collectors), + miSession: c.miSession, + startTime: c.startTime, + } + + if err := metricCollectors.Enable(collectors); err != nil { + return nil, err + } + + return metricCollectors, nil +} + +func (c *MetricCollectors) GetStartTime() stdtime.Time { + return c.startTime +} diff --git a/pkg/collector/prometheus.go b/pkg/collector/prometheus.go index 7fd53ad2d..a25bbc20c 100644 --- a/pkg/collector/prometheus.go +++ b/pkg/collector/prometheus.go @@ -27,7 +27,7 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/perfdata" - types "github.com/prometheus-community/windows_exporter/internal/types" + "github.com/prometheus-community/windows_exporter/internal/types" "github.com/prometheus/client_golang/prometheus" ) @@ -103,15 +103,15 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) { // WaitGroup to wait for all collectors to finish wg := sync.WaitGroup{} - wg.Add(len(p.metricCollectors.Collectors)) + wg.Add(len(p.metricCollectors.collectors)) // Using a channel to collect the status of each collector // A channel is safe to use concurrently while a map is not - collectorStatusCh := make(chan collectorStatus, 
len(p.metricCollectors.collectors)) // Execute all collectors concurrently // timeout handling is done in the execute function - for name, metricsCollector := range p.metricCollectors.Collectors { + for name, metricsCollector := range p.metricCollectors.collectors { go func(name string, metricsCollector Collector) { defer wg.Done() diff --git a/pkg/collector/types.go b/pkg/collector/types.go index 535ef1f20..3e6d1bacd 100644 --- a/pkg/collector/types.go +++ b/pkg/collector/types.go @@ -17,6 +17,7 @@ package collector import ( "log/slog" + "time" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/windows_exporter/internal/mi" @@ -26,9 +27,9 @@ import ( const DefaultCollectors = "cpu,cs,memory,logical_disk,physical_disk,net,os,service,system" type MetricCollectors struct { - Collectors Map - MISession *mi.Session - PerfCounterQuery string + collectors Map + miSession *mi.Session + startTime time.Time } type (