Skip to content

Commit

Permalink
Replace --collectors.enabled with per-collector flags (prometheus#640)
Browse files Browse the repository at this point in the history
* Move NodeCollector into package collector

* Refactor collector enabling

* Update README with new collector enabled flags

* Fix out-of-date inline flag reference syntax

* Use new flags in end-to-end tests

* Add flag to disable all default collectors

* Track if a flag has been set explicitly

* Add --collectors.disable-defaults to README

* Revert disable-defaults flag

* Shorten flags

* Fixup timex collector registration

* Fix end-to-end tests

* Change procfs and sysfs path flags

* Fix review comments
  • Loading branch information
carlpett authored and oblitorum committed Apr 9, 2024
1 parent 38c1485 commit 7444179
Show file tree
Hide file tree
Showing 58 changed files with 409 additions and 389 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ This fork adds HTTP Basic authentication and TLS support using [Percona's shared
There is varying support for collectors on each operating system. The tables
below list all existing collectors and the supported systems.

Which collectors are used is controlled by the `--collectors.enabled` flag.
Collectors are enabled by providing a `--collector.<name>` flag.
Collectors that are enabled by default can be disabled by providing a `--no-collector.<name>` flag.

### Enabled by default

Expand Down Expand Up @@ -164,8 +165,8 @@ docker run -d -p 9100:9100 \
-v "/:/rootfs:ro" \
--net="host" \
quay.io/prometheus/node-exporter \
--collector.procfs /host/proc \
--collector.sysfs /host/sys \
--path.procfs /host/proc \
--path.sysfs /host/sys \
--collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc)($|/)"
```

Expand Down
4 changes: 2 additions & 2 deletions collector/arp_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ type arpCollector struct {
}

func init() {
Factories["arp"] = NewARPCollector
registerCollector("arp", defaultEnabled, NewARPCollector)
}

// NewARPCollector returns a new Collector exposing ARP stats.
func NewARPCollector() (Collector, error) {
return &arpCollector{
entries: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "arp", "entries"),
prometheus.BuildFQName(namespace, "arp", "entries"),
"ARP entries by device",
[]string{"device"}, nil,
),
Expand Down
4 changes: 2 additions & 2 deletions collector/bcache_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

func init() {
Factories["bcache"] = NewBcacheCollector
registerCollector("bcache", defaultEnabled, NewBcacheCollector)
}

// A bcacheCollector is a Collector which gathers metrics from Linux bcache.
Expand Down Expand Up @@ -283,7 +283,7 @@ func (c *bcacheCollector) updateBcacheStats(ch chan<- prometheus.Metric, s *bcac
labels := append(devLabel, m.extraLabel...)

desc := prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, m.name),
prometheus.BuildFQName(namespace, subsystem, m.name),
m.desc,
labels,
nil,
Expand Down
6 changes: 3 additions & 3 deletions collector/bonding_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,20 @@ type bondingCollector struct {
}

func init() {
Factories["bonding"] = NewBondingCollector
registerCollector("bonding", defaultDisabled, NewBondingCollector)
}

// NewBondingCollector returns a newly allocated bondingCollector.
// It exposes the number of configured and active slaves of Linux bonding interfaces.
func NewBondingCollector() (Collector, error) {
return &bondingCollector{
slaves: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "bonding", "slaves"),
prometheus.BuildFQName(namespace, "bonding", "slaves"),
"Number of configured slaves per bonding interface.",
[]string{"master"}, nil,
), prometheus.GaugeValue},
active: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "bonding", "active"),
prometheus.BuildFQName(namespace, "bonding", "active"),
"Number of active slaves per bonding interface.",
[]string{"master"}, nil,
), prometheus.GaugeValue},
Expand Down
4 changes: 2 additions & 2 deletions collector/buddyinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ type buddyinfoCollector struct {
}

func init() {
Factories["buddyinfo"] = NewBuddyinfoCollector
registerCollector("buddyinfo", defaultDisabled, NewBuddyinfoCollector)
}

// NewBuddyinfoCollector returns a new Collector exposing buddyinfo stats.
func NewBuddyinfoCollector() (Collector, error) {
desc := prometheus.NewDesc(
prometheus.BuildFQName(Namespace, buddyInfoSubsystem, "count"),
prometheus.BuildFQName(namespace, buddyInfoSubsystem, "count"),
"Count of free blocks according to size.",
[]string{"node", "zone", "size"}, nil,
)
Expand Down
107 changes: 104 additions & 3 deletions collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,121 @@
package collector

import (
"fmt"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2"
)

// Namespace defines the common namespace to be used by all metrics.
const Namespace = "node"
const namespace = "node"

// Factories contains the list of all available collectors.
var Factories = make(map[string]func() (Collector, error))
// Descriptors for the per-collector scrape metadata metrics. Both carry a
// single "collector" label and no constant labels.
var (
// scrapeDurationDesc describes the metric reporting how long one
// collector's scrape took, in seconds.
scrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "scrape", "collector_duration_seconds"),
"node_exporter: Duration of a collector scrape.",
[]string{"collector"},
nil,
)
// scrapeSuccessDesc describes the metric reporting whether one
// collector's scrape succeeded.
scrapeSuccessDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "scrape", "collector_success"),
"node_exporter: Whether a collector succeeded.",
[]string{"collector"},
nil,
)
)

// warnDeprecated logs a warning stating that the named collector is
// deprecated and will be removed in the future.
func warnDeprecated(collector string) {
	const format = "The %s collector is deprecated and will be removed in the future!"
	log.Warnf(format, collector)
}

// Readable values for the isDefaultEnabled argument of registerCollector.
const (
defaultEnabled = true
defaultDisabled = false
)

var (
// factories maps a collector name to the constructor registered for it.
factories = make(map[string]func() (Collector, error))
// collectorState maps a collector name to the *bool returned by kingpin
// for that collector's "collector.<name>" flag.
collectorState = make(map[string]*bool)
)

// registerCollector makes a collector available under the given name.
//
// It defines a boolean kingpin flag named "collector.<name>" whose default is
// isDefaultEnabled, stores the flag's value pointer in collectorState, and
// stores factory in factories so NewNodeCollector can construct the collector
// when the flag is set.
func registerCollector(collector string, isDefaultEnabled bool, factory func() (Collector, error)) {
	helpDefaultState := "disabled"
	if isDefaultEnabled {
		helpDefaultState = "enabled"
	}

	name := "collector." + collector
	help := fmt.Sprintf("Enable the %s collector (default: %s).", collector, helpDefaultState)

	// Create the flag before touching either map, so both maps are only
	// updated once the flag definition has succeeded.
	enabled := kingpin.Flag(name, help).Default(fmt.Sprint(isDefaultEnabled)).Bool()

	collectorState[collector] = enabled
	factories[collector] = factory
}

// nodeCollector implements the prometheus.Collector interface by fanning out
// to every collector it holds.
type nodeCollector struct {
// Collectors holds the instantiated collectors, keyed by collector name.
Collectors map[string]Collector
}

// NewNodeCollector builds a nodeCollector containing one instance of every
// registered collector whose flag value in collectorState is true.
//
// If any factory returns an error, construction stops and that error is
// returned unchanged together with a nil collector.
func NewNodeCollector() (*nodeCollector, error) {
	active := make(map[string]Collector)
	for name, state := range collectorState {
		if !*state {
			continue
		}
		c, err := factories[name]()
		if err != nil {
			return nil, err
		}
		active[name] = c
	}
	return &nodeCollector{Collectors: active}, nil
}

// Describe implements the prometheus.Collector interface. It sends only the
// two scrape metadata descriptors (duration, then success).
func (n nodeCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{scrapeDurationDesc, scrapeSuccessDesc} {
		ch <- desc
	}
}

// Collect implements the prometheus.Collector interface. It runs every held
// collector in its own goroutine and returns once all of them have finished.
func (n nodeCollector) Collect(ch chan<- prometheus.Metric) {
	var wg sync.WaitGroup
	for name, c := range n.Collectors {
		wg.Add(1)
		// name and c are passed as arguments so each goroutine works on
		// its own copy of the loop variables.
		go func(name string, c Collector) {
			defer wg.Done()
			execute(name, c, ch)
		}(name, c)
	}
	wg.Wait()
}

// execute runs a single collector's Update, logs the outcome, and then sends
// two gauge metrics labelled with the collector name on ch: the elapsed time
// in seconds and a success flag (1 on success, 0 on error).
func execute(name string, c Collector, ch chan<- prometheus.Metric) {
	start := time.Now()
	err := c.Update(ch)
	seconds := time.Since(start).Seconds()

	success := 1.0
	if err == nil {
		log.Debugf("OK: %s collector succeeded after %fs.", name, seconds)
	} else {
		log.Errorf("ERROR: %s collector failed after %fs: %s", name, seconds, err)
		success = 0
	}

	ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, seconds, name)
	ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
}

// Collector is the interface a collector has to implement.
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Expand Down
6 changes: 3 additions & 3 deletions collector/conntrack_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@ type conntrackCollector struct {
}

func init() {
Factories["conntrack"] = NewConntrackCollector
registerCollector("conntrack", defaultEnabled, NewConntrackCollector)
}

// NewConntrackCollector returns a new Collector exposing conntrack stats.
func NewConntrackCollector() (Collector, error) {
return &conntrackCollector{
current: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "", "nf_conntrack_entries"),
prometheus.BuildFQName(namespace, "", "nf_conntrack_entries"),
"Number of currently allocated flow entries for connection tracking.",
nil, nil,
),
limit: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "", "nf_conntrack_entries_limit"),
prometheus.BuildFQName(namespace, "", "nf_conntrack_entries_limit"),
"Maximum size of connection tracking table.",
nil, nil,
),
Expand Down
4 changes: 2 additions & 2 deletions collector/cpu_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ type statCollector struct {
}

func init() {
Factories["cpu"] = NewCPUCollector
registerCollector("cpu", defaultEnabled, NewCPUCollector)
}

// NewCPUCollector returns a new Collector exposing CPU stats.
func NewCPUCollector() (Collector, error) {
return &statCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "", "cpu"),
prometheus.BuildFQName(namespace, "", "cpu"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
Expand Down
4 changes: 2 additions & 2 deletions collector/cpu_dragonfly.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ type statCollector struct {
}

func init() {
Factories["cpu"] = NewStatCollector
registerCollector("cpu", defaultEnabled, NewStatCollector)
}

// NewStatCollector returns a new Collector exposing CPU stats.
func NewStatCollector() (Collector, error) {
return &statCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "", "cpu"),
prometheus.BuildFQName(namespace, "", "cpu"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
Expand Down
6 changes: 3 additions & 3 deletions collector/cpu_freebsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,19 @@ type statCollector struct {
}

func init() {
Factories["cpu"] = NewStatCollector
registerCollector("cpu", defaultEnabled, NewStatCollector)
}

// NewStatCollector returns a new Collector exposing CPU stats.
func NewStatCollector() (Collector, error) {
return &statCollector{
cpu: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "cpu", "seconds_total"),
prometheus.BuildFQName(namespace, "cpu", "seconds_total"),
"Seconds the CPU spent in each mode.",
[]string{"cpu", "mode"}, nil,
), prometheus.CounterValue},
temp: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "cpu", "temperature_celsius"),
prometheus.BuildFQName(namespace, "cpu", "temperature_celsius"),
"CPU temperature",
[]string{"cpu"}, nil,
), prometheus.GaugeValue},
Expand Down
16 changes: 8 additions & 8 deletions collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
)

const (
cpuCollectorNamespace = "cpu"
cpuCollectorSubsystem = "cpu"
)

var (
Expand All @@ -46,40 +46,40 @@ type cpuCollector struct {
}

func init() {
Factories["cpu"] = NewCPUCollector
registerCollector("cpu", defaultEnabled, NewCPUCollector)
}

// NewCPUCollector returns a new Collector exposing kernel/system statistics.
func NewCPUCollector() (Collector, error) {
return &cpuCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "", cpuCollectorNamespace),
prometheus.BuildFQName(namespace, "", cpuCollectorSubsystem),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
cpuFreq: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_hertz"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"),
"Current cpu thread frequency in hertz.",
[]string{"cpu"}, nil,
),
cpuFreqMin: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_min_hertz"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_min_hertz"),
"Minimum cpu thread frequency in hertz.",
[]string{"cpu"}, nil,
),
cpuFreqMax: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_max_hertz"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"),
"Maximum cpu thread frequency in hertz.",
[]string{"cpu"}, nil,
),
// FIXME: This should be a per core metric, not per cpu!
cpuCoreThrottle: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "core_throttles_total"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"),
"Number of times this cpu core has been throttled.",
[]string{"cpu"}, nil,
),
cpuPackageThrottle: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "package_throttles_total"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "package_throttles_total"),
"Number of times this cpu package has been throttled.",
[]string{"node"}, nil,
),
Expand Down
8 changes: 4 additions & 4 deletions collector/devstat_dragonfly.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,24 +99,24 @@ type devstatCollector struct {
}

func init() {
Factories["devstat"] = NewDevstatCollector
registerCollector("devstat", defaultDisabled, NewDevstatCollector)
}

// NewDevstatCollector returns a new Collector exposing Device stats.
func NewDevstatCollector() (Collector, error) {
return &devstatCollector{
bytesDesc: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, devstatSubsystem, "bytes_total"),
prometheus.BuildFQName(namespace, devstatSubsystem, "bytes_total"),
"The total number of bytes transferred for reads and writes on the device.",
[]string{"device"}, nil,
),
transfersDesc: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, devstatSubsystem, "transfers_total"),
prometheus.BuildFQName(namespace, devstatSubsystem, "transfers_total"),
"The total number of transactions completed.",
[]string{"device"}, nil,
),
blocksDesc: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, devstatSubsystem, "blocks_total"),
prometheus.BuildFQName(namespace, devstatSubsystem, "blocks_total"),
"The total number of bytes given in terms of the devices blocksize.",
[]string{"device"}, nil,
),
Expand Down
Loading

0 comments on commit 7444179

Please sign in to comment.