Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose cpu bugs and flags as info metrics. #1788

Merged
merged 5 commits into from
Jul 17, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 96 additions & 2 deletions collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package collector
import (
"fmt"
"path/filepath"
"regexp"
"strconv"
"sync"

Expand All @@ -32,16 +33,23 @@ type cpuCollector struct {
fs procfs.FS
cpu *prometheus.Desc
cpuInfo *prometheus.Desc
cpuFlagsInfo *prometheus.Desc
cpuBugsInfo *prometheus.Desc
cpuGuest *prometheus.Desc
cpuCoreThrottle *prometheus.Desc
cpuPackageThrottle *prometheus.Desc
logger log.Logger
cpuStats []procfs.CPUStat
cpuStatsMutex sync.Mutex

cpuFlagsIncludeRegexp *regexp.Regexp
cpuBugsIncludeRegexp *regexp.Regexp
}

// Command-line flags for the cpu_info family of metrics.
var (
// enableCPUInfo is the boolean switch registered as --collector.cpu.info.
enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
// flagsInclude holds the regular expression (as a string) used to select which
// entries of the cpuinfo `flags` field are exposed; it is handed to
// compileIncludeFlags when the collector is constructed.
flagsInclude = kingpin.Flag("collector.cpu.info.flags-include", "Filter the `flags` field in cpuInfo with a value that must be a regular expression").String()
// bugsInclude is the counterpart of flagsInclude for the cpuinfo `bugs` field.
bugsInclude = kingpin.Flag("collector.cpu.info.bugs-include", "Filter the `bugs` field in cpuInfo with a value that must be a regular expression").String()
)

func init() {
Expand All @@ -54,14 +62,24 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
if err != nil {
return nil, fmt.Errorf("failed to open procfs: %w", err)
}
return &cpuCollector{
c := &cpuCollector{
fs: fs,
cpu: nodeCPUSecondsDesc,
cpuInfo: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"),
"CPU information from /proc/cpuinfo.",
[]string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil,
),
cpuFlagsInfo: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "flag_info"),
"The `flags` field of CPU information from /proc/cpuinfo.",
[]string{"flag"}, nil,
),
cpuBugsInfo: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "bug_info"),
"The `bugs` field of CPU information from /proc/cpuinfo.",
[]string{"bug"}, nil,
),
cpuGuest: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
"Seconds the cpus spent in guests (VMs) for each mode.",
Expand All @@ -78,7 +96,36 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
[]string{"package"}, nil,
),
logger: logger,
}, nil
}
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
if err != nil {
return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
}
return c, nil
}

// compileIncludeFlags compiles the --collector.cpu.info.flags-include and
// --collector.cpu.info.bugs-include patterns and stores the results in
// c.cpuFlagsIncludeRegexp and c.cpuBugsIncludeRegexp.
//
// A filter is considered "set" only when its pointer is non-nil AND the
// pointed-to string is non-empty. The callers pass kingpin String() flags,
// whose pointers are non-nil even when the option was not given on the command
// line, so a bare nil check cannot tell "unset" from "set": it would compile
// the empty pattern (which matches everything) and export every flag and bug.
//
// When --collector.cpu.info is disabled nothing is compiled; an informational
// message is logged only if at least one filter was actually provided.
//
// It returns the regexp compilation error, prefixed with the name of the
// offending flag, or nil on success.
func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error {
	var flagsPattern, bugsPattern string
	if flagsIncludeFlag != nil {
		flagsPattern = *flagsIncludeFlag
	}
	if bugsIncludeFlag != nil {
		bugsPattern = *bugsIncludeFlag
	}

	if !*enableCPUInfo {
		if flagsPattern != "" || bugsPattern != "" {
			level.Info(c.logger).Log("msg", "--collector.cpu.info.flags-include and --collector.cpu.info.bugs-include will not take effect until --collector.cpu.info is set to true")
		}
		return nil
	}

	// Leave the regexp fields nil for unset filters; the update functions
	// treat a nil regexp as "emit nothing".
	if flagsPattern != "" {
		re, err := regexp.Compile(flagsPattern)
		if err != nil {
			return fmt.Errorf("--collector.cpu.info.flags-include: %w", err)
		}
		c.cpuFlagsIncludeRegexp = re
	}
	if bugsPattern != "" {
		re, err := regexp.Compile(bugsPattern)
		if err != nil {
			return fmt.Errorf("--collector.cpu.info.bugs-include: %w", err)
		}
		c.cpuBugsIncludeRegexp = re
	}
	return nil
}

// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
Expand Down Expand Up @@ -117,6 +164,43 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
cpu.Microcode,
cpu.Stepping,
cpu.CacheSize)

if err := c.updateFlagInfo(cpu, ch); err != nil {
domechn marked this conversation as resolved.
Show resolved Hide resolved
return err
}
if err := c.updateBugInfo(cpu, ch); err != nil {
domechn marked this conversation as resolved.
Show resolved Hide resolved
return err
}
}
return nil
}

// updateFlagInfo sends one cpuFlagsInfo gauge with value 1 for every entry of
// cpu.Flags that matches c.cpuFlagsIncludeRegexp, preserving the order of
// cpu.Flags. If no include regexp has been compiled, nothing is sent.
// The returned error is always nil.
func (c *cpuCollector) updateFlagInfo(cpu procfs.CPUInfo, ch chan<- prometheus.Metric) error {
	include := c.cpuFlagsIncludeRegexp
	if include != nil {
		for _, name := range cpu.Flags {
			if !include.MatchString(name) {
				continue
			}
			ch <- prometheus.MustNewConstMetric(c.cpuFlagsInfo, prometheus.GaugeValue, 1, name)
		}
	}
	return nil
}

// updateFlagInfo reads the bugs field from /proc/cpuinfo, and filters them through input regular expressions
domechn marked this conversation as resolved.
Show resolved Hide resolved
func (c *cpuCollector) updateBugInfo(cpu procfs.CPUInfo, ch chan<- prometheus.Metric) error {
if c.cpuBugsIncludeRegexp == nil {
return nil
}
for _, bug := range filterByRegexp(cpu.Bugs, c.cpuBugsIncludeRegexp) {
ch <- prometheus.MustNewConstMetric(c.cpuBugsInfo,
prometheus.GaugeValue,
1,
bug,
)
}
return nil
}
Expand Down Expand Up @@ -304,3 +388,13 @@ func (c *cpuCollector) updateCPUStats(newStats []procfs.CPUStat) {
}
}
}

// filterByRegexp returns, in their original order, the elements of list that
// reg matches. The result is nil when no element matches, including when list
// is empty or nil.
func filterByRegexp(list []string, reg *regexp.Regexp) []string {
	var matched []string
	for i := range list {
		if item := list[i]; reg.MatchString(item) {
			matched = append(matched, item)
		}
	}
	return matched
}
12 changes: 12 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,18 @@ node_cpu_info{cachesize="8192 KB",core="2",cpu="2",family="6",microcode="0xb4",m
node_cpu_info{cachesize="8192 KB",core="2",cpu="6",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
node_cpu_info{cachesize="8192 KB",core="3",cpu="3",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
node_cpu_info{cachesize="8192 KB",core="3",cpu="7",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
# TYPE node_cpu_flag_info gauge
node_cpu_flag_info{flag="aes"} 1
node_cpu_flag_info{flag="avx"} 1
node_cpu_flag_info{flag="avx2"} 1
node_cpu_flag_info{flag="constant_tsc"} 1
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
# TYPE node_cpu_bug_info gauge
node_cpu_bug_info{bug="cpu_meltdown"} 1
node_cpu_bug_info{bug="spectre_v1"} 1
node_cpu_bug_info{bug="spectre_v2"} 1
node_cpu_bug_info{bug="mds"} 1
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
# TYPE node_cpu_package_throttles_total counter
node_cpu_package_throttles_total{package="0"} 30
Expand Down
12 changes: 12 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,18 @@ node_cpu_info{cachesize="8192 KB",core="2",cpu="2",family="6",microcode="0xb4",m
node_cpu_info{cachesize="8192 KB",core="2",cpu="6",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
node_cpu_info{cachesize="8192 KB",core="3",cpu="3",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
node_cpu_info{cachesize="8192 KB",core="3",cpu="7",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
# TYPE node_cpu_flag_info gauge
node_cpu_flag_info{flag="aes"} 1
node_cpu_flag_info{flag="avx"} 1
node_cpu_flag_info{flag="avx2"} 1
node_cpu_flag_info{flag="constant_tsc"} 1
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
# TYPE node_cpu_bug_info gauge
node_cpu_bug_info{bug="cpu_meltdown"} 1
node_cpu_bug_info{bug="spectre_v1"} 1
node_cpu_bug_info{bug="spectre_v2"} 1
node_cpu_bug_info{bug="mds"} 1
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
# TYPE node_cpu_package_throttles_total counter
node_cpu_package_throttles_total{package="0"} 30
Expand Down
2 changes: 2 additions & 0 deletions end-to-end-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ fi
--collector.qdisc.fixtures="collector/fixtures/qdisc/" \
--collector.netclass.ignored-devices="(bond0|dmz|int)" \
--collector.cpu.info \
--collector.cpu.info.flags-include="^(aes|avx.?|constant_tsc)$" \
--collector.cpu.info.bugs-include="^(cpu_meltdown|spectre_.*|mds)$" \
--web.listen-address "127.0.0.1:${port}" \
--log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 &

Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.