diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md index 254e0c4926dfc..be2a86206727c 100644 --- a/docs/LICENSE_OF_DEPENDENCIES.md +++ b/docs/LICENSE_OF_DEPENDENCIES.md @@ -201,6 +201,7 @@ following works: - github.com/influxdata/toml [MIT License](https://github.com/influxdata/toml/blob/master/LICENSE) - github.com/influxdata/wlog [MIT License](https://github.com/influxdata/wlog/blob/master/LICENSE) - github.com/intel/iaevents [Apache License 2.0](https://github.com/intel/iaevents/blob/main/LICENSE) +- github.com/intel/powertelemetry [Apache License 2.0](https://github.com/intel/powertelemetry/blob/main/LICENSE) - github.com/jackc/chunkreader [MIT License](https://github.com/jackc/chunkreader/blob/master/LICENSE) - github.com/jackc/pgconn [MIT License](https://github.com/jackc/pgconn/blob/master/LICENSE) - github.com/jackc/pgio [MIT License](https://github.com/jackc/pgio/blob/master/LICENSE) @@ -220,6 +221,7 @@ following works: - github.com/jeremywohl/flatten [MIT License](https://github.com/jeremywohl/flatten/blob/master/LICENSE) - github.com/jhump/protoreflect [Apache License 2.0](https://github.com/jhump/protoreflect/blob/master/LICENSE) - github.com/jmespath/go-jmespath [Apache License 2.0](https://github.com/jmespath/go-jmespath/blob/master/LICENSE) +- github.com/jmhodges/clock [MIT License](https://github.com/jmhodges/clock/blob/main/LICENSE) - github.com/josharian/intern [MIT License](https://github.com/josharian/intern/blob/master/LICENSE.md) - github.com/josharian/native [MIT License](https://github.com/josharian/native/blob/main/license) - github.com/jpillora/backoff [MIT License](https://github.com/jpillora/backoff/blob/master/LICENSE) diff --git a/go.mod b/go.mod index f8ed6451d7800..568c1fac8df98 100644 --- a/go.mod +++ b/go.mod @@ -112,6 +112,7 @@ require ( github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65 github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8 github.com/intel/iaevents v1.1.0 + 
github.com/intel/powertelemetry v1.0.0 github.com/jackc/pgconn v1.14.1 github.com/jackc/pgio v1.0.0 github.com/jackc/pgtype v1.14.0 @@ -363,6 +364,7 @@ require ( github.com/jcmturner/gofork v1.7.6 // indirect github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect github.com/jcmturner/rpc/v2 v2.0.3 // indirect + github.com/jmhodges/clock v1.2.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/josharian/native v1.1.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect @@ -374,7 +376,7 @@ require ( github.com/kr/fs v0.1.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/ragel-machinery v0.0.0-20181214104525-299bdde78165 // indirect - github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c // indirect + github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect @@ -418,7 +420,7 @@ require ( github.com/pkg/sftp v1.13.5 // indirect github.com/pkg/xattr v0.4.9 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c // indirect + github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/robertkrimen/otto v0.0.0-20191219234010-c382bd3c16ff // indirect diff --git a/go.sum b/go.sum index aebe448f8c585..ec01e7b6d8de4 100644 --- a/go.sum +++ b/go.sum @@ -1535,6 +1535,8 @@ github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8 h1:W2IgzRCb0L9VzMu github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8/go.mod h1:/2NMgWB1DHM1ti/gqhOlg+LJeBVk6FqR5aVGYY0hlwI= github.com/intel/iaevents v1.1.0 h1:FzxMBfXk/apG2EUXUCfaq3gUQ+q+TgZ1HNMjjUILUGE= 
github.com/intel/iaevents v1.1.0/go.mod h1:CyUUzXw0lHRCsmyyF7Pwco9Y7NiTNQUUlcJ7RJAazKs= +github.com/intel/powertelemetry v1.0.0 h1:9MP7OjNSqPPok1GCMRcVvToAcIJ4HvuNgt9rq7shnfk= +github.com/intel/powertelemetry v1.0.0/go.mod h1:0/EKcFml0Imic4Mva8QzsZhT/L0nc3Y+MbT9IU0y1FA= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= @@ -1610,6 +1612,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= +github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= @@ -1721,8 +1725,8 @@ github.com/loov/hrtime v1.0.3/go.mod h1:yDY3Pwv2izeY4sq7YcPX/dtLwzg5NU1AxWuWxKwd github.com/loov/hrtime/hrplot v1.0.2/go.mod h1:9t65xYn4d42ntjv40Wt5lbU72/VC5S0zGDgjC8kD5BU= github.com/loov/plot v0.0.0-20200413101321-e09a6f01d2f5/go.mod h1:gSrhfSMoiPGG0CZ9E66kXjaHxFw0fzJhooyicOnz5z4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c h1:VtwQ41oftZwlMnOEbMWQtSEUgU64U4s+GHk7hZK+jtY= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c/go.mod 
h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= +github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de h1:V53FWzU6KAZVi1tPp5UIsMoUWJ2/PNwYIDXnu7QuBCE= +github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= github.com/lxc/lxd v0.0.0-20220920163450-e9b4b514106a h1:VCh69Giyzh/1qPZHC62ysQvFGI93vEQkFNo7iApvlzM= github.com/lxc/lxd v0.0.0-20220920163450-e9b4b514106a/go.mod h1:Y+Ny8KSylQRtfyOxVN0Cha/jAfd2l1AlHiDulGP+GQk= github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA= @@ -2003,8 +2007,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c h1:NRoLoZvkBTKvR5gQLgA3e0hqjkY9u1wm+iOL45VN/qI= -github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b h1:0LFwY6Q3gMACTjAbMZBjXAqTOzOwFaj2Ld6cjeQ7Rig= +github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus-community/pro-bing v0.3.0 h1:SFT6gHqXwbItEDJhTkzPWVqU6CLEtqEfNAPp47RUON4= github.com/prometheus-community/pro-bing v0.3.0/go.mod h1:p9dLb9zdmv+eLxWfCT6jESWuDrS+YzpPkQBgysQF8a0= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= diff --git a/plugins/inputs/intel_powerstat/README.md b/plugins/inputs/intel_powerstat/README.md index 76f3400278818..639b113e85d98 100644 --- a/plugins/inputs/intel_powerstat/README.md +++ 
b/plugins/inputs/intel_powerstat/README.md @@ -27,15 +27,15 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. # Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) # and per-CPU metrics like temperature, power and utilization. Please see the # plugin readme for details on software and hardware compatability. -# This plugin ONLY supports Linux +# This plugin ONLY supports Linux. [[inputs.intel_powerstat]] ## The user can choose which package metrics are monitored by the plugin with ## the package_metrics setting: ## - The default, will collect "current_power_consumption", - ## "current_dram_power_consumption" and "thermal_design_power" - ## - Leaving this setting empty means no package metrics will be collected + ## "current_dram_power_consumption" and "thermal_design_power". + ## - Leaving this setting empty means no package metrics will be collected. ## - Finally, a user can specify individual metrics to capture from the - ## supported options list + ## supported options list. ## Supported options: ## "current_power_consumption", "current_dram_power_consumption", ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency", @@ -48,13 +48,49 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## by the plugin. ## Supported options: ## "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", - ## "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", - ## "cpu_busy_frequency" - ## ATTENTION: cpu_busy_cycles is DEPRECATED - use cpu_c0_state_residency + ## "cpu_c3_state_residency", "cpu_c6_state_residency", "cpu_c7_state_residency", + ## "cpu_temperature", "cpu_busy_frequency", "cpu_c0_substate_c01", + ## "cpu_c0_substate_c02", "cpu_c0_substate_c0_wait" # cpu_metrics = [] + + ## Optionally the user can choose for which CPUs metrics configured in cpu_metrics array should be gathered. + ## Can't be combined with excluded_cpus. 
+ ## Empty or missing array means CPU metrics are gathered for all CPUs. + ## e.g. ["0-3", "4,5,6"] or ["1-3,4"] + # included_cpus = [] + + ## Optionally the user can choose which CPUs should be excluded from gathering metrics configured in cpu_metrics array. + ## Can't be combined with included_cpus. + ## Empty or missing array means CPU metrics are gathered for all CPUs. + ## e.g. ["0-3", "4,5,6"] or ["1-3,4"] + # excluded_cpus = [] + + ## Filesystem location of JSON file that contains PMU event definitions. + ## Mandatory only for perf-related metrics (cpu_c0_substate_c01, cpu_c0_substate_c02, cpu_c0_substate_c0_wait). + # event_definitions = "" + + ## The user can set the timeout duration for MSR reading. + ## Enabling this timeout can be useful in situations where, on heavily loaded systems, + ## the code waits too long for a kernel response to MSR read requests. + ## 0 disables the timeout (default). + # msr_read_timeout = "0ms" ``` -## Example: Configuration with no per-CPU telemetry +### Configuration notes + +1. The configuration of `included_cpus` or `excluded_cpus` may affect the ability to collect `package_metrics`. + Some of them (`max_turbo_frequency`, `cpu_base_frequency`, and `uncore_frequency`) need to read data + from exactly one processor for each package. If `included_cpus` or `excluded_cpus` exclude all processors + from the package, reading the mentioned metrics for that package will not be possible. +2. `event_definitions` JSON file for specific architecture can be found at [perfmon](https://github.com/intel/perfmon). + A script to download the event definition that is appropriate for current environment (`event_download.py`) is + available at [pmu-tools](https://github.com/andikleen/pmu-tools). + For perf-related metrics supported by this plugin, an event definition JSON file + with events for the `core` is required. + + For example: `sapphirerapids_core.json` or `GenuineIntel-6-8F-core.json`. 
+ +### Example: Configuration with no per-CPU telemetry This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected: @@ -64,7 +100,7 @@ no per-CPU metrics are collected: cpu_metrics = [] ``` -## Example: Configuration with no per-CPU telemetry - equivalent case +### Example: Configuration with no per-CPU telemetry - equivalent case This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected: @@ -73,268 +109,276 @@ no per-CPU metrics are collected: [[inputs.intel_powerstat]] ``` -## Example: Configuration for CPU Temperature and CPU Frequency +### Example: Configuration for CPU Temperature and CPU Frequency This configuration allows getting default processor package specific metrics, -plus subset of per-CPU metrics (CPU Temperature and CPU Frequency): +plus subset of per-CPU metrics (CPU Temperature and CPU Frequency) which will be +gathered only for `cpu_id = 0`: ```toml [[inputs.intel_powerstat]] cpu_metrics = ["cpu_frequency", "cpu_temperature"] + included_cpus = ["0"] ``` -## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics +### Example: Configuration for CPU Temperature and CPU Frequency without default package metrics -This configuration allows getting only a subset of per-CPU metrics (CPU -Temperature and CPU Frequency): +This configuration allows getting only a subset of per-CPU metrics +(CPU Temperature and CPU Frequency) which will be gathered for +all `cpus` except `cpu_id = ["1-3"]`: ```toml [[inputs.intel_powerstat]] package_metrics = [] cpu_metrics = ["cpu_frequency", "cpu_temperature"] + excluded_cpus = ["1-3"] ``` -## Example: Configuration with all available metrics +### Example: Configuration with all available metrics This configuration allows getting all processor package specific metrics and all per-CPU metrics: ```toml [[inputs.intel_powerstat]] - package_metrics = ["current_power_consumption", 
"current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency"] - cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"] + package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency", "cpu_base_frequency"] + cpu_metrics = ["cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c3_state_residency", "cpu_c6_state_residency", "cpu_c7_state_residency", "cpu_temperature", "cpu_busy_frequency", "cpu_c0_substate_c01", "cpu_c0_substate_c02", "cpu_c0_substate_c0_wait"] + event_definitions = "/home/telegraf/.cache/pmu-events/GenuineIntel-6-8F-core.json" ``` ## SW Dependencies -Plugin is based on Linux Kernel modules that expose specific metrics over +### Kernel modules + +Plugin is mostly based on Linux Kernel modules that expose specific metrics over `sysfs` or `devfs` interfaces. The following dependencies are expected by plugin: -- _intel-rapl_ module which exposes Intel Runtime Power Limiting metrics over +- `intel-rapl` kernel module which exposes Intel Runtime Power Limiting metrics over `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`), -- _msr_ kernel module that provides access to processor model specific +- `msr` kernel module that provides access to processor model specific registers over `devfs` (`/dev/cpu/cpu%d/msr`), -- _cpufreq_ kernel module - which exposes per-CPU Frequency over `sysfs` - (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`). -- _intel-uncore-frequency_ module exposes Intel uncore frequency metrics - over `sysfs` (`/sys/devices/system/cpu/intel_uncore_frequency`), - -Minimum kernel version required is 3.13 to satisfy most of requirements, -for `uncore_frequency` metrics `intel-uncore-frequency` module is required -(available since kernel 5.6). 
+- `cpufreq` kernel module - which exposes per-CPU Frequency over `sysfs` + (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`), +- `intel-uncore-frequency` kernel module exposes Intel uncore frequency metrics + over `sysfs` (`/sys/devices/system/cpu/intel_uncore_frequency`). -Please make sure that kernel modules are loaded and running (cpufreq is -integrated in kernel). Modules might have to be manually enabled by using -`modprobe`. Depending on the kernel version, run commands: +Make sure that required kernel modules are loaded and running. +Modules might have to be manually enabled by using `modprobe`. +Depending on the kernel version, run commands: ```sh -# kernel 5.x.x: +# rapl modules: +## kernel < 4.0 +sudo modprobe intel_rapl +## kernel >= 4.0 sudo modprobe rapl -sudo modprobe msr sudo modprobe intel_rapl_common sudo modprobe intel_rapl_msr -# also for kernel >= 5.6.0 -sudo modprobe intel-uncore-frequency - -# kernel 4.x.x: +# msr module: sudo modprobe msr -sudo modprobe intel_rapl -``` -**Telegraf with Intel PowerStat plugin enabled may require root access to read -model specific registers (MSRs)** to retrieve data for calculation of most -critical per-CPU specific metrics: +# cpufreq module: +### integrated in kernel + +# intel-uncore-frequency module: +## only for kernel >= 5.6.0 +sudo modprobe intel-uncore-frequency +``` -- `cpu_busy_frequency_mhz` -- `cpu_temperature_celsius` -- `cpu_c0_state_residency_percent` -- `cpu_c1_state_residency_percent` -- `cpu_c6_state_residency_percent` +### Kernel's perf interface -and to retrieve data for calculation per-package specific metric: +For perf-related metrics, when Telegraf is not running as root, +the following capability should be added to the Telegraf executable: -- `max_turbo_frequency_mhz` -- `uncore_frequency_mhz_cur` -- `cpu_base_frequency_mhz` +```sh +sudo setcap cap_sys_admin+ep <path_to_telegraf_binary> +``` -To expose other Intel PowerStat metrics root access may or may not be required +Alternatively, 
`/proc/sys/kernel/perf_event_paranoid` has to be set to +value less than 1. + +Depending on environment and configuration (number of monitored CPUs +and number of enabled metrics), it might be required to increase +the limit on the number of open file descriptors allowed. +This can be done for example by using `ulimit -n` command. + +### Dependencies of metrics on system configuration + +Details of these dependencies are discussed above: + +| Configuration option | Type | Dependency | +|-------------------------------------------------------------------------------------|-------------------|------------------------------------------------| +| `current_power_consumption` | `package_metrics` | `rapl` kernel module(s) | +| `current_dram_power_consumption` | `package_metrics` | `rapl` kernel module(s) | +| `thermal_design_power` | `package_metrics` | `rapl` kernel module(s) | +| `max_turbo_frequency` | `package_metrics` | `msr` kernel module | +| `uncore_frequency` | `package_metrics` | `intel-uncore-frequency`/`msr` kernel modules* | +| `cpu_base_frequency` | `package_metrics` | `msr` kernel module | +| `cpu_frequency` | `cpu_metrics` | `cpufreq` kernel module | +| `cpu_c0_state_residency` | `cpu_metrics` | `msr` kernel module | +| `cpu_c1_state_residency` | `cpu_metrics` | `msr` kernel module | +| `cpu_c3_state_residency` | `cpu_metrics` | `msr` kernel module | +| `cpu_c6_state_residency` | `cpu_metrics` | `msr` kernel module | +| `cpu_c7_state_residency` | `cpu_metrics` | `msr` kernel module | +| `cpu_busy_cycles` (**DEPRECATED** - superseded by `cpu_c0_state_residency_percent`) | `cpu_metrics` | `msr` kernel module | +| `cpu_temperature` | `cpu_metrics` | `msr` kernel module | +| `cpu_busy_frequency` | `cpu_metrics` | `msr` kernel module | +| `cpu_c0_substate_c01` | `cpu_metrics` | kernel's `perf` interface | +| `cpu_c0_substate_c02` | `cpu_metrics` | kernel's `perf` interface | +| `cpu_c0_substate_c0_wait` | `cpu_metrics` | kernel's `perf` interface | + +*for all 
metrics enabled by the configuration option `uncore_frequency`, +starting from kernel version 5.18, only the `intel-uncore-frequency` module +is required. For older kernel versions, the metric `uncore_frequency_mhz_cur` +requires the `msr` module to be enabled. + +### Root privileges + +**Telegraf with Intel PowerStat plugin enabled may require +root privileges to read all the metrics** (depending on OS type or configuration). +Alternatively, the following capabilities can be added to +the Telegraf executable: + +```sh +#without perf-related metrics: +sudo setcap cap_sys_rawio,cap_dac_read_search+ep <path_to_telegraf_binary> + +#with perf-related metrics: +sudo setcap cap_sys_rawio,cap_dac_read_search,cap_sys_admin+ep <path_to_telegraf_binary> +``` + ## HW Dependencies Specific metrics require certain processor features to be present, otherwise -Intel PowerStat plugin won't be able to read them. When using Linux Kernel based -OS, user can detect supported processor features reading `/proc/cpuinfo` file. +Intel PowerStat plugin won't be able to read them. The user can detect supported +processor features by reading `/proc/cpuinfo` file. Plugin assumes crucial properties are the same for all CPU cores in the system. -The following processor properties are examined in more detail in this section: -processor _cpu family_, _model_ and _flags_. The following processor properties -are required by the plugin: -- Processor _cpu family_ must be Intel (0x6) - since data used by the plugin - assumes Intel specific model specific registers for all features +The following `processor` properties are examined in more detail +in this section: + +- `vendor_id` +- `cpu family` +- `model` +- `flags` + +The following processor properties are required by the plugin: + +- Processor `vendor_id` must be `GenuineIntel` and `cpu family` must be `6` - + since data used by the plugin are Intel-specific. 
- The following processor flags shall be present: - - "_msr_" shall be present for plugin to read platform data from processor + - `msr` shall be present for plugin to read platform data from processor model specific registers and collect the following metrics: - _powerstat\_core.cpu\_temperature_, _powerstat\_core.cpu\_busy\_frequency_, - _powerstat\_core.cpu\_c0\_state\_residency_, - _powerstat\_core.cpu\_c1\_state\_residency_, - _powerstat\_core.cpu\_c6\_state\_residency_ - - "_aperfmperf_" shall be present to collect the following metrics: - _powerstat\_core.cpu\_busy\_frequency_, - _powerstat\_core.cpu\_c0\_state\_residency_, - _powerstat\_core.cpu\_c1\_state\_residency_ - - "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_ -- Processor _Model number_ must be one of the following values for plugin to - read _powerstat\_core.cpu\_c1\_state\_residency_ / - _powerstat\_core.cpu\_c6\_state\_residency_ and - _powerstat\_package.cpu\_base\_frequency_ metrics: - -| Model number | Processor name | -|--------------|---------------------------------| -| 0x37 | Intel Atom® Bay Trail | -| 0x4D | Intel Atom® Avaton | -| 0x5C | Intel Atom® Apollo Lake | -| 0x5F | Intel Atom® Denverton | -| 0x7A | Intel Atom® Goldmont | -| 0x4C | Intel Atom® Airmont | -| 0x86 | Intel Atom® Jacobsville | -| 0x96 | Intel Atom® Elkhart Lake | -| 0x9C | Intel Atom® Jasper Lake | -| 0x1A | Intel Nehalem-EP | -| 0x1E | Intel Nehalem | -| 0x1F | Intel Nehalem-G | -| 0x2E | Intel Nehalem-EX | -| 0x25 | Intel Westmere | -| 0x2C | Intel Westmere-EP | -| 0x2F | Intel Westmere-EX | -| 0x2A | Intel Sandybridge | -| 0x2D | Intel Sandybridge-X | -| 0x3A | Intel Ivybridge | -| 0x3E | Intel Ivybridge-X | -| 0x4E | Intel Atom® Silvermont-MID | -| 0x5E | Intel Skylake | -| 0x55 | Intel Skylake-X | -| 0x8E | Intel KabyLake-L | -| 0x9E | Intel KabyLake | -| 0x6A | Intel IceLake-X | -| 0x6C | Intel IceLake-D | -| 0x7D | Intel IceLake | -| 0x7E | Intel IceLake-L | -| 0x9D | Intel IceLake-NNPI 
| -| 0x3C | Intel Haswell | -| 0x3F | Intel Haswell-X | -| 0x45 | Intel Haswell-L | -| 0x46 | Intel Haswell-G | -| 0x3D | Intel Broadwell | -| 0x47 | Intel Broadwell-G | -| 0x4F | Intel Broadwell-X | -| 0x56 | Intel Broadwell-D | -| 0x66 | Intel CannonLake-L | -| 0x57 | Intel Xeon® PHI Knights Landing | -| 0x85 | Intel Xeon® PHI Knights Mill | -| 0xA5 | Intel CometLake | -| 0xA6 | Intel CometLake-L | -| 0x8A | Intel Lakefield | -| 0x8F | Intel Sapphire Rapids X | -| 0x8C | Intel TigerLake-L | -| 0x8D | Intel TigerLake | -| 0xA7 | Intel RocketLake | -| 0x97 | Intel AlderLake | -| 0x9A | Intel AlderLake-L | -| 0xBE | Intel AlderLake-N | -| 0xB7 | Intel RaptorLake | -| 0xBA | Intel RaptorLake-P | -| 0xBF | Intel RaptorLake-S | -| 0xAC | Intel MeteorLake | -| 0xAA | Intel MeteorLake-L | - -### uncore frequency - -Note that only certain processors support the uncore frequency module as well: - -| Model number | Processor name | -|--------------|---------------------------------| -| 0x55 | Intel Skylake-X | -| 0x6A | Intel IceLake-X | -| 0x6C | Intel IceLake-D | -| 0x47 | Intel Broadwell-G | -| 0x4F | Intel Broadwell-X | -| 0x56 | Intel Broadwell-D | -| 0x8F | Intel Sapphire Rapids X | -| 0xCF | Intel Emerald Rapids X | + - `cpu_c0_state_residency` + - `cpu_c1_state_residency` + - `cpu_c3_state_residency` + - `cpu_c6_state_residency` + - `cpu_c7_state_residency` + - `cpu_busy_cycles` (**DEPRECATED** - superseded by `cpu_c0_state_residency_percent`) + - `cpu_busy_frequency` + - `cpu_temperature` + - `cpu_base_frequency` + - `max_turbo_frequency` + - `uncore_frequency` (for kernel < 5.18) + - `aperfmperf` shall be present to collect the following metrics: + - `cpu_c0_state_residency` + - `cpu_c1_state_residency` + - `cpu_busy_cycles` (**DEPRECATED** - superseded by `cpu_c0_state_residency_percent`) + - `cpu_busy_frequency` + - `dts` shall be present to collect: + - `cpu_temperature` +- Please consult the table of [supported CPU models](#supported-cpu-models) to see which 
metrics are supported by your `model`. The following metrics exist: + - `cpu_c1_state_residency` + - `cpu_c3_state_residency` + - `cpu_c6_state_residency` + - `cpu_c7_state_residency` + - `cpu_temperature` + - `cpu_base_frequency` + - `uncore_frequency` ## Metrics All metrics collected by Intel PowerStat plugin are collected in fixed intervals. Metrics that reports processor C-state residency or power are -calculated over elapsed intervals. When starting to measure metrics, plugin -skips first iteration of metrics if they are based on deltas with previous -value. +calculated over elapsed intervals. **The following measurements are supported by Intel PowerStat plugin:** -- powerstat_core - - - The following Tags are returned by plugin with powerstat_core measurements: - - | Tag | Description | - |--------------|-------------------------------| - | `package_id` | ID of platform package/socket | - | `core_id` | ID of physical processor core | - | `cpu_id` | ID of logical processor core | - - Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key) - while core_id and package_id tags are additional topology information. 
- - - Available metrics for powerstat_core measurement - - | Metric name (field) | Description | Units | - |---------------------|-------------|-------| - | `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz | - | `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz | - | `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees | - | `cpu_c0_state_residency_percent` | Percentage of time that CPU Core spent in C0 Core residency state | % | - | `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % | - | `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % | - | `cpu_busy_cycles_percent` | (**DEPRECATED** - superseded by cpu_c0_state_residency_percent) CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % | - -- powerstat_package - - The following Tags are returned by plugin with powerstat_package measurements: - - | Tag | Description | - |-----|-------------| - | `package_id` | ID of platform package/socket | - | `active_cores`| Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency - | `die`| Specific tag for all `uncore_frequency` metrics. Id of die - | `type`| Specific tag for all `uncore_frequency` metrics. Type of uncore frequency (current or initial) - - Measurement powerstat_package metrics are collected per processor package - _package_id_ tag indicates which package metric refers to. 
- - Available metrics for powerstat_package measurement - - | Metric name (field) | Description | Units | - |-----|-------------|-----| - | `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts | - | `current_power_consumption_watts` | Current power consumption of processor package | Watts | - | `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts | - | `max_turbo_frequency_mhz`| Maximum reachable turbo frequency for number of cores active | MHz - | `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz - | `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz - | `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from `intel-uncore-frequency` module it needs to be accessed via MSR. In case of lack of loaded msr, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz - | `cpu_base_frequency_mhz`| CPU Base Frequency (maximum non-turbo frequency) for the processor package | MHz +- `powerstat_core` + - The following tags are returned by plugin with + `powerstat_core` measurements: + + | Tag | Description | + |--------------|--------------------------------| + | `package_id` | ID of platform package/socket. | + | `core_id` | ID of physical processor core. | + | `cpu_id` | ID of logical processor core. | + + Measurement `powerstat_core` metrics are collected per-CPU (`cpu_id` is the key) + while `core_id` and `package_id` tags are additional topology information. 
+ + - Available metrics for `powerstat_core` measurement: + + | Metric name (field) | Description | Units | + |-----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| + | `cpu_frequency_mhz` | Current operational frequency of CPU Core. | MHz | + | `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles. | MHz | + | `cpu_temperature_celsius` | Current temperature of CPU Core. | Celsius degrees | + | `cpu_c0_state_residency_percent` | Percentage of time that CPU Core spent in C0 Core residency state. | % | + | `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state. | % | + | `cpu_c3_state_residency_percent` | Percentage of time that CPU Core spent in C3 Core residency state. | % | + | `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state. | % | + | `cpu_c7_state_residency_percent` | Percentage of time that CPU Core spent in C7 Core residency state. | % | + | `cpu_c0_substate_c01_percent` | Percentage of time that CPU Core spent in C0.1 substate out of the total time in the C0 state. | % | + | `cpu_c0_substate_c02_percent` | Percentage of time that CPU Core spent in C0.2 substate out of the total time in the C0 state. | % | + | `cpu_c0_substate_c0_wait_percent` | Percentage of time that CPU Core spent in C0_Wait substate out of the total time in the C0 state. | % | + | `cpu_busy_cycles_percent` | (**DEPRECATED** - superseded by cpu_c0_state_residency_percent) CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core. 
| % | + +- `powerstat_package` + - The following tags are returned by plugin with `powerstat_package` measurements: + + | Tag | Description | + |----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | `package_id` | ID of platform package/socket. | + | `active_cores` | Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency. | + | `hybrid` | Specific tag for `max_turbo_frequency_mhz` metric. Available only for hybrid processors. Will be set to `primary` for primary cores of a hybrid architecture, and to `secondary` for secondary cores of a hybrid architecture. | + | `die` | Specific tag for all `uncore_frequency` metrics. Id of die. | + | `type` | Specific tag for all `uncore_frequency` metrics. Type of uncore frequency (`current` or `initial`). | + + Measurement `powerstat_package` metrics are collected per processor package + `package_id` tag indicates which package metric refers to. + + - Available metrics for `powerstat_package` measurement: + + | Metric name (field) | Description | Units | + |----------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------| + | `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package. | Watts | + | `current_power_consumption_watts` | Current power consumption of processor package. 
| Watts | + | `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem. | Watts | + | `max_turbo_frequency_mhz` | Maximum reachable turbo frequency for number of cores active. | MHz | + | `uncore_frequency_limit_mhz_min` | Minimum uncore frequency limit for die in processor package. | MHz | + | `uncore_frequency_limit_mhz_max` | Maximum uncore frequency limit for die in processor package. | MHz | + | `uncore_frequency_mhz_cur` | Current uncore frequency for die in processor package. Available only with tag `current`. This value is available from `intel-uncore-frequency` module for kernel >= 5.18. For older kernel versions it needs to be accessed via MSR. If the `msr` module is not loaded, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected. | MHz | + | `cpu_base_frequency_mhz` | CPU Base Frequency (maximum non-turbo frequency) for the processor package. | MHz | ### Known issues -From linux kernel version v5.4.77 with [this kernel change][19f6d91b] resources -like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for -security reasons, so this plugin needs root privileges to work properly. +Starting from Linux kernel version v5.4.77, due to +[this kernel change][19f6d91b], resources such as +`/sys/devices/virtual/powercap/intel-rapl/*/energy_uj` +can only be accessed by the root user for security reasons. +Therefore, this plugin requires root privileges to gather +`rapl` metrics correctly. -If such strict security restrictions are not relevant, reading permissions to -files in `/sys/devices/virtual/powercap/intel-rapl/` directory can be manually -changed for example with `chmod` command with custom parameters.
For example to -give all users permission to all files in `intel-rapl` directory: +If such strict security restrictions are not relevant, reading permissions for +files in the `/sys/devices/virtual/powercap/intel-rapl/` directory can be +manually altered, for example, using the `chmod` command with custom parameters. +For instance, read and execute permissions for all files in the +`intel-rapl` directory can be granted to all users using: ```bash sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/ @@ -355,8 +399,82 @@ powerstat_package,die=0,host=ubuntu,package_id=0,type=initial uncore_frequency_l powerstat_package,die=0,host=ubuntu,package_id=0,type=current uncore_frequency_mhz_cur=800i,uncore_frequency_limit_mhz_min=800,uncore_frequency_limit_mhz_max=2400 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000 -powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000 -powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_state_residency_percent=0.8 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c3_state_residency_percent=0 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c7_state_residency_percent=0 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0
cpu_c0_substate_c01_percent=0 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_substate_c02_percent=5.68 1606494744000000000 +powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_substate_c0_wait_percent=43.74 1606494744000000000 ``` + +## Supported CPU models + +| Model number | Processor name | `cpu_c1_state_residency`
`cpu_c6_state_residency`
`cpu_temperature`
`cpu_base_frequency` | `cpu_c3_state_residency` | `cpu_c7_state_residency` | `uncore_frequency` | +|--------------|---------------------------------|:----------------------------------------------------------------------------------------------------:|:------------------------:|:------------------------:|:------------------:| +| 0x1E | Intel Nehalem | ✓ | ✓ | | | +| 0x1F | Intel Nehalem-G | ✓ | ✓ | | | +| 0x1A | Intel Nehalem-EP | ✓ | ✓ | | | +| 0x2E | Intel Nehalem-EX | ✓ | ✓ | | | +| 0x25 | Intel Westmere | ✓ | ✓ | | | +| 0x2C | Intel Westmere-EP | ✓ | ✓ | | | +| 0x2F | Intel Westmere-EX | ✓ | ✓ | | | +| 0x2A | Intel Sandybridge | ✓ | ✓ | ✓ | | +| 0x2D | Intel Sandybridge-X | ✓ | ✓ | ✓ | | +| 0x3A | Intel Ivybridge | ✓ | ✓ | ✓ | | +| 0x3E | Intel Ivybridge-X | ✓ | ✓ | ✓ | | +| 0x3C | Intel Haswell | ✓ | ✓ | ✓ | | +| 0x3F | Intel Haswell-X | ✓ | ✓ | ✓ | | +| 0x45 | Intel Haswell-L | ✓ | ✓ | ✓ | | +| 0x46 | Intel Haswell-G | ✓ | ✓ | ✓ | | +| 0x3D | Intel Broadwell | ✓ | ✓ | ✓ | | +| 0x47 | Intel Broadwell-G | ✓ | ✓ | ✓ | ✓ | +| 0x4F | Intel Broadwell-X | ✓ | ✓ | | ✓ | +| 0x56 | Intel Broadwell-D | ✓ | ✓ | | ✓ | +| 0x4E | Intel Skylake-L | ✓ | ✓ | ✓ | | +| 0x5E | Intel Skylake | ✓ | ✓ | ✓ | | +| 0x55 | Intel Skylake-X | ✓ | | | ✓ | +| 0x8E | Intel KabyLake-L | ✓ | ✓ | ✓ | | +| 0x9E | Intel KabyLake | ✓ | ✓ | ✓ | | +| 0xA5 | Intel CometLake | ✓ | ✓ | ✓ | | +| 0xA6 | Intel CometLake-L | ✓ | ✓ | ✓ | | +| 0x66 | Intel CannonLake-L | ✓ | | ✓ | | +| 0x6A | Intel IceLake-X | ✓ | | | ✓ | +| 0x6C | Intel IceLake-D | ✓ | | | ✓ | +| 0x7D | Intel IceLake | ✓ | | | | +| 0x7E | Intel IceLake-L | ✓ | | ✓ | | +| 0x9D | Intel IceLake-NNPI | ✓ | | ✓ | | +| 0xA7 | Intel RocketLake | ✓ | | ✓ | | +| 0x8C | Intel TigerLake-L | ✓ | | ✓ | | +| 0x8D | Intel TigerLake | ✓ | | ✓ | | +| 0x8F | Intel Sapphire Rapids X | ✓ | | | ✓ | +| 0xCF | Intel Emerald Rapids X | ✓ | | | ✓ | +| 0xAD | Intel Granite Rapids X | ✓ | | | | +| 0x8A | Intel Lakefield | ✓ | | ✓ | | +| 0x97 | Intel AlderLake | ✓ | | 
✓ | ✓ | +| 0x9A | Intel AlderLake-L | ✓ | | ✓ | ✓ | +| 0xB7 | Intel RaptorLake | ✓ | | ✓ | ✓ | +| 0xBA | Intel RaptorLake-P | ✓ | | ✓ | ✓ | +| 0xBF | Intel RaptorLake-S | ✓ | | ✓ | ✓ | +| 0xAC | Intel MeteorLake | ✓ | | ✓ | ✓ | +| 0xAA | Intel MeteorLake-L | ✓ | | ✓ | ✓ | +| 0xC6 | Intel ArrowLake | ✓ | | ✓ | | +| 0xBD | Intel LunarLake | ✓ | | ✓ | | +| 0x37 | Intel Atom® Bay Trail | ✓ | | | | +| 0x4D | Intel Atom® Avoton | ✓ | | | | +| 0x4A | Intel Atom® Merrifield | ✓ | | | | +| 0x5A | Intel Atom® Moorefield | ✓ | | | | +| 0x4C | Intel Atom® Airmont | ✓ | ✓ | | | +| 0x5C | Intel Atom® Apollo Lake | ✓ | ✓ | ✓ | | +| 0x5F | Intel Atom® Denverton | ✓ | | | | +| 0x7A | Intel Atom® Goldmont | ✓ | ✓ | ✓ | | +| 0x86 | Intel Atom® Jacobsville | ✓ | | | | +| 0x96 | Intel Atom® Elkhart Lake | ✓ | | ✓ | | +| 0x9C | Intel Atom® Jasper Lake | ✓ | | ✓ | | +| 0xBE | Intel AlderLake-N | ✓ | | ✓ | | +| 0xAF | Intel Sierra Forest | ✓ | | | | +| 0xB6 | Intel Grand Ridge | ✓ | | | | +| 0x57 | Intel Xeon® PHI Knights Landing | ✓ | | | | +| 0x85 | Intel Xeon® PHI Knights Mill | ✓ | | | | diff --git a/plugins/inputs/intel_powerstat/dto.go b/plugins/inputs/intel_powerstat/dto.go deleted file mode 100644 index 75ae2645e4c8c..0000000000000 --- a/plugins/inputs/intel_powerstat/dto.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build linux - -package intel_powerstat - -type msrData struct { - mperf uint64 - aperf uint64 - timeStampCounter uint64 - c3 uint64 - c6 uint64 - c7 uint64 - throttleTemp int64 - temp int64 - mperfDelta uint64 - aperfDelta uint64 - timeStampCounterDelta uint64 - c3Delta uint64 - c6Delta uint64 - c7Delta uint64 - readDate int64 -} - -type raplData struct { - dramCurrentEnergy float64 - socketCurrentEnergy float64 - socketEnergy float64 - dramEnergy float64 - readDate int64 -} - -type cpuInfo struct { - physicalID string - coreID string - cpuID string - vendorID string - cpuFamily string - model string - flags string -} diff --git a/plugins/inputs/intel_powerstat/fetcher.go
b/plugins/inputs/intel_powerstat/fetcher.go new file mode 100644 index 0000000000000..bdbd44ef1897d --- /dev/null +++ b/plugins/inputs/intel_powerstat/fetcher.go @@ -0,0 +1,139 @@ +//go:build linux && amd64 + +package intel_powerstat + +import ( + ptel "github.com/intel/powertelemetry" +) + +// topologyFetcher fetches topology information of the host. +type topologyFetcher interface { + // GetMsrCPUIDs returns a slice with available CPU IDs of the host for which msr will access to. + GetMsrCPUIDs() []int + + // GetPerfCPUIDs returns a slice with available CPU IDs of the host for which perf will access to. + GetPerfCPUIDs() []int + + // GetPackageIDs returns a slice with available package IDs of the host. + GetPackageIDs() []int + + // GetCPUPackageID returns the package ID of the host corresponding to the given CPU ID. + GetCPUPackageID(cpuID int) (int, error) + + // GetCPUCoreID returns the core ID of the host corresponding to the given CPU ID. + GetCPUCoreID(cpuID int) (int, error) + + // GetPackageDieIDs returns the die IDs of the host corresponding to the given package ID. + GetPackageDieIDs(packageID int) ([]int, error) +} + +// cpuFreqFetcher fetches supported CPU-related metrics relying on core frequency. +type cpuFreqFetcher interface { + // GetCPUFrequency returns the current frequency value of a given CPU ID, in MHz. + GetCPUFrequency(cpuID int) (float64, error) +} + +// cpuMsrFetcher fetches supported CPU-related metrics relying on msr registers. +type cpuMsrFetcher interface { + // GetCPUTemperature returns the temperature value of a given CPU ID, in degrees Celsius. + GetCPUTemperature(cpuID int) (uint64, error) + + // UpdatePerCPUMetrics reads multiple MSR offsets needed to get metric values that are time sensitive. + // Below are the list of methods that need the update to be performed beforehand. + UpdatePerCPUMetrics(cpuID int) error + + // GetCPUC0StateResidency returns the C0 state residency value of a given CPU ID, as a percentage. 
+ GetCPUC0StateResidency(cpuID int) (float64, error) + + // GetCPUC1StateResidency returns the C1 state residency value of a given CPU ID, as a percentage. + GetCPUC1StateResidency(cpuID int) (float64, error) + + // GetCPUC3StateResidency returns the C3 state residency value of a given CPU ID, as a percentage. + GetCPUC3StateResidency(cpuID int) (float64, error) + + // GetCPUC6StateResidency returns the C6 state residency value of a given CPU ID, as a percentage. + GetCPUC6StateResidency(cpuID int) (float64, error) + + // GetCPUC7StateResidency returns the C7 state residency value of a given CPU ID, as a percentage. + GetCPUC7StateResidency(cpuID int) (float64, error) + + // GetCPUBusyFrequencyMhz returns the busy frequency value of a given CPU ID, in MHz. + GetCPUBusyFrequencyMhz(cpuID int) (float64, error) +} + +// cpuPerfFetcher fetches supported CPU-related metrics relying on perf events. +type cpuPerfFetcher interface { + // ReadPerfEvents reads values of perf events needed to get C0X state residency metrics. + // Below getter methods that need this operation to be performed previously. + ReadPerfEvents() error + + // DeactivatePerfEvents deactivates perf events. It closes file descriptors used to get perf event values. + DeactivatePerfEvents() error + + // GetCPUC0SubstateC01Percent takes a CPU ID and returns a value indicating the percentage of time + // the processor spent in its C0.1 substate out of the total time in the C0 state. + // C0.1 is characterized by a light-weight slower wakeup time but more power-saving optimized state. + GetCPUC0SubstateC01Percent(cpuID int) (float64, error) + + // GetCPUC0SubstateC02Percent takes a CPU ID and returns a value indicating the percentage of time + // the processor spent in its C0.2 substate out of the total time in the C0 state. + // C0.2 is characterized by a light-weight faster wakeup time but less power saving optimized state. 
+ GetCPUC0SubstateC02Percent(cpuID int) (float64, error) + + // GetCPUC0SubstateC0WaitPercent takes a CPU ID and returns a value indicating the percentage of time + // the processor spent in its C0_Wait substate out of the total time in the C0 state. + // CPU is in C0_Wait substate when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state. + GetCPUC0SubstateC0WaitPercent(cpuID int) (float64, error) +} + +// packageRaplFetcher fetches supported package related metrics relying on rapl. +type packageRaplFetcher interface { + // GetCurrentPackagePowerConsumptionWatts returns the current package power consumption value of a given package ID, in watts. + GetCurrentPackagePowerConsumptionWatts(packageID int) (float64, error) + + // GetCurrentDramPowerConsumptionWatts returns the current dram power consumption value of a given package ID, in watts. + GetCurrentDramPowerConsumptionWatts(packageID int) (float64, error) + + // GetPackageThermalDesignPowerWatts returns the thermal power design value of a given package ID, in watts. + GetPackageThermalDesignPowerWatts(packageID int) (float64, error) +} + +// packageUncoreFreqFetcher fetches supported package related metrics relying on uncore frequency. +type packageUncoreFreqFetcher interface { + // GetInitialUncoreFrequencyMin returns the minimum initial uncore frequency value of a given package ID, in MHz. + GetInitialUncoreFrequencyMin(packageID, dieID int) (float64, error) + + // GetInitialUncoreFrequencyMax returns the maximum initial uncore frequency value of a given package ID, in MHz. + GetInitialUncoreFrequencyMax(packageID, dieID int) (float64, error) + + // GetCustomizedUncoreFrequencyMin returns the minimum custom uncore frequency value of a given package ID, in MHz. + GetCustomizedUncoreFrequencyMin(packageID, dieID int) (float64, error) + + // GetCustomizedUncoreFrequencyMax returns the maximum custom uncore frequency value of a given package ID, in MHz. 
+ GetCustomizedUncoreFrequencyMax(packageID, dieID int) (float64, error) + + // GetCurrentUncoreFrequency returns the current uncore frequency value of a given package ID, in MHz. + GetCurrentUncoreFrequency(packageID, dieID int) (float64, error) +} + +// packageMsrFetcher fetches supported package related metrics relying on msr registers. +type packageMsrFetcher interface { + // GetCPUBaseFrequency returns the CPU base frequency value of a given package ID, in MHz. + GetCPUBaseFrequency(packageID int) (uint64, error) + + // GetMaxTurboFreqList returns a list of max turbo frequencies and related active cores of a given package ID. + GetMaxTurboFreqList(packageID int) ([]ptel.MaxTurboFreq, error) +} + +// metricFetcher fetches metrics supported by this plugin. +type metricFetcher interface { + topologyFetcher + + cpuFreqFetcher + cpuMsrFetcher + cpuPerfFetcher + + packageRaplFetcher + packageUncoreFreqFetcher + packageMsrFetcher +} diff --git a/plugins/inputs/intel_powerstat/file.go b/plugins/inputs/intel_powerstat/file.go deleted file mode 100644 index 5e74521b5b6eb..0000000000000 --- a/plugins/inputs/intel_powerstat/file.go +++ /dev/null @@ -1,172 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "bufio" - "encoding/binary" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "strconv" - "strings" - "time" -) - -// fileService is responsible for handling operations on files. -type fileService interface { - getCPUInfoStats() (map[string]*cpuInfo, error) - getStringsMatchingPatternOnPath(path string) ([]string, error) - readFile(path string) ([]byte, error) - readFileToFloat64(reader io.Reader) (float64, int64, error) - readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) -} - -type fileServiceImpl struct { -} - -// getCPUInfoStats retrieves basic information about CPU from /proc/cpuinfo. 
-func (fs *fileServiceImpl) getCPUInfoStats() (map[string]*cpuInfo, error) { - path := "/proc/cpuinfo" - cpuInfoFile, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("error while reading %q: %w", path, err) - } - defer cpuInfoFile.Close() - - scanner := bufio.NewScanner(cpuInfoFile) - - processorRegexp := regexp.MustCompile(`^processor\t+:\s([0-9]+)\n*$`) - physicalIDRegexp := regexp.MustCompile(`^physical id\t+:\s([0-9]+)\n*$`) - coreIDRegexp := regexp.MustCompile(`^core id\t+:\s([0-9]+)\n*$`) - vendorIDRegexp := regexp.MustCompile(`^vendor_id\t+:\s([a-zA-Z]+)\n*$`) - cpuFamilyRegexp := regexp.MustCompile(`^cpu\sfamily\t+:\s([0-9]+)\n*$`) - modelRegexp := regexp.MustCompile(`^model\t+:\s([0-9]+)\n*$`) - flagsRegexp := regexp.MustCompile(`^flags\t+:\s(.+)\n*$`) - - stats := make(map[string]*cpuInfo) - currentInfo := &cpuInfo{} - - for scanner.Scan() { - line := scanner.Text() - - processorRes := processorRegexp.FindStringSubmatch(line) - if len(processorRes) > 1 { - currentInfo = &cpuInfo{ - cpuID: processorRes[1], - } - } - - vendorIDRes := vendorIDRegexp.FindStringSubmatch(line) - if len(vendorIDRes) > 1 { - currentInfo.vendorID = vendorIDRes[1] - } - - physicalIDRes := physicalIDRegexp.FindStringSubmatch(line) - if len(physicalIDRes) > 1 { - currentInfo.physicalID = physicalIDRes[1] - } - - coreIDRes := coreIDRegexp.FindStringSubmatch(line) - if len(coreIDRes) > 1 { - currentInfo.coreID = coreIDRes[1] - } - - cpuFamilyRes := cpuFamilyRegexp.FindStringSubmatch(line) - if len(cpuFamilyRes) > 1 { - currentInfo.cpuFamily = cpuFamilyRes[1] - } - - modelRes := modelRegexp.FindStringSubmatch(line) - if len(modelRes) > 1 { - currentInfo.model = modelRes[1] - } - - flagsRes := flagsRegexp.FindStringSubmatch(line) - if len(flagsRes) > 1 { - currentInfo.flags = flagsRes[1] - - // Flags is the last value we have to acquire, so currentInfo is added to map. 
- stats[currentInfo.cpuID] = currentInfo - } - } - - return stats, nil -} - -// getStringsMatchingPatternOnPath looks for filenames and directory names on path matching given regexp. -// It ignores file system errors such as I/O errors reading directories. The only possible returned error -// is ErrBadPattern, when pattern is malformed. -func (fs *fileServiceImpl) getStringsMatchingPatternOnPath(path string) ([]string, error) { - return filepath.Glob(path) -} - -// readFile reads file on path and return string content. -func (fs *fileServiceImpl) readFile(path string) ([]byte, error) { - out, err := os.ReadFile(path) - if err != nil { - return make([]byte, 0), err - } - return out, nil -} - -// readFileToFloat64 reads file on path and tries to parse content to float64. -func (fs *fileServiceImpl) readFileToFloat64(reader io.Reader) (float64, int64, error) { - read, err := io.ReadAll(reader) - if err != nil { - return 0, 0, err - } - - readDate := time.Now().UnixNano() - - // Remove new line character - trimmedString := strings.TrimRight(string(read), "\n") - // Parse result to float64 - parsedValue, err := strconv.ParseFloat(trimmedString, 64) - if err != nil { - return 0, 0, fmt.Errorf("error parsing string to float for %s", trimmedString) - } - - return parsedValue, readDate, nil -} - -// readFileAtOffsetToUint64 reads 8 bytes from passed file at given offset. 
-func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) { - buffer := make([]byte, 8) - - if offset == 0 { - return 0, fmt.Errorf("file offset %d should not be 0", offset) - } - - _, err := reader.ReadAt(buffer, offset) - if err != nil { - return 0, fmt.Errorf("error on reading file at offset %d: %w", offset, err) - } - - return binary.LittleEndian.Uint64(buffer), nil -} - -func newFileService() *fileServiceImpl { - return &fileServiceImpl{} -} - -func checkFile(path string) error { - if path == "" { - return fmt.Errorf("empty path given") - } - - lInfo, err := os.Lstat(path) - if err != nil { - if os.IsNotExist(err) { - return fmt.Errorf("file %q doesn't exist", path) - } - return fmt.Errorf("cannot obtain file info of %q: %w", path, err) - } - mode := lInfo.Mode() - if mode&os.ModeSymlink != 0 { - return fmt.Errorf("file %q is a symlink", path) - } - return nil -} diff --git a/plugins/inputs/intel_powerstat/file_mock_test.go b/plugins/inputs/intel_powerstat/file_mock_test.go deleted file mode 100644 index b4a5e16fb378f..0000000000000 --- a/plugins/inputs/intel_powerstat/file_mock_test.go +++ /dev/null @@ -1,149 +0,0 @@ -//go:build linux - -// Code generated by mockery v2.12.3. DO NOT EDIT. 
- -package intel_powerstat - -import ( - io "io" - - mock "github.com/stretchr/testify/mock" -) - -// mockFileService is an autogenerated mock type for the mockFileService type -type mockFileService struct { - mock.Mock -} - -// getCPUInfoStats provides a mock function with given fields: -func (_m *mockFileService) getCPUInfoStats() (map[string]*cpuInfo, error) { - ret := _m.Called() - - var r0 map[string]*cpuInfo - if rf, ok := ret.Get(0).(func() map[string]*cpuInfo); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(map[string]*cpuInfo) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func() error); ok { - r1 = rf() - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// getStringsMatchingPatternOnPath provides a mock function with given fields: path -func (_m *mockFileService) getStringsMatchingPatternOnPath(path string) ([]string, error) { - ret := _m.Called(path) - - var r0 []string - if rf, ok := ret.Get(0).(func(string) []string); ok { - r0 = rf(path) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]string) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok { - r1 = rf(path) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// readFile provides a mock function with given fields: path -func (_m *mockFileService) readFile(path string) ([]byte, error) { - ret := _m.Called(path) - - var r0 []byte - if rf, ok := ret.Get(0).(func(string) []byte); ok { - r0 = rf(path) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]byte) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok { - r1 = rf(path) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// readFileAtOffsetToUint64 provides a mock function with given fields: reader, offset -func (_m *mockFileService) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) { - ret := _m.Called(reader, offset) - - var r0 uint64 - if rf, ok := ret.Get(0).(func(io.ReaderAt, int64) uint64); ok { - r0 = 
rf(reader, offset) - } else { - r0 = ret.Get(0).(uint64) - } - - var r1 error - if rf, ok := ret.Get(1).(func(io.ReaderAt, int64) error); ok { - r1 = rf(reader, offset) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// readFileToFloat64 provides a mock function with given fields: reader -func (_m *mockFileService) readFileToFloat64(reader io.Reader) (float64, int64, error) { - ret := _m.Called(reader) - - var r0 float64 - if rf, ok := ret.Get(0).(func(io.Reader) float64); ok { - r0 = rf(reader) - } else { - r0 = ret.Get(0).(float64) - } - - var r1 int64 - if rf, ok := ret.Get(1).(func(io.Reader) int64); ok { - r1 = rf(reader) - } else { - r1 = ret.Get(1).(int64) - } - - var r2 error - if rf, ok := ret.Get(2).(func(io.Reader) error); ok { - r2 = rf(reader) - } else { - r2 = ret.Error(2) - } - - return r0, r1, r2 -} - -type newmockFileServiceT interface { - mock.TestingT - Cleanup(func()) -} - -// newmockFileService creates a new instance of mockFileService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func newmockFileService(t newmockFileServiceT) *mockFileService { - mock := &mockFileService{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/plugins/inputs/intel_powerstat/intel_powerstat.go b/plugins/inputs/intel_powerstat/intel_powerstat.go index cd9418976330a..9990deef014b0 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat.go @@ -1,5 +1,5 @@ //go:generate ../../../tools/readme_config_includer/generator -//go:build linux +//go:build linux && amd64 package intel_powerstat @@ -7,909 +7,1202 @@ import ( _ "embed" "errors" "fmt" - "math/big" + "os" + "slices" "strconv" "strings" - "sync" "time" + ptel "github.com/intel/powertelemetry" + cpuUtil "github.com/shirou/gopsutil/v3/cpu" + "golang.org/x/exp/constraints" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/config" + "github.com/influxdata/telegraf/models" "github.com/influxdata/telegraf/plugins/inputs" ) //go:embed sample.conf var sampleConfig string -const ( - cpuFrequency = "cpu_frequency" - cpuBusyFrequency = "cpu_busy_frequency" - cpuTemperature = "cpu_temperature" - cpuC0StateResidency = "cpu_c0_state_residency" - cpuC1StateResidency = "cpu_c1_state_residency" - cpuC6StateResidency = "cpu_c6_state_residency" - cpuBusyCycles = "cpu_busy_cycles" - packageCurrentPowerConsumption = "current_power_consumption" - packageCurrentDramPowerConsumption = "current_dram_power_consumption" - packageThermalDesignPower = "thermal_design_power" - packageTurboLimit = "max_turbo_frequency" - packageUncoreFrequency = "uncore_frequency" - packageCPUBaseFrequency = "cpu_base_frequency" - percentageMultiplier = 100 -) - -// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization. +// PowerStat plugin enables monitoring of platform metrics. 
type PowerStat struct { - CPUMetrics []string `toml:"cpu_metrics"` - PackageMetrics []string `toml:"package_metrics"` - Log telegraf.Logger `toml:"-"` - - fs fileService - rapl raplService - msr msrService - - cpuFrequency bool - cpuBusyFrequency bool - cpuTemperature bool - cpuC0StateResidency bool - cpuC1StateResidency bool - cpuC6StateResidency bool - cpuBusyCycles bool - - packageTurboLimit bool - packageCurrentPowerConsumption bool - packageCurrentDramPowerConsumption bool - packageThermalDesignPower bool - packageUncoreFrequency bool - packageCPUBaseFrequency bool - - cpuBusClockValue float64 - cpuInfo map[string]*cpuInfo - skipFirstIteration bool - logOnce map[string]error + CPUMetrics []cpuMetricType `toml:"cpu_metrics"` + PackageMetrics []packageMetricType `toml:"package_metrics"` + IncludedCPUs []string `toml:"included_cpus"` + ExcludedCPUs []string `toml:"excluded_cpus"` + EventDefinitions string `toml:"event_definitions"` + MsrReadTimeout config.Duration `toml:"msr_read_timeout"` + Log telegraf.Logger `toml:"-"` + + parsedIncludedCores []int + parsedExcludedCores []int + + parsedCPUTimedMsrMetrics []cpuMetricType + parsedCPUPerfMetrics []cpuMetricType + parsedPackageRaplMetrics []packageMetricType + parsedPackageMsrMetrics []packageMetricType + + option optionGenerator + fetcher metricFetcher + + needsCoreFreq bool + needsMsrCPU bool + needsPerf bool + needsTimeRelatedMsr bool + + needsRapl bool + needsMsrPackage bool + + logOnce map[string]struct{} } +// SampleConfig returns a sample configuration (See sample.conf). func (*PowerStat) SampleConfig() string { return sampleConfig } -// Init performs one time setup of the plugin +// Init parses config file and sets up configuration of the plugin. 
func (p *PowerStat) Init() error { - p.parsePackageMetricsConfig() - p.parseCPUMetricsConfig() - err := p.verifyProcessor() - if err != nil { + if err := p.disableUnsupportedMetrics(); err != nil { return err } - p.initMSR() - p.initRaplService() - - if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() { - return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather") + if err := p.parseConfig(); err != nil { + return err } - p.fillCPUBusClock() + p.option = &optGenerator{} + p.logOnce = make(map[string]struct{}) + return nil } -func (p *PowerStat) initMSR() { - // Initialize MSR service only when there is at least one metric enabled - if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency || - p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency { - p.msr = newMsrServiceWithFs(p.Log, p.fs) - } -} +// Start initializes the metricFetcher interface of the receiver to gather metrics. +func (p *PowerStat) Start(_ telegraf.Accumulator) error { + opts := p.option.generate(optConfig{ + cpuMetrics: p.CPUMetrics, + packageMetrics: p.PackageMetrics, + includedCPUs: p.parsedIncludedCores, + excludedCPUs: p.parsedExcludedCores, + perfEventFile: p.EventDefinitions, + msrReadTimeout: time.Duration(p.MsrReadTimeout), + log: p.Log, + }) + + var err error + var initErr *ptel.MultiError + p.fetcher, err = ptel.New(opts...) + if err != nil { + if !errors.As(err, &initErr) { + // Error caused by failing to get information about the CPU, or CPU is not supported. 
+ return fmt.Errorf("failed to initialize metric fetcher interface: %w", err) + } -func (p *PowerStat) initRaplService() { - if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit || - p.packageUncoreFrequency || p.packageCPUBaseFrequency { - p.rapl = newRaplServiceWithFs(p.Log, p.fs) + // One or more modules, needed to get metrics, failed to initialize. The plugin continues its execution, and it will not + // gather metrics relying on these modules. Instead, logs the error message including module names that failed to initialize. + p.Log.Warnf("Plugin started with errors: %v", err) } + + return nil } -// fill CPUBusClockValue if required -func (p *PowerStat) fillCPUBusClock() { - if p.packageCPUBaseFrequency { - // cpuBusClock is the same for every core/socket. - busClockInfo := p.getBusClock("0") - if busClockInfo == 0 { - p.Log.Warn("Disabling package metric: cpu_base_frequency_mhz. Can't detect bus clock value") - p.packageCPUBaseFrequency = false - return - } +// Stop deactivates perf events if one or more of the requested metrics rely on perf. +func (p *PowerStat) Stop() { + if !p.needsPerf { + return + } - p.cpuBusClockValue = busClockInfo + if err := p.fetcher.DeactivatePerfEvents(); err != nil { + p.Log.Errorf("Failed to deactivate perf events: %v", err) } } -// Gather takes in an accumulator and adds the metrics that the Input gathers +// Gather collects the plugin's metrics. func (p *PowerStat) Gather(acc telegraf.Accumulator) error { - if p.areGlobalMetricsEnabled() { - p.addGlobalMetrics(acc) + // gather CPU metrics relying on coreFreq and msr which share CPU IDs. 
+ if p.needsCoreFreq || p.needsMsrCPU { + p.addCPUMetrics(acc) } - if p.areCoreMetricsEnabled() { - if p.msr.isMsrLoaded() { - p.logOnce["msr"] = nil - p.addPerCoreMetrics(acc) - } else { - err := errors.New("Error while trying to read MSR (probably msr module was not loaded)") - if val := p.logOnce["msr"]; val == nil || val.Error() != err.Error() { - p.Log.Errorf("%v", err) - // Remember that specific error occurs to omit logging next time - p.logOnce["msr"] = err - } - } + // gather CPU metrics relying on perf. + if p.needsPerf { + p.addCPUPerfMetrics(acc) } - // Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations - p.skipFirstIteration = false + // gather package metrics. + if len(p.PackageMetrics) != 0 { + p.addPackageMetrics(acc) + } return nil } -func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) { - // Prepare RAPL data each gather because there is a possibility to disable rapl kernel module - p.rapl.initializeRaplData() - for socketID := range p.rapl.getRaplData() { - if p.packageTurboLimit { - p.addTurboRatioLimit(socketID, acc) - } +// parseConfig is a helper method that parses configuration fields from the receiver such as included/excluded CPU IDs. 
+func (p *PowerStat) parseConfig() error { + if p.MsrReadTimeout < 0 { + return errors.New("msr_read_timeout should be positive number or equal to 0 (to disable timeouts)") + } - if p.packageUncoreFrequency { - die := maxDiePerSocket(socketID) - for actualDie := 0; actualDie < die; actualDie++ { - p.addUncoreFreq(socketID, strconv.Itoa(actualDie), acc) - } - } + if err := p.parsePackageMetrics(); err != nil { + return fmt.Errorf("failed to parse package metrics: %w", err) + } - if p.packageCPUBaseFrequency { - p.addCPUBaseFreq(socketID, acc) - } + if err := p.parseCPUMetrics(); err != nil { + return fmt.Errorf("failed to parse cpu metrics: %w", err) + } - err := p.rapl.retrieveAndCalculateData(socketID) - if err != nil { - // In case of an error skip calculating metrics for this socket - if val := p.logOnce[socketID+"rapl"]; val == nil || val.Error() != err.Error() { - p.Log.Errorf("Error fetching rapl data for socket %s, err: %v", socketID, err) - // Remember that specific error occurs for socketID to omit logging next time - p.logOnce[socketID+"rapl"] = err - } - continue - } + if len(p.CPUMetrics) == 0 && len(p.PackageMetrics) == 0 { + return errors.New("no metrics were found in the configuration file") + } - // If error stops occurring, clear logOnce indicator - p.logOnce[socketID+"rapl"] = nil - if p.packageThermalDesignPower { - p.addThermalDesignPowerMetric(socketID, acc) - } + p.parseCPUTimeRelatedMsrMetrics() + p.parseCPUPerfMetrics() - if p.skipFirstIteration { - continue - } - if p.packageCurrentPowerConsumption { - p.addCurrentSocketPowerConsumption(socketID, acc) - } - if p.packageCurrentDramPowerConsumption { - p.addCurrentDramPowerConsumption(socketID, acc) - } + p.parsePackageRaplMetrics() + p.parsePackageMsrMetrics() + + if len(p.ExcludedCPUs) != 0 && len(p.IncludedCPUs) != 0 { + return errors.New("both 'included_cpus' and 'excluded_cpus' configured; provide only one or none of the two") } -} -func maxDiePerSocket(_ string) int { - /* - TODO: - At 
the moment, linux does not distinguish between more dies per socket. - This piece of code will need to be upgraded in the future. - https://github.com/torvalds/linux/blob/v5.17/arch/x86/include/asm/topology.h#L153 - */ - return 1 -} -func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accumulator) { - err := checkFile("/sys/devices/system/cpu/intel_uncore_frequency") - if err != nil { - err := fmt.Errorf("Error while checking existing intel_uncore_frequency (probably intel-uncore-frequency module was not loaded)") - if val := p.logOnce["intel_uncore_frequency"]; val == nil || val.Error() != err.Error() { - p.Log.Errorf("%v", err) - // Remember that specific error occurs to omit logging next time - p.logOnce["intel_uncore_frequency"] = err + var err error + if len(p.ExcludedCPUs) != 0 { + p.parsedExcludedCores, err = parseCores(p.ExcludedCPUs) + if err != nil { + return fmt.Errorf("failed to parse excluded CPUs: %w", err) } - return } - p.logOnce["intel_uncore_frequency"] = nil - p.readUncoreFreq("initial", socketID, die, acc) - p.readUncoreFreq("current", socketID, die, acc) -} -func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) { - fields := map[string]interface{}{} - if typeFreq == "current" { - if p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() { - p.logOnce[socketID+"msr"] = nil - cpuID, err := p.GetCPUIDFromSocketID(socketID) - if err != nil { - p.Log.Debugf("Error while reading socket ID: %v", err) - return - } - actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, msrUncorePerfStatusString) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrUncorePerfStatusString, err) - return - } - actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100 - fields["uncore_frequency_mhz_cur"] = actualUncoreFreq - } else { - err := errors.New("Error while trying to read MSR (probably msr module was not loaded), uncore_frequency_mhz_cur metric will not be collected") - if val := 
p.logOnce[socketID+"msr"]; val == nil || val.Error() != err.Error() { - p.Log.Errorf("%v", err) - // Remember that specific error occurs for socketID to omit logging next time - p.logOnce[socketID+"msr"] = err - } + if len(p.IncludedCPUs) != 0 { + p.parsedIncludedCores, err = parseCores(p.IncludedCPUs) + if err != nil { + return fmt.Errorf("failed to parse included CPUs: %w", err) } } - initMinFreq, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "min", die) - if err != nil { - p.Log.Errorf("Error while retrieving minimum uncore frequency of the socket %s, err: %v", socketID, err) - return + + p.needsCoreFreq = needsCoreFreq(p.CPUMetrics) + p.needsMsrCPU = needsMsrCPU(p.CPUMetrics) + p.needsPerf = needsPerf(p.CPUMetrics) + p.needsTimeRelatedMsr = needsTimeRelatedMsr(p.CPUMetrics) + + p.needsRapl = needsRapl(p.PackageMetrics) + p.needsMsrPackage = needsMsrPackage(p.PackageMetrics) + + // Skip checks on event_definitions file path if perf module is not needed. + if !p.needsPerf { + return nil } - initMaxFreq, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "max", die) + + // Check that event_definitions option contains a valid file path. + if len(p.EventDefinitions) == 0 { + return errors.New("'event_definitions' contains an empty path") + } + + fInfo, err := os.Lstat(p.EventDefinitions) if err != nil { - p.Log.Errorf("Error while retrieving maximum uncore frequency of the socket %s, err: %v", socketID, err) - return + if errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("'event_definitions' file %q does not exist", p.EventDefinitions) + } + return fmt.Errorf("could not get the info for file %q: %w", p.EventDefinitions, err) } - tags := map[string]string{ - "package_id": socketID, - "type": typeFreq, - "die": die, + // Check that file is not a symlink. 
+ if fMode := fInfo.Mode(); fMode&os.ModeSymlink != 0 { + return fmt.Errorf("file %q is a symlink", p.EventDefinitions) } - fields["uncore_frequency_limit_mhz_min"] = initMinFreq - fields["uncore_frequency_limit_mhz_max"] = initMaxFreq - acc.AddGauge("powerstat_package", fields, tags) + return nil } -func (p *PowerStat) addThermalDesignPowerMetric(socketID string, acc telegraf.Accumulator) { - maxPower, err := p.rapl.getConstraintMaxPowerWatts(socketID) - if err != nil { - p.Log.Errorf("Error while retrieving TDP of the socket %s, err: %v", socketID, err) - return +// parsePackageMetrics ensures there are no duplicates in 'package_metrics'. +// If 'package_metrics' is not provided, the following default package metrics are set: +// "current_power_consumption", "current_dram_power_consumption", and "thermal_design_power". +func (p *PowerStat) parsePackageMetrics() error { + if p.PackageMetrics == nil { + // Sets default package metrics if `package_metrics` config option is an empty list. + p.PackageMetrics = []packageMetricType{ + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + } + return nil } - tags := map[string]string{ - "package_id": socketID, + if hasDuplicate(p.PackageMetrics) { + return errors.New("package metrics contains duplicates") } + return nil +} - fields := map[string]interface{}{ - "thermal_design_power_watts": roundFloatToNearestTwoDecimalPlaces(maxPower), +// parseCPUMetrics ensures there are no duplicates in 'cpu_metrics'. +// Also, it warns if deprecated metric has been set. 
+func (p *PowerStat) parseCPUMetrics() error { + if slices.Contains(p.CPUMetrics, cpuBusyCycles) { + models.PrintOptionValueDeprecationNotice(telegraf.Warn, "inputs.intel_powerstat", "cpu_metrics", cpuBusyCycles, telegraf.DeprecationInfo{ + Since: "1.23.0", + RemovalIn: "2.0.0", + Notice: "'cpu_c0_state_residency' metric name should be used instead.", + }) } - acc.AddGauge("powerstat_package", fields, tags) + if hasDuplicate(p.CPUMetrics) { + return errors.New("cpu metrics contains duplicates") + } + return nil } -func (p *PowerStat) addCurrentSocketPowerConsumption(socketID string, acc telegraf.Accumulator) { - tags := map[string]string{ - "package_id": socketID, +// parsedCPUTimedMsrMetrics parses only the metrics which depend on time-related MSR offset reads from CPU metrics +// of the receiver, and sets them to a separate slice. +func (p *PowerStat) parseCPUTimeRelatedMsrMetrics() { + p.parsedCPUTimedMsrMetrics = make([]cpuMetricType, 0) + for _, m := range p.CPUMetrics { + switch m { + case cpuC0StateResidency: + case cpuC1StateResidency: + case cpuC3StateResidency: + case cpuC6StateResidency: + case cpuC7StateResidency: + case cpuBusyCycles: + case cpuBusyFrequency: + default: + continue + } + p.parsedCPUTimedMsrMetrics = append(p.parsedCPUTimedMsrMetrics, m) } +} - fields := map[string]interface{}{ - "current_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].socketCurrentEnergy), +// parseCPUPerfMetrics parses only the metrics which depend on perf event reads from CPU metrics of the receiver, and sets +// them to a separate slice. 
+func (p *PowerStat) parseCPUPerfMetrics() { + p.parsedCPUPerfMetrics = make([]cpuMetricType, 0) + for _, m := range p.CPUMetrics { + switch m { + case cpuC0SubstateC01Percent: + case cpuC0SubstateC02Percent: + case cpuC0SubstateC0WaitPercent: + default: + continue + } + p.parsedCPUPerfMetrics = append(p.parsedCPUPerfMetrics, m) } - - acc.AddGauge("powerstat_package", fields, tags) } -func (p *PowerStat) addCurrentDramPowerConsumption(socketID string, acc telegraf.Accumulator) { - tags := map[string]string{ - "package_id": socketID, +// parsePackageRaplMetrics parses only the metrics which depend on rapl from package metrics of the receiver, and sets +// them to a separate slice. +func (p *PowerStat) parsePackageRaplMetrics() { + p.parsedPackageRaplMetrics = make([]packageMetricType, 0) + for _, m := range p.PackageMetrics { + switch m { + case packageCurrentPowerConsumption: + case packageCurrentDramPowerConsumption: + case packageThermalDesignPower: + default: + continue + } + p.parsedPackageRaplMetrics = append(p.parsedPackageRaplMetrics, m) } +} - fields := map[string]interface{}{ - "current_dram_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].dramCurrentEnergy), +// parsePackageMsrMetrics parses only the metrics which depend on msr from package metrics of the receiver, and sets +// them to a separate slice. +func (p *PowerStat) parsePackageMsrMetrics() { + p.parsedPackageMsrMetrics = make([]packageMetricType, 0) + for _, m := range p.PackageMetrics { + switch m { + case packageCPUBaseFrequency: + case packageTurboLimit: + default: + continue + } + p.parsedPackageMsrMetrics = append(p.parsedPackageMsrMetrics, m) } +} - acc.AddGauge("powerstat_package", fields, tags) +// hasDuplicate takes a slice of a generic type, and returns true +// if the slice contains duplicates. Otherwise, it returns false. 
+func hasDuplicate[S ~[]E, E constraints.Ordered](s S) bool { + m := make(map[E]struct{}, len(s)) + for _, v := range s { + if _, ok := m[v]; ok { + return true + } + m[v] = struct{}{} + } + return false } -func (p *PowerStat) addPerCoreMetrics(acc telegraf.Accumulator) { - var wg sync.WaitGroup - wg.Add(len(p.msr.getCPUCoresData())) +// parseCores takes a slice of strings where each string represents a group of +// one or more CPU IDs (e.g. ["0", "1-3", "4,5,6"] or ["1-3,4"]). It returns a slice +// of integers. +func parseCores(cores []string) ([]int, error) { + parsedCores := make([]int, 0, len(cores)) + for _, elem := range cores { + pCores, err := parseGroupCores(elem) + if err != nil { + return nil, fmt.Errorf("failed to parse core group: %w", err) + } + parsedCores = append(parsedCores, pCores...) + } - for cpuID := range p.msr.getCPUCoresData() { - go p.addMetricsForSingleCore(cpuID, acc, &wg) + if hasDuplicate(parsedCores) { + return nil, errors.New("core values cannot be duplicated") } + return parsedCores, nil +} + +// parseGroupCores takes a string which represents a group of one or more +// CPU IDs (e.g. "0", "1-3", or "4,5,6") and returns a slice of integers with +// all CPU IDs within the group. +func parseGroupCores(coreGroup string) ([]int, error) { + coreElems := strings.Split(coreGroup, ",") + cores := make([]int, 0, len(coreElems)) - wg.Wait() + for _, coreElem := range coreElems { + if strings.Contains(coreElem, "-") { + pCores, err := parseCoreRange(coreElem) + if err != nil { + return nil, fmt.Errorf("failed to parse core range %q: %w", coreElem, err) + } + cores = append(cores, pCores...) 
// parseCoreRange expands a core range such as "0-4" into the slice of all
// integers within it, bounds included ("0-4" yields [0 1 2 3 4]).
//
// It returns an error when the string is not of the form "<low>-<high>", when
// either bound is not an integer, when high is less than low, or when the range
// would contain more than maxCoreRangeLen entries. The last check keeps a
// mistyped bound in the configuration from overflowing the slice length (which
// makes the allocation panic) or from requesting an enormous allocation.
func parseCoreRange(coreRange string) ([]int, error) {
	// Defensive sanity limit on the number of IDs a single range may expand to.
	// NOTE(review): chosen to be generous rather than exact; adjust if a tighter
	// or looser bound is preferred.
	const maxCoreRangeLen = 1 << 16

	rangeVals := strings.Split(coreRange, "-")
	if len(rangeVals) != 2 {
		return nil, errors.New("invalid core range format")
	}

	low, err := strconv.Atoi(rangeVals[0])
	if err != nil {
		return nil, fmt.Errorf("failed to parse low bounds' core range: %w", err)
	}

	high, err := strconv.Atoi(rangeVals[1])
	if err != nil {
		return nil, fmt.Errorf("failed to parse high bounds' core range: %w", err)
	}

	if high < low {
		return nil, errors.New("high bound of core range cannot be less than low bound")
	}

	// "-" is the separator, so neither bound can carry a minus sign: both are
	// non-negative and high-low cannot overflow. Comparing the difference (not
	// high-low+1) keeps the check itself overflow-free.
	if high-low >= maxCoreRangeLen {
		return nil, fmt.Errorf("core range spans more than %d cores", maxCoreRangeLen)
	}

	cores := make([]int, high-low+1)
	for i := range cores {
		cores[i] = i + low
	}

	return cores, nil
}
+ if p.needsMsrCPU { + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) } } } -func (p *PowerStat) addCPUFrequencyMetric(cpuID string, acc telegraf.Accumulator) { - frequency, err := p.msr.retrieveCPUFrequencyForCore(cpuID) - - // In case of an error leave func - if err != nil { - p.Log.Debugf("Error while reading file: %v", err) - return +// addPerCPUMsrMetrics adds to the accumulator enabled metrics, which rely on msr, +// for a given CPU ID. MSR-related metrics comprise single-time MSR read and several +// time-related MSR offset reads. +func (p *PowerStat) addPerCPUMsrMetrics(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + // cpuTemperature metric is a single MSR offset read. + if slices.Contains(p.CPUMetrics, cpuTemperature) { + p.addCPUTemperature(acc, cpuID, coreID, packageID) } - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, + if !p.needsTimeRelatedMsr { + return } - fields := map[string]interface{}{ - "cpu_frequency_mhz": roundFloatToNearestTwoDecimalPlaces(frequency), + // Read several time-related MSR offsets. + var moduleErr *ptel.ModuleNotInitializedError + err := p.fetcher.UpdatePerCPUMetrics(cpuID) + if err == nil { + // Add time-related MSR offset metrics to the accumulator + p.addCPUTimeRelatedMsrMetrics(acc, cpuID, coreID, packageID) + return } - acc.AddGauge("powerstat_core", fields, tags) -} - -func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulator) { - coresData := p.msr.getCPUCoresData() - temp := coresData[cpuID].throttleTemp - coresData[cpuID].temp - - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, - } - fields := map[string]interface{}{ - "cpu_temperature_celsius": temp, + // Always add to the accumulator errors not related to module not initialized. 
+ if !errors.As(err, &moduleErr) { + acc.AddError(fmt.Errorf("failed to update MSR time-related metrics for CPU ID %v: %w", cpuID, err)) + return } - acc.AddGauge("powerstat_core", fields, tags) + // Add only once module not initialized error related to msr module and updating time-related msr metrics. + logErrorOnce( + acc, + p.logOnce, + "msr_time_related", + fmt.Errorf("failed to update MSR time-related metrics: %w", moduleErr), + ) } -func calculateTurboRatioGroup(coreCounts uint64, msr uint64, group map[int]uint64) { - // value of number of active cores of bucket 1 is written in the first 8 bits. The next buckets values are saved on the following 8-bit sides - from := coreCounts & 0xFF - for i := 0; i < 8; i++ { - to := (coreCounts >> (i * 8)) & 0xFF - if to == 0 { - break - } - value := (msr >> (i * 8)) & 0xFF - // value of freq ratio is stored in 8-bit blocks, and their real value is obtained after multiplication by 100 - if value != 0 && to != 0 { - for ; from <= to; from++ { - group[int(from)] = value * 100 - } +// addCPUTimeRelatedMsrMetrics adds to the accumulator enabled time-related MSR metrics, +// for a given CPU ID. NOTE: Requires to run first fetcher.UpdatePerCPUMetrics method +// to update the values of MSR offsets read. 
+func (p *PowerStat) addCPUTimeRelatedMsrMetrics(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + for _, m := range p.parsedCPUTimedMsrMetrics { + switch m { + case cpuC0StateResidency: + p.addCPUC0StateResidency(acc, cpuID, coreID, packageID) + case cpuC1StateResidency: + p.addCPUC1StateResidency(acc, cpuID, coreID, packageID) + case cpuC3StateResidency: + p.addCPUC3StateResidency(acc, cpuID, coreID, packageID) + case cpuC6StateResidency: + p.addCPUC6StateResidency(acc, cpuID, coreID, packageID) + case cpuC7StateResidency: + p.addCPUC7StateResidency(acc, cpuID, coreID, packageID) + case cpuBusyFrequency: + p.addCPUBusyFrequency(acc, cpuID, coreID, packageID) + case cpuBusyCycles: + p.addCPUBusyCycles(acc, cpuID, coreID, packageID) } - from = to + 1 } } -func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator) { - var err error - turboRatioLimitGroups := make(map[int]uint64) - - var cpuID = "" - var model = "" - for _, v := range p.cpuInfo { - if v.physicalID == socketID { - cpuID = v.cpuID - model = v.model +// addCPUPerfMetrics takes an accumulator, and adds to it enabled metrics which rely on perf. +func (p *PowerStat) addCPUPerfMetrics(acc telegraf.Accumulator) { + var moduleErr *ptel.ModuleNotInitializedError + + // Read events related to perf-related metrics. + err := p.fetcher.ReadPerfEvents() + if err != nil { + // Always add to the accumulator errors not related to module not initialized. + if !errors.As(err, &moduleErr) { + acc.AddError(fmt.Errorf("failed to read perf events: %w", err)) + return } - } - if cpuID == "" || model == "" { - p.Log.Debug("Error while reading socket ID") + + // Add only once module not initialized error related to perf module and reading perf-related metrics. 
+ logErrorOnce( + acc, + p.logOnce, + "perf_read", + fmt.Errorf("failed to read perf events: %w", moduleErr), + ) return } - // dump_hsw_turbo_ratio_limit - if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X - coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18 - msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit2String) + + for _, cpuID := range p.fetcher.GetPerfCPUIDs() { + coreID, packageID, err := getDataCPUID(p.fetcher, cpuID) if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrTurboRatioLimit2String, err) - return + acc.AddError(fmt.Errorf("failed to get perf metrics for CPU ID %v: %w", cpuID, err)) + continue } - calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit2, turboRatioLimitGroups) + p.addPerCPUPerfMetrics(acc, cpuID, coreID, packageID) } +} - // dump_ivt_turbo_ratio_limit - if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X - (model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X - coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16 - msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrTurboRatioLimit1String, err) - return +// addPerCPUPerfMetrics adds to the accumulator enabled metrics, which rely on perf, for a given CPU ID. 
+func (p *PowerStat) addPerCPUPerfMetrics(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + for _, m := range p.parsedCPUPerfMetrics { + switch m { + case cpuC0SubstateC01Percent: + p.addCPUC0SubstateC01Percent(acc, cpuID, coreID, packageID) + case cpuC0SubstateC02Percent: + p.addCPUC0SubstateC02Percent(acc, cpuID, coreID, packageID) + case cpuC0SubstateC0WaitPercent: + p.addCPUC0SubstateC0WaitPercent(acc, cpuID, coreID, packageID) } - calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups) - } - - if (model != strconv.FormatInt(0x37, 10)) && // INTEL_FAM6_ATOM_SILVERMONT - (model != strconv.FormatInt(0x4A, 10)) && // INTEL_FAM6_ATOM_SILVERMONT_MID - (model != strconv.FormatInt(0x5A, 10)) && // INTEL_FAM6_ATOM_AIRMONT_MID - (model != strconv.FormatInt(0x2E, 10)) && // INTEL_FAM6_NEHALEM_EX - (model != strconv.FormatInt(0x2F, 10)) && // INTEL_FAM6_WESTMERE_EX - (model != strconv.FormatInt(0x57, 10)) && // INTEL_FAM6_XEON_PHI_KNL - (model != strconv.FormatInt(0x85, 10)) { // INTEL_FAM6_XEON_PHI_KNM - coreCounts := uint64(0x0807060504030201) // default value (counting the number of active cores 1 to 8). May be changed in "if" segment below - if (model == strconv.FormatInt(0x5C, 10)) || // INTEL_FAM6_ATOM_GOLDMONT - (model == strconv.FormatInt(0x55, 10)) || // INTEL_FAM6_SKYLAKE_X - (model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X - (model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D - (model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D - coreCounts, err = p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String) + } +} - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrTurboRatioLimit1String, err) - return - } - } +// getDataCPUID takes a topologyFetcher and CPU ID, and returns the core ID and package ID corresponding to the CPU ID. 
+func getDataCPUID(t topologyFetcher, cpuID int) (coreID int, packageID int, err error) { + coreID, err = t.GetCPUCoreID(cpuID) + if err != nil { + return 0, 0, fmt.Errorf("failed to get core ID from CPU ID %v: %w", cpuID, err) + } - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrTurboRatioLimitString, err) - return - } - calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups) + packageID, err = t.GetCPUPackageID(cpuID) + if err != nil { + return 0, 0, fmt.Errorf("failed to get package ID from CPU ID %v: %w", cpuID, err) } - // dump_atom_turbo_ratio_limits - if model == strconv.FormatInt(0x37, 10) || // INTEL_FAM6_ATOM_SILVERMONT - model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID - model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID - coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4 - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrAtomCoreTurboRatiosString) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrAtomCoreTurboRatiosString, err) - return - } - value := uint64(0) - newValue := uint64(0) + return coreID, packageID, nil +} - for i := 0; i < 4; i++ { // value "4" is specific for this group of processors - newValue = (msrTurboRatioLimit >> (8 * (i))) & 0x3F // value of freq ratio is stored in 6-bit blocks, saved every 8 bits - value = value + (newValue << ((i - 1) * 8)) // now value of freq ratio is stored in 8-bit blocks, saved every 8 bits +// addPackageMetrics takes an accumulator, and adds enabled package metrics to it. +func (p *PowerStat) addPackageMetrics(acc telegraf.Accumulator) { + for _, packageID := range p.fetcher.GetPackageIDs() { + // Add requested metrics which rely on rapl. 
+ if p.needsRapl { + p.addPerPackageRaplMetrics(acc, packageID) } - calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups) - } - // dump_knl_turbo_ratio_limits - if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrTurboRatioLimitString, err) - return + // Add requested metrics which rely on msr. + if p.needsMsrPackage { + p.addPerPackageMsrMetrics(acc, packageID) } - // value of freq ratio of bucket 1 is saved in bits 15 to 8. - // each next value is calculated as the previous value - delta. Delta is stored in 3-bit blocks every 8 bits (start at 21 (2*8+5)) - value := (msrTurboRatioLimit >> 8) & 0xFF - newValue := value - for i := 2; i < 8; i++ { - newValue = newValue - (msrTurboRatioLimit>>(8*i+5))&0x7 - value = value + (newValue << ((i - 1) * 8)) + // Add uncore frequency metric which relies on both uncoreFreq and msr. + if slices.Contains(p.PackageMetrics, packageUncoreFrequency) { + p.addUncoreFrequency(acc, packageID) } + } +} - // value of number of active cores of bucket 1 is saved in bits 1 to 7. - // each next value is calculated as the previous value + delta. Delta is stored in 5-bit blocks every 8 bits (start at 16 (2*8)) - coreCounts := (msrTurboRatioLimit & 0xFF) >> 1 - newBucket := coreCounts - for i := 2; i < 8; i++ { - newBucket = newBucket + (msrTurboRatioLimit>>(8*i))&0x1F - coreCounts = coreCounts + (newBucket << ((i - 1) * 8)) +// addPerPackageRaplMetrics adds to the accumulator enabled metrics, which rely on rapl, for a given package ID. 
+func (p *PowerStat) addPerPackageRaplMetrics(acc telegraf.Accumulator, packageID int) { + for _, m := range p.parsedPackageRaplMetrics { + switch m { + case packageCurrentPowerConsumption: + p.addCurrentPackagePower(acc, packageID) + case packageCurrentDramPowerConsumption: + p.addCurrentDramPower(acc, packageID) + case packageThermalDesignPower: + p.addThermalDesignPower(acc, packageID) } - calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups) } +} - for key, val := range turboRatioLimitGroups { - tags := map[string]string{ - "package_id": socketID, - "active_cores": strconv.Itoa(key), - } - fields := map[string]interface{}{ - "max_turbo_frequency_mhz": val, +// addPerPackageMsrMetrics adds to the accumulator enabled metrics, which rely on msr registers, for a given package ID. +func (p *PowerStat) addPerPackageMsrMetrics(acc telegraf.Accumulator, packageID int) { + for _, m := range p.parsedPackageMsrMetrics { + switch m { + case packageCPUBaseFrequency: + p.addCPUBaseFrequency(acc, packageID) + case packageTurboLimit: + p.addMaxTurboFreqLimits(acc, packageID) } - acc.AddGauge("powerstat_package", fields, tags) } } -func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) { - coresData := p.msr.getCPUCoresData() - mperfDelta := coresData[cpuID].mperfDelta - // Avoid division by 0 - if mperfDelta == 0 { - p.Log.Errorf("Value of mperf delta should not equal 0 on core %s", cpuID) - return - } - aperfMperf := float64(coresData[cpuID].aperfDelta) / float64(mperfDelta) - tsc := convertProcessorCyclesToHertz(coresData[cpuID].timeStampCounterDelta) - timeNow := time.Now().UnixNano() - interval := convertNanoSecondsToSeconds(timeNow - coresData[cpuID].readDate) - coresData[cpuID].readDate = timeNow +// addCPUFrequency fetches CPU frequency metric for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUFrequency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuFrequency, + units: "mhz", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUFrequency, + }, + p.logOnce, + ) +} - if p.skipFirstIteration { - return - } +// addCPUFrequency fetches CPU temperature metric for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUTemperature(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[uint64]{ + metricCommon: metricCommon{ + metric: cpuTemperature, + units: "celsius", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUTemperature, + }, + p.logOnce, + ) +} - if interval == 0 { - p.Log.Errorf("Interval between last two Telegraf cycles is 0") - return - } +// addCPUC0StateResidency fetches C0 state residency metric for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUC0StateResidency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC0StateResidency, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC0StateResidency, + }, + p.logOnce, + ) +} - busyMhzValue := roundFloatToNearestTwoDecimalPlaces(tsc * aperfMperf / interval) +// addCPUC1StateResidency fetches C1 state residency metric for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUC1StateResidency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC1StateResidency, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC1StateResidency, + }, + p.logOnce, + ) +} - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, - } - fields := map[string]interface{}{ - "cpu_busy_frequency_mhz": busyMhzValue, - } +// addCPUC3StateResidency fetches C3 state residency metric for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUC3StateResidency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC3StateResidency, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC3StateResidency, + }, + p.logOnce, + ) +} - acc.AddGauge("powerstat_core", fields, tags) +// addCPUC6StateResidency fetches C6 state residency metric for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUC6StateResidency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC6StateResidency, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC6StateResidency, + }, + p.logOnce, + ) } -func (p *PowerStat) addCPUC1StateResidencyMetric(cpuID string, acc telegraf.Accumulator) { - coresData := p.msr.getCPUCoresData() - timestampDeltaBig := new(big.Int).SetUint64(coresData[cpuID].timeStampCounterDelta) - // Avoid division by 0 - if timestampDeltaBig.Sign() < 1 { - p.Log.Errorf("Timestamp delta value %v should not be lower than 1", timestampDeltaBig) - return - } +// addCPUC7StateResidency fetches C7 state residency metric for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUC7StateResidency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC7StateResidency, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC7StateResidency, + }, + p.logOnce, + ) +} - // Since counter collection is not atomic it may happen that sum of C0, C1, C3, C6 and C7 - // is bigger value than TSC, in such case C1 residency shall be set to 0. - // Operating on big.Int to avoid overflow - mperfDeltaBig := new(big.Int).SetUint64(coresData[cpuID].mperfDelta) - c3DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c3Delta) - c6DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c6Delta) - c7DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c7Delta) +// addCPUBusyFrequency fetches CPU busy frequency metric for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUBusyFrequency(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuBusyFrequency, + units: "mhz", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUBusyFrequencyMhz, + }, + p.logOnce, + ) +} - c1Big := new(big.Int).Sub(timestampDeltaBig, mperfDeltaBig) - c1Big.Sub(c1Big, c3DeltaBig) - c1Big.Sub(c1Big, c6DeltaBig) - c1Big.Sub(c1Big, c7DeltaBig) +// addCPUBusyCycles fetches CPU busy cycles metric for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUBusyCycles(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuBusyCycles, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC0StateResidency, + }, + p.logOnce, + ) +} - if c1Big.Sign() < 0 { - c1Big = c1Big.SetInt64(0) - } - c1Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1Big.Uint64()) / float64(timestampDeltaBig.Uint64())) +// addCPUC0SubstateC01Percent fetches a value indicating the percentage of time the processor spent in its C0.1 substate +// out of the total time in the C0 state for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUC0SubstateC01Percent(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC0SubstateC01Percent, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC0SubstateC01Percent, + }, + p.logOnce, + ) +} - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, - } - fields := map[string]interface{}{ - "cpu_c1_state_residency_percent": c1Value, - } +// addCPUC0SubstateC02Percent fetches a value indicating the percentage of time the processor spent in its C0.2 substate +// out of the total time in the C0 state for a given CPU ID, and adds it to the accumulator. +func (p *PowerStat) addCPUC0SubstateC02Percent(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC0SubstateC02Percent, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC0SubstateC02Percent, + }, + p.logOnce, + ) +} - acc.AddGauge("powerstat_core", fields, tags) +// addCPUC0SubstateC0WaitPercent fetches a value indicating the percentage of time the processor spent in its C0_Wait substate +// out of the total time in the C0 state for a given CPU ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUC0SubstateC0WaitPercent(acc telegraf.Accumulator, cpuID, coreID, packageID int) { + addMetric( + acc, + &cpuMetric[float64]{ + metricCommon: metricCommon{ + metric: cpuC0SubstateC0WaitPercent, + units: "percent", + }, + cpuID: cpuID, + coreID: coreID, + packageID: packageID, + fetchFn: p.fetcher.GetCPUC0SubstateC0WaitPercent, + }, + p.logOnce, + ) } -func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accumulator) { - coresData := p.msr.getCPUCoresData() - // Avoid division by 0 - if coresData[cpuID].timeStampCounterDelta == 0 { - p.Log.Errorf("Timestamp counter on offset %d should not equal 0 on cpuID %s", - timestampCounterLocation, cpuID) - return - } - c6Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * - float64(coresData[cpuID].c6Delta) / float64(coresData[cpuID].timeStampCounterDelta)) +// addCurrentPackagePower fetches the current package power metric for a given package ID, and adds it to the accumulator. +func (p *PowerStat) addCurrentPackagePower(acc telegraf.Accumulator, packageID int) { + addMetric( + acc, + &packageMetric[float64]{ + metricCommon: metricCommon{ + metric: packageCurrentPowerConsumption, + units: "watts", + }, + packageID: packageID, + fetchFn: p.fetcher.GetCurrentPackagePowerConsumptionWatts, + }, + p.logOnce, + ) +} - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, - } - fields := map[string]interface{}{ - "cpu_c6_state_residency_percent": c6Value, - } +// addCurrentPackagePower fetches the current dram power metric for a given package ID, and adds it to the accumulator. 
+func (p *PowerStat) addCurrentDramPower(acc telegraf.Accumulator, packageID int) { + addMetric( + acc, + &packageMetric[float64]{ + metricCommon: metricCommon{ + metric: packageCurrentDramPowerConsumption, + units: "watts", + }, + packageID: packageID, + fetchFn: p.fetcher.GetCurrentDramPowerConsumptionWatts, + }, + p.logOnce, + ) +} - acc.AddGauge("powerstat_core", fields, tags) +// addCurrentPackagePower fetches the thermal design power metric for a given package ID, and adds it to the accumulator. +func (p *PowerStat) addThermalDesignPower(acc telegraf.Accumulator, packageID int) { + addMetric( + acc, + &packageMetric[float64]{ + metricCommon: metricCommon{ + metric: packageThermalDesignPower, + units: "watts", + }, + packageID: packageID, + fetchFn: p.fetcher.GetPackageThermalDesignPowerWatts, + }, + p.logOnce, + ) } -func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accumulator) { - coresData := p.msr.getCPUCoresData() - // Avoid division by 0 - if coresData[cpuID].timeStampCounterDelta == 0 { - p.Log.Errorf("Timestamp counter on offset %d should not equal 0 on cpuID %s", - timestampCounterLocation, cpuID) - return - } - c0Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * - float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta)) - cpu := p.cpuInfo[cpuID] - tags := map[string]string{ - "package_id": cpu.physicalID, - "core_id": cpu.coreID, - "cpu_id": cpu.cpuID, - } - if p.cpuC0StateResidency { - fields := map[string]interface{}{ - "cpu_c0_state_residency_percent": c0Value, - } - acc.AddGauge("powerstat_core", fields, tags) - } - if p.cpuBusyCycles { - deprecatedFields := map[string]interface{}{ - "cpu_busy_cycles_percent": c0Value, - } - acc.AddGauge("powerstat_core", deprecatedFields, tags) - } +// addCPUBaseFrequency fetches the CPU base frequency metric for a given package ID, and adds it to the accumulator. 
+func (p *PowerStat) addCPUBaseFrequency(acc telegraf.Accumulator, packageID int) { + addMetric( + acc, + &packageMetric[uint64]{ + metricCommon: metricCommon{ + metric: packageCPUBaseFrequency, + units: "mhz", + }, + packageID: packageID, + fetchFn: p.fetcher.GetCPUBaseFrequency, + }, + p.logOnce, + ) } -func (p *PowerStat) addCPUBaseFreq(socketID string, acc telegraf.Accumulator) { - cpuID, err := p.GetCPUIDFromSocketID(socketID) +// addUncoreFrequency fetches the uncore frequency metrics for a given package ID, and adds it to the accumulator. +func (p *PowerStat) addUncoreFrequency(acc telegraf.Accumulator, packageID int) { + dieIDs, err := p.fetcher.GetPackageDieIDs(packageID) if err != nil { - p.Log.Debugf("Error while getting CPU ID from Socket ID: %v", err) + acc.AddError(fmt.Errorf("failed to get die IDs for package ID %v: %w", packageID, err)) return } - msrPlatformInfoMsr, err := p.msr.readSingleMsr(cpuID, msrPlatformInfoString) - if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrPlatformInfoString, err) - return + for _, dieID := range dieIDs { + // Add initial uncore frequency limits. + p.addUncoreFrequencyInitialLimits(acc, packageID, dieID) + + // Add current uncore frequency limits and value. + p.addUncoreFrequencyCurrentValues(acc, packageID, dieID) } +} - // the value of the freq ratio is saved in bits 15 to 8. - // to get the freq -> ratio * busClock - cpuBaseFreq := float64((msrPlatformInfoMsr>>8)&0xFF) * p.cpuBusClockValue - if cpuBaseFreq == 0 { - p.Log.Debugf("Error while adding CPU base frequency, cpuBaseFreq is zero for the socket: %s", socketID) +// addUncoreFrequencyInitialLimits fetches uncore frequency initial limits for a given pair of package and die ID, +// and adds it to the accumulator. 
+func (p *PowerStat) addUncoreFrequencyInitialLimits(acc telegraf.Accumulator, packageID, dieID int) { + initMin, initMax, err := getUncoreFreqInitialLimits(p.fetcher, packageID, dieID) + if err == nil { + acc.AddGauge( + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": round(initMin), + "uncore_frequency_limit_mhz_max": round(initMax), + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) return } - tags := map[string]string{ - "package_id": socketID, - } - fields := map[string]interface{}{ - "cpu_base_frequency_mhz": uint64(cpuBaseFreq), + // Always add to the accumulator errors not related to module not initialized. + var moduleErr *ptel.ModuleNotInitializedError + if !errors.As(err, &moduleErr) { + acc.AddError(fmt.Errorf("failed to get initial uncore frequency limits for package ID %v and die ID %v: %w", packageID, dieID, err)) + return } - acc.AddGauge("powerstat_package", fields, tags) + + // Add only once module not initialized error related to uncore_frequency module and uncore frequency initial limits. + logErrorOnce( + acc, + p.logOnce, + fmt.Sprintf("%s_%s_initial", moduleErr.Name, packageUncoreFrequency), + fmt.Errorf("failed to get %q initial limits: %w", packageUncoreFrequency, moduleErr), + ) } -func (p *PowerStat) getBusClock(cpuID string) float64 { - cpuInfo, ok := p.cpuInfo[cpuID] - if !ok { - p.Log.Debugf("Cannot find cpuInfo for cpu: %s", cpuID) - return 0 +// addUncoreFrequencyCurrentValues fetches uncore frequency current limits and value for a given pair of package and die ID, +// and adds it to the accumulator. 
+func (p *PowerStat) addUncoreFrequencyCurrentValues(acc telegraf.Accumulator, packageID, dieID int) { + val, err := getUncoreFreqCurrentValues(p.fetcher, packageID, dieID) + if err == nil { + acc.AddGauge( + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": round(val.currMin), + "uncore_frequency_limit_mhz_max": round(val.currMax), + "uncore_frequency_mhz_cur": uint64(val.curr), + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "current", + "die": strconv.Itoa(dieID), + }, + ) + return } - model := cpuInfo.model - busClock100 := []int64{0x2A, 0x2D, 0x3A, 0x3E, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, 0x4E, 0x5E, 0x55, 0x8E, 0x9E, 0xA5, 0xA6, 0x66, 0x6A, 0x6C, - 0x7D, 0x7E, 0x9D, 0x8A, 0xA7, 0x8C, 0x8D, 0x8F, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA, 0x5C, 0x5F, 0x7A, 0x86, 0x96, 0x9C, 0x57, 0x85} - busClock133 := []int64{0x1E, 0x1F, 0x1A, 0x2E, 0x25, 0x2C, 0x2F, 0x4C} - busClockCalculate := []int64{0x37, 0x4D} - - if contains(convertIntegerArrayToStringArray(busClock100), model) { - return 100.0 - } else if contains(convertIntegerArrayToStringArray(busClock133), model) { - return 133.0 - } else if contains(convertIntegerArrayToStringArray(busClockCalculate), model) { - return p.getSilvermontBusClock(cpuID) + // Always add to the accumulator errors not related to module not initialized. + var moduleErr *ptel.ModuleNotInitializedError + if !errors.As(err, &moduleErr) { + acc.AddError(fmt.Errorf("failed to get current uncore frequency values for package ID %v and die ID %v: %w", packageID, dieID, err)) + return } - p.Log.Debugf("Couldn't find the freq for the model: %s", model) - return 0.0 + // Add only once module not initialized error related to uncore_frequency module and uncore frequency current value and limits. 
+ logErrorOnce( + acc, + p.logOnce, + fmt.Sprintf("%s_%s_current", moduleErr.Name, packageUncoreFrequency), + fmt.Errorf("failed to get %q current value and limits: %w", packageUncoreFrequency, moduleErr), + ) } -func (p *PowerStat) getSilvermontBusClock(cpuID string) float64 { - silvermontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0} - msr, err := p.msr.readSingleMsr(cpuID, msrFSBFreqString) +// getUncoreFreqInitialLimits returns the initial uncore frequency limits of a given package ID and die ID. +func getUncoreFreqInitialLimits(fetcher metricFetcher, packageID, dieID int) (initialMin float64, initialMax float64, err error) { + initialMin, err = fetcher.GetInitialUncoreFrequencyMin(packageID, dieID) if err != nil { - p.Log.Debugf("Error while reading %s: %v", msrFSBFreqString, err) - return 0.0 + return 0.0, 0.0, fmt.Errorf("failed to get initial minimum uncore frequency limit: %w", err) } - i := int(msr & 0xf) - if i >= len(silvermontFreqTable) { - p.Log.Debugf("Unknown msr value: %d, using default bus clock value: %f", i, silvermontFreqTable[3]) - //same behaviour as in turbostat - i = 3 + initialMax, err = fetcher.GetInitialUncoreFrequencyMax(packageID, dieID) + if err != nil { + return 0.0, 0.0, fmt.Errorf("failed to get initial maximum uncore frequency limit: %w", err) } - return silvermontFreqTable[i] + return initialMin, initialMax, nil } -func (p *PowerStat) parsePackageMetricsConfig() { - if p.PackageMetrics == nil { - // if Package Metric config is empty, use the default settings. 
- p.packageCurrentPowerConsumption = true - p.packageCurrentDramPowerConsumption = true - p.packageThermalDesignPower = true - return - } +type uncoreFreqValues struct { + currMin float64 + currMax float64 + curr float64 +} - if contains(p.PackageMetrics, packageTurboLimit) { - p.packageTurboLimit = true - } - if contains(p.PackageMetrics, packageCurrentPowerConsumption) { - p.packageCurrentPowerConsumption = true +// getUncoreFreqCurrentValues returns the current uncore frequency value as well as current min and max uncore frequency limits of a given +// package ID and die ID. +func getUncoreFreqCurrentValues(fetcher metricFetcher, packageID, dieID int) (uncoreFreqValues, error) { + currMin, err := fetcher.GetCustomizedUncoreFrequencyMin(packageID, dieID) + if err != nil { + return uncoreFreqValues{}, fmt.Errorf("failed to get current minimum uncore frequency limit: %w", err) } - if contains(p.PackageMetrics, packageCurrentDramPowerConsumption) { - p.packageCurrentDramPowerConsumption = true - } - if contains(p.PackageMetrics, packageThermalDesignPower) { - p.packageThermalDesignPower = true - } - if contains(p.PackageMetrics, packageUncoreFrequency) { - p.packageUncoreFrequency = true + currMax, err := fetcher.GetCustomizedUncoreFrequencyMax(packageID, dieID) + if err != nil { + return uncoreFreqValues{}, fmt.Errorf("failed to get current maximum uncore frequency limit: %w", err) } - if contains(p.PackageMetrics, packageCPUBaseFrequency) { - p.packageCPUBaseFrequency = true + + current, err := fetcher.GetCurrentUncoreFrequency(packageID, dieID) + if err != nil { + return uncoreFreqValues{}, fmt.Errorf("failed to get current uncore frequency: %w", err) } + + return uncoreFreqValues{ + currMin: currMin, + currMax: currMax, + curr: current, + }, nil } -func (p *PowerStat) parseCPUMetricsConfig() { - if len(p.CPUMetrics) == 0 { +// addMaxTurboFreqLimits fetches the max turbo frequency limits metric for a given package ID, and adds it to the accumulator. 
+func (p *PowerStat) addMaxTurboFreqLimits(acc telegraf.Accumulator, packageID int) { + var moduleErr *ptel.ModuleNotInitializedError + + turboFreqList, err := p.fetcher.GetMaxTurboFreqList(packageID) + if err != nil { + // Always add to the accumulator errors not related to module not initialized. + if !errors.As(err, &moduleErr) { + acc.AddError(fmt.Errorf("failed to get %q for package ID %v: %w", packageTurboLimit, packageID, err)) + return + } + + // Add only once module not initialized error related to msr module and max turbo frequency limits metric. + logErrorOnce( + acc, + p.logOnce, + fmt.Sprintf("%s_%s", moduleErr.Name, packageTurboLimit), + fmt.Errorf("failed to get %q: %w", packageTurboLimit, moduleErr), + ) return } - if contains(p.CPUMetrics, cpuFrequency) { - p.cpuFrequency = true - } + isHybrid := isHybridCPU(turboFreqList) + for _, v := range turboFreqList { + tags := map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(v.ActiveCores)), + } + + if isHybrid { + var hybridTag string + if v.Secondary { + hybridTag = "secondary" + } else { + hybridTag = "primary" + } + tags["hybrid"] = hybridTag + } - if contains(p.CPUMetrics, cpuC0StateResidency) { - p.cpuC0StateResidency = true + acc.AddGauge( + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": v.Value, + }, + // tags + tags, + ) } +} - if contains(p.CPUMetrics, cpuC1StateResidency) { - p.cpuC1StateResidency = true +// isHybridCPU is a helper function that takes a slice of MaxTurboFreq structs and returns true if the CPU where these values belong to, +// is a hybrid CPU. Otherwise, returns false. 
+func isHybridCPU(turboFreqList []ptel.MaxTurboFreq) bool { + for _, v := range turboFreqList { + if v.Secondary { + return true + } } + return false +} - if contains(p.CPUMetrics, cpuC6StateResidency) { - p.cpuC6StateResidency = true +// disableUnsupportedMetrics checks whether the processor is capable of gathering specific metrics. +// In case it is not, disableUnsupportedMetrics will disable the option to gather those metrics. +// Error is returned if there is an issue with retrieving processor information. +func (p *PowerStat) disableUnsupportedMetrics() error { + cpus, err := cpuUtil.Info() + if err != nil { + return fmt.Errorf("error occurred while parsing CPU information: %w", err) + } + if len(cpus) == 0 { + return errors.New("no CPUs were found") } - if contains(p.CPUMetrics, cpuBusyCycles) { - p.cpuBusyCycles = true + // First CPU is sufficient for verification + firstCPU := cpus[0] + cpuModel, err := strconv.Atoi(firstCPU.Model) + if err != nil { + return fmt.Errorf("error occurred while parsing CPU model: %w", err) } - if contains(p.CPUMetrics, cpuBusyFrequency) { - p.cpuBusyFrequency = true + if err := ptel.CheckIfCPUC1StateResidencySupported(cpuModel); err != nil { + p.disableCPUMetric(cpuC1StateResidency) } - if contains(p.CPUMetrics, cpuTemperature) { - p.cpuTemperature = true + if err := ptel.CheckIfCPUC3StateResidencySupported(cpuModel); err != nil { + p.disableCPUMetric(cpuC3StateResidency) } -} -func (p *PowerStat) verifyProcessor() error { - allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C, - 0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E, - 0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, - 0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8A, 0x8F, 0x8C, 0x8D, 0xA7, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA} - stats, err := p.fs.getCPUInfoStats() - if err != nil { - return err + if err := 
ptel.CheckIfCPUC6StateResidencySupported(cpuModel); err != nil { + p.disableCPUMetric(cpuC6StateResidency) } - p.cpuInfo = stats + if err := ptel.CheckIfCPUC7StateResidencySupported(cpuModel); err != nil { + p.disableCPUMetric(cpuC7StateResidency) + } - // First CPU is sufficient for verification - firstCPU := p.cpuInfo["0"] - if firstCPU == nil { - return fmt.Errorf("first core not found while parsing /proc/cpuinfo") + if err := ptel.CheckIfCPUTemperatureSupported(cpuModel); err != nil { + p.disableCPUMetric(cpuTemperature) } - if firstCPU.vendorID != "GenuineIntel" || firstCPU.cpuFamily != "6" { - return fmt.Errorf("Intel processor not found, vendorId: %s", firstCPU.vendorID) + if err := ptel.CheckIfCPUBaseFrequencySupported(cpuModel); err != nil { + p.disablePackageMetric(packageCPUBaseFrequency) } - if !contains(convertIntegerArrayToStringArray(allowedProcessorModelsForC1C6), firstCPU.model) { - p.cpuC1StateResidency = false - p.cpuC6StateResidency = false + allowedModelsForPerfRelated := []int{ + 0x8F, // INTEL_FAM6_SAPPHIRERAPIDS_X + 0xCF, // INTEL_FAM6_EMERALDRAPIDS_X + } + if !slices.Contains(allowedModelsForPerfRelated, cpuModel) { + p.disableCPUMetric(cpuC0SubstateC01Percent) + p.disableCPUMetric(cpuC0SubstateC02Percent) + p.disableCPUMetric(cpuC0SubstateC0WaitPercent) } - if !strings.Contains(firstCPU.flags, "msr") { - p.packageCPUBaseFrequency = false - p.cpuTemperature = false - p.cpuC6StateResidency = false - p.cpuC0StateResidency = false - p.cpuBusyCycles = false - p.cpuBusyFrequency = false - p.cpuC1StateResidency = false + if !slices.Contains(firstCPU.Flags, "msr") { + p.disableCPUMetric(cpuC0StateResidency) + p.disableCPUMetric(cpuC1StateResidency) + p.disableCPUMetric(cpuC3StateResidency) + p.disableCPUMetric(cpuC6StateResidency) + p.disableCPUMetric(cpuC7StateResidency) + p.disableCPUMetric(cpuBusyCycles) + p.disableCPUMetric(cpuBusyFrequency) + p.disableCPUMetric(cpuTemperature) + p.disablePackageMetric(packageCPUBaseFrequency) + 
p.disablePackageMetric(packageTurboLimit) } - if !strings.Contains(firstCPU.flags, "aperfmperf") { - p.cpuBusyCycles = false - p.cpuBusyFrequency = false - p.cpuC0StateResidency = false - p.cpuC1StateResidency = false + if !slices.Contains(firstCPU.Flags, "aperfmperf") { + p.disableCPUMetric(cpuC0StateResidency) + p.disableCPUMetric(cpuC1StateResidency) + p.disableCPUMetric(cpuBusyCycles) + p.disableCPUMetric(cpuBusyFrequency) } - if !strings.Contains(firstCPU.flags, "dts") { - p.cpuTemperature = false + if !slices.Contains(firstCPU.Flags, "dts") { + p.disableCPUMetric(cpuTemperature) } return nil } -func contains[T comparable](s []T, e T) bool { - for _, v := range s { - if v == e { - return true - } - } - return false -} +// disableCPUMetric removes given cpu metric from cpu_metrics. +func (p *PowerStat) disableCPUMetric(metricToDisable cpuMetricType) { + startLen := len(p.CPUMetrics) + p.CPUMetrics = slices.DeleteFunc(p.CPUMetrics, func(cpuMetric cpuMetricType) bool { + return cpuMetric == metricToDisable + }) -func (p *PowerStat) areCoreMetricsEnabled() bool { - return p.msr != nil && len(p.msr.getCPUCoresData()) > 0 + if len(p.CPUMetrics) < startLen { + p.Log.Warnf("%q is not supported by CPU, metric will not be gathered.", metricToDisable) + } } -func (p *PowerStat) areGlobalMetricsEnabled() bool { - return p.rapl != nil -} +// disablePackageMetric removes given package metric from package_metrics. 
+func (p *PowerStat) disablePackageMetric(metricToDisable packageMetricType) { + startLen := len(p.PackageMetrics) + p.PackageMetrics = slices.DeleteFunc(p.PackageMetrics, func(packageMetric packageMetricType) bool { + return packageMetric == metricToDisable + }) -func (p *PowerStat) GetCPUIDFromSocketID(socketID string) (string, error) { - for _, v := range p.cpuInfo { - if v.physicalID == socketID { - return v.cpuID, nil - } + if len(p.PackageMetrics) < startLen { + p.Log.Warnf("%q is not supported by CPU, metric will not be gathered.", metricToDisable) } - return "", fmt.Errorf("can't find cpuID for socketID: %s", socketID) } -// newPowerStat creates and returns PowerStat struct -func newPowerStat(fs fileService) *PowerStat { - p := &PowerStat{ - skipFirstIteration: true, - fs: fs, - logOnce: make(map[string]error), +// logErrorOnce takes an accumulator, a key string value error map, a key string and an error. It adds the error to the accumulator only if the +// key is not in the logOnceMap. Additionally, if the key is not in logOnceMap map, adds the key to it. This is to prevent excessive error messages +// from flooding the accumulator. 
+func logErrorOnce(acc telegraf.Accumulator, logOnceMap map[string]struct{}, key string, err error) { + if _, ok := logOnceMap[key]; !ok { + acc.AddError(err) + logOnceMap[key] = struct{}{} } - - return p } func init() { inputs.Add("intel_powerstat", func() telegraf.Input { - return newPowerStat(newFileService()) + return &PowerStat{} }) } diff --git a/plugins/inputs/intel_powerstat/intel_powerstat_notlinux.go b/plugins/inputs/intel_powerstat/intel_powerstat_notlinux.go index 950140c98df79..4a8c8d9bd6c3e 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat_notlinux.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat_notlinux.go @@ -1,5 +1,5 @@ //go:generate ../../../tools/readme_config_includer/generator -//go:build !linux +//go:build !linux || !amd64 package intel_powerstat @@ -21,6 +21,7 @@ func (i *IntelPowerstat) Init() error { i.Log.Warn("current platform is not supported") return nil } + func (*IntelPowerstat) SampleConfig() string { return sampleConfig } func (*IntelPowerstat) Gather(_ telegraf.Accumulator) error { return nil } diff --git a/plugins/inputs/intel_powerstat/intel_powerstat_test.go b/plugins/inputs/intel_powerstat/intel_powerstat_test.go index 5c0f2b1b222f4..282e97250479a 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat_test.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat_test.go @@ -1,851 +1,5204 @@ -//go:build linux +//go:build linux && amd64 package intel_powerstat import ( "errors" + "fmt" "strconv" - "sync" "testing" - "time" + ptel "github.com/intel/powertelemetry" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/testutil" ) -type MockServices struct { - fs *mockFileService - msr *mockMsrService - rapl *mockRaplService -} - -func TestInitPlugin(t *testing.T) { - cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"} - power, mockServices := getPowerWithMockedServices() - - mockServices.fs.On("getCPUInfoStats", mock.Anything). 
- Return(nil, errors.New("error getting cpu stats")).Once() - require.Error(t, power.Init()) - - mockServices.fs.On("getCPUInfoStats", mock.Anything). - Return(make(map[string]*cpuInfo), nil).Once() - require.Error(t, power.Init()) - - mockServices.fs.On("getCPUInfoStats", mock.Anything). - Return(map[string]*cpuInfo{"0": { - vendorID: "GenuineIntel", - cpuFamily: "test", - }}, nil).Once() - require.Error(t, power.Init()) - - mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything). - Return(cores, nil).Once(). - On("getCPUInfoStats", mock.Anything). - Return(map[string]*cpuInfo{"0": { - vendorID: "GenuineIntel", - cpuFamily: "6", - }}, nil) - // Verify MSR service initialization. - power.cpuFrequency = true - require.NoError(t, power.Init()) - mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) - require.Equal(t, len(cores), len(power.msr.getCPUCoresData())) - - mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything). - Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once() - - // In case of an error when fetching cpu cores plugin should proceed with execution. 
- require.NoError(t, power.Init()) - mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) - require.Empty(t, power.msr.getCPUCoresData()) -} - -func TestParseCPUMetricsConfig(t *testing.T) { - power, _ := getPowerWithMockedServices() - disableCoreMetrics(power) - - power.CPUMetrics = []string{ - "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", - "cpu_busy_frequency", - } - power.parseCPUMetricsConfig() - verifyCoreMetrics(t, power, true) - disableCoreMetrics(power) - verifyCoreMetrics(t, power, false) - - power.CPUMetrics = []string{} - power.parseCPUMetricsConfig() - - power.CPUMetrics = []string{"cpu_c6_state_residency", "#@$sdkjdfsdf3@", "1pu_c1_state_residency"} - power.parseCPUMetricsConfig() - require.False(t, power.cpuC1StateResidency) - require.True(t, power.cpuC6StateResidency) - disableCoreMetrics(power) - verifyCoreMetrics(t, power, false) - - power.CPUMetrics = []string{"#@$sdkjdfsdf3@", "1pu_c1_state_residency", "123"} - power.parseCPUMetricsConfig() - verifyCoreMetrics(t, power, false) +type parsePackageMetricTestCase struct { + name string + metrics []packageMetricType + parsed []packageMetricType + err error } -func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) { - require.Equal(t, enabled, power.cpuFrequency) - require.Equal(t, enabled, power.cpuC1StateResidency) - require.Equal(t, enabled, power.cpuC6StateResidency) - require.Equal(t, enabled, power.cpuC0StateResidency) - require.Equal(t, enabled, power.cpuBusyCycles) - require.Equal(t, enabled, power.cpuBusyFrequency) - require.Equal(t, enabled, power.cpuTemperature) +type parseCPUMetricTestCase struct { + name string + metrics []cpuMetricType + parsed []cpuMetricType + err error } -func TestGather(t *testing.T) { - var acc testutil.Accumulator - packageIDs := []string{"0", "1"} - coreIDs := []string{"0", "1", "2", "3"} - socketCurrentEnergy := 13213852.2 - 
dramCurrentEnergy := 784552.0 - preparedCPUData := getPreparedCPUData(coreIDs) - raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy) - - power, mockServices := getPowerWithMockedServices() - prepareCPUInfo(power, coreIDs, packageIDs) - enableCoreMetrics(power) - power.skipFirstIteration = false - - mockServices.rapl.On("initializeRaplData", mock.Anything). - On("getRaplData").Return(raplDataMap). - On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)). - On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil) - mockServices.msr.On("getCPUCoresData").Return(preparedCPUData). - On("isMsrLoaded", mock.Anything).Return(true). - On("openAndReadMsr", mock.Anything).Return(nil). - On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil) - - require.NoError(t, power.Gather(&acc)) - // Number of global metrics : 3 - // Number of per core metrics : 7 - require.Len(t, acc.GetTelegrafMetrics(), 3*len(packageIDs)+7*len(coreIDs)) -} - -func TestAddGlobalMetricsNegative(t *testing.T) { - var acc testutil.Accumulator - socketCurrentEnergy := 13213852.2 - dramCurrentEnergy := 784552.0 - raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy) - power, mockServices := getPowerWithMockedServices() - power.skipFirstIteration = false - mockServices.rapl.On("initializeRaplData", mock.Anything).Once(). - On("getRaplData").Return(raplDataMap).Once(). - On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap)) - - power.addGlobalMetrics(&acc) - require.Empty(t, acc.GetTelegrafMetrics()) - mockServices.rapl.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap)) - - mockServices.rapl.On("initializeRaplData", mock.Anything).Once(). 
- On("getRaplData").Return(make(map[string]*raplData)).Once() - - power.addGlobalMetrics(&acc) - require.Empty(t, acc.GetTelegrafMetrics()) - mockServices.rapl.AssertNotCalled(t, "retrieveAndCalculateData") - - mockServices.rapl.On("initializeRaplData", mock.Anything).Once(). - On("getRaplData").Return(raplDataMap). - On("retrieveAndCalculateData", mock.Anything).Return(nil).Once(). - On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once(). - On("getConstraintMaxPowerWatts", mock.Anything).Return(12313851.5, nil).Twice() - - power.addGlobalMetrics(&acc) - require.Len(t, acc.GetTelegrafMetrics(), 3) -} - -func TestAddGlobalMetricsPositive(t *testing.T) { - var acc testutil.Accumulator - socketCurrentEnergy := 3644574.4 - dramCurrentEnergy := 124234872.5 - raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy) - maxPower := 546783852.9 - power, mockServices := getPowerWithMockedServices() - power.skipFirstIteration = false - - mockServices.rapl.On("initializeRaplData", mock.Anything). - On("getRaplData").Return(raplDataMap). - On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)). - On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice(). 
- On("getCurrentDramPowerConsumption", mock.Anything).Return(dramCurrentEnergy) - - power.addGlobalMetrics(&acc) - require.Len(t, acc.GetTelegrafMetrics(), 6) - - expectedResults := getGlobalMetrics(maxPower, socketCurrentEnergy, dramCurrentEnergy) - for _, test := range expectedResults { - acc.AssertContainsTaggedFields(t, "powerstat_package", test.fields, test.tags) +func TestParsePackageMetrics(t *testing.T) { + testCases := []parsePackageMetricTestCase{ + { + name: "NilSlice", + metrics: nil, + parsed: []packageMetricType{ + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + }, + { + name: "EmptySlice", + metrics: []packageMetricType{}, + parsed: []packageMetricType{}, + }, + { + name: "HasDuplicates", + metrics: []packageMetricType{ + packageCurrentPowerConsumption, + packageThermalDesignPower, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, // duplicate + }, + err: errors.New("package metrics contains duplicates"), + }, } -} -func TestAddMetricsForSingleCoreNegative(t *testing.T) { - var wg sync.WaitGroup - var acc testutil.Accumulator - core := "0" - power, mockServices := getPowerWithMockedServices() - - mockServices.msr.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once() - - // Skip generating metric for CPU frequency. - power.cpuFrequency = false - - wg.Add(1) - power.addMetricsForSingleCore(core, &acc, &wg) - wg.Wait() - - require.Empty(t, acc.GetTelegrafMetrics()) -} - -func TestAddCPUFrequencyMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "1" - coreID := "3" - packageID := "0" - frequency := 1200000.2 - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - - mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything). 
- Return(float64(0), errors.New("error on reading file")).Once() - - power.addCPUFrequencyMetric(cpuID, &acc) - require.Empty(t, acc.GetTelegrafMetrics()) - - mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once() + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + PackageMetrics: tc.metrics, + } - power.addCPUFrequencyMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) + err := p.parsePackageMetrics() - expectedFrequency := roundFloatToNearestTwoDecimalPlaces(frequency) - expectedMetric := getPowerCoreMetric("cpu_frequency_mhz", expectedFrequency, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.parsed, p.PackageMetrics) + } + }) + } } -func TestReadUncoreFreq(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "0" - packageID := "0" - die := "0" - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - preparedData := getPreparedCPUData([]string{cpuID}) - - mockServices.msr.On("getCPUCoresData").Return(preparedData) - - mockServices.msr.On("isMsrLoaded").Return(true) - - mockServices.msr.On("readSingleMsr", "0", msrUncorePerfStatusString).Return(uint64(10), nil) - - mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0"). - Return(float64(500), nil) - mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "max", "0"). - Return(float64(1200), nil) - mockServices.msr.On("retrieveUncoreFrequency", "0", "current", "min", "0"). - Return(float64(600), nil) - mockServices.msr.On("retrieveUncoreFrequency", "0", "current", "max", "0"). 
- Return(float64(1100), nil) - - power.readUncoreFreq("current", packageID, die, &acc) - power.readUncoreFreq("initial", packageID, die, &acc) - - require.Len(t, acc.GetTelegrafMetrics(), 2) - - expectedMetric := getPowerUncoreFreqMetric("initial", float64(500), float64(1200), nil, packageID, die) - acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags) - - expectedMetric = getPowerUncoreFreqMetric("current", float64(600), float64(1100), uint64(1000), packageID, die) - acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags) -} +func TestParseCPUMetrics(t *testing.T) { + testCases := []parseCPUMetricTestCase{ + { + name: "NilSlice", + metrics: nil, + parsed: nil, + }, + { + name: "EmptySlice", + metrics: []cpuMetricType{}, + parsed: []cpuMetricType{}, + }, + { + name: "HasDuplicates", + metrics: []cpuMetricType{ + cpuC0StateResidency, + cpuC1StateResidency, + cpuTemperature, + cpuC0StateResidency, // duplicate + }, + err: errors.New("cpu metrics contains duplicates"), + }, + } -func TestAddCoreCPUTemperatureMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "2" - packageID := "1" - power, mockServices := getPowerWithMockedServices() - preparedData := getPreparedCPUData([]string{cpuID}) - expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + CPUMetrics: tc.metrics, + } - mockServices.msr.On("getCPUCoresData").Return(preparedData).Once() - power.addCPUTemperatureMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) + err := p.parseCPUMetrics() - expectedMetric := getPowerCoreMetric("cpu_temperature_celsius", expectedTemp, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) + if tc.err != nil { + 
require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.parsed, p.CPUMetrics) + } + }) + } } -func TestAddC6StateResidencyMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "2" - packageID := "1" - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - preparedData := getPreparedCPUData([]string{cpuID}) - expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * - float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta)) - - mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice() - power.addCPUC6StateResidencyMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) - - expectedMetric := getPowerCoreMetric("cpu_c6_state_residency_percent", expectedC6, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) - - acc.ClearMetrics() - preparedData[cpuID].timeStampCounterDelta = 0 - - power.addCPUC6StateResidencyMetric(cpuID, &acc) - require.Empty(t, acc.GetTelegrafMetrics()) -} +func TestParseCPUTimeRelatedMsrMetrics(t *testing.T) { + testCases := []parseCPUMetricTestCase{ + { + name: "EmptySlice", + metrics: []cpuMetricType{}, + parsed: []cpuMetricType{}, + }, + { + name: "NotFound", + metrics: []cpuMetricType{ + // Metric not relying on MSR. 
+ cpuFrequency, -func TestAddC0StateResidencyMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "2" - packageID := "1" - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - preparedData := getPreparedCPUData([]string{cpuID}) - expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) / - float64(preparedData[cpuID].timeStampCounterDelta)) - - mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice() - power.cpuBusyCycles, power.cpuC0StateResidency = true, true - power.addCPUC0StateResidencyMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 2) + // Metric relying on single MSR read. + cpuTemperature, - expectedMetric := getPowerCoreMetric("cpu_c0_state_residency_percent", expectedBusyCycles, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) + // Metrics relying on perf events. + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + parsed: []cpuMetricType{}, + }, + { + name: "Found", + metrics: []cpuMetricType{ + // Metric not relying on MSR. + cpuFrequency, - // Deprecated - expectedMetric = getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) + // Metric relying on single MSR read. + cpuTemperature, - acc.ClearMetrics() - preparedData[cpuID].timeStampCounterDelta = 0 - power.addCPUC0StateResidencyMetric(cpuID, &acc) - require.Empty(t, acc.GetTelegrafMetrics()) -} + // Metrics relying on time-related MSR offset reads. 
+ cpuC0StateResidency, + cpuC1StateResidency, + cpuC3StateResidency, + cpuC6StateResidency, + cpuC7StateResidency, + cpuBusyCycles, + cpuBusyFrequency, -func TestAddProcessorBusyFrequencyMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "2" - packageID := "1" - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - preparedData := getPreparedCPUData([]string{cpuID}) - power.skipFirstIteration = false + // Metrics relying on perf events. + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + }, + parsed: []cpuMetricType{ + cpuC0StateResidency, + cpuC1StateResidency, + cpuC3StateResidency, + cpuC6StateResidency, + cpuC7StateResidency, + cpuBusyCycles, + cpuBusyFrequency, + }, + }, + } - mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice() - power.addCPUBusyFrequencyMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + CPUMetrics: tc.metrics, + } - acc.ClearMetrics() - preparedData[cpuID].mperfDelta = 0 - power.addCPUBusyFrequencyMetric(cpuID, &acc) - require.Empty(t, acc.GetTelegrafMetrics()) + p.parseCPUTimeRelatedMsrMetrics() + require.Equal(t, tc.parsed, p.parsedCPUTimedMsrMetrics) + }) + } } -func TestAddC1StateResidencyMetric(t *testing.T) { - var acc testutil.Accumulator - cpuID := "0" - coreID := "2" - packageID := "1" - power, mockServices := getPowerWithMockedServices() - prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) - preparedData := getPreparedCPUData([]string{cpuID}) - c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta - - preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta - expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta)) - - 
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice() - - power.addCPUC1StateResidencyMetric(cpuID, &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) - - expectedMetric := getPowerCoreMetric("cpu_c1_state_residency_percent", expectedC1, coreID, packageID, cpuID) - acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) +func TestParseCPUPerfMetrics(t *testing.T) { + testCases := []parseCPUMetricTestCase{ + { + name: "EmptySlice", + metrics: []cpuMetricType{}, + parsed: []cpuMetricType{}, + }, + { + name: "NotFound", + metrics: []cpuMetricType{ + // Metric not relying on MSR. + cpuFrequency, - acc.ClearMetrics() - preparedData[cpuID].timeStampCounterDelta = 0 - power.addCPUC1StateResidencyMetric(cpuID, &acc) - require.Empty(t, acc.GetTelegrafMetrics()) -} + // Metric relying on single MSR read. + cpuTemperature, -func TestAddThermalDesignPowerMetric(t *testing.T) { - var acc testutil.Accumulator - sockets := []string{"0"} - maxPower := 195720672.1 - power, mockServices := getPowerWithMockedServices() + // Metrics relying on time-related MSR offset reads. + cpuC3StateResidency, + cpuC6StateResidency, + cpuBusyFrequency, + }, + parsed: []cpuMetricType{}, + }, + { + name: "Found", + metrics: []cpuMetricType{ + // Metric not relying on MSR. + cpuFrequency, - mockServices.rapl.On("getConstraintMaxPowerWatts", mock.Anything). - Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once(). - On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once() + // Metric relying on single MSR read. + cpuTemperature, - power.addThermalDesignPowerMetric(sockets[0], &acc) - require.Empty(t, acc.GetTelegrafMetrics()) + // Metrics relying on perf events. 
+ cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, - power.addThermalDesignPowerMetric(sockets[0], &acc) - require.Len(t, acc.GetTelegrafMetrics(), 1) - - expectedTDP := roundFloatToNearestTwoDecimalPlaces(maxPower) - expectedMetric := getPowerGlobalMetric("thermal_design_power_watts", expectedTDP, sockets[0]) - acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags) -} - -func TestCalculateTurboRatioGroup(t *testing.T) { - coreCounts := uint64(0x0807060504030201) - msr := uint64(0x0807060504030201) - turboRatioLimitGroups := make(map[int]uint64) - - calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups) - require.Len(t, turboRatioLimitGroups, 8) - require.Equal(t, uint64(100), turboRatioLimitGroups[1]) - require.Equal(t, uint64(200), turboRatioLimitGroups[2]) - require.Equal(t, uint64(300), turboRatioLimitGroups[3]) - require.Equal(t, uint64(400), turboRatioLimitGroups[4]) - require.Equal(t, uint64(500), turboRatioLimitGroups[5]) - require.Equal(t, uint64(600), turboRatioLimitGroups[6]) - require.Equal(t, uint64(700), turboRatioLimitGroups[7]) - require.Equal(t, uint64(800), turboRatioLimitGroups[8]) - - coreCounts = uint64(0x100e0c0a08060402) - calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups) - require.Len(t, turboRatioLimitGroups, 16) - require.Equal(t, uint64(100), turboRatioLimitGroups[1]) - require.Equal(t, uint64(100), turboRatioLimitGroups[2]) - require.Equal(t, uint64(200), turboRatioLimitGroups[3]) - require.Equal(t, uint64(200), turboRatioLimitGroups[4]) - require.Equal(t, uint64(300), turboRatioLimitGroups[5]) - require.Equal(t, uint64(300), turboRatioLimitGroups[6]) - require.Equal(t, uint64(400), turboRatioLimitGroups[7]) - require.Equal(t, uint64(400), turboRatioLimitGroups[8]) - require.Equal(t, uint64(500), turboRatioLimitGroups[9]) - require.Equal(t, uint64(500), turboRatioLimitGroups[10]) - require.Equal(t, uint64(600), turboRatioLimitGroups[11]) 
- require.Equal(t, uint64(600), turboRatioLimitGroups[12]) - require.Equal(t, uint64(700), turboRatioLimitGroups[13]) - require.Equal(t, uint64(700), turboRatioLimitGroups[14]) - require.Equal(t, uint64(800), turboRatioLimitGroups[15]) - require.Equal(t, uint64(800), turboRatioLimitGroups[16]) - coreCounts = uint64(0x1211) - msr = uint64(0xfffe) - calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups) - require.Len(t, turboRatioLimitGroups, 18) - require.Equal(t, uint64(25400), turboRatioLimitGroups[17]) - require.Equal(t, uint64(25500), turboRatioLimitGroups[18]) - - coreCounts = uint64(0x1201) - msr = uint64(0x0202) - calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups) - require.Len(t, turboRatioLimitGroups, 18) - require.Equal(t, uint64(200), turboRatioLimitGroups[1]) - require.Equal(t, uint64(200), turboRatioLimitGroups[2]) - require.Equal(t, uint64(200), turboRatioLimitGroups[3]) - require.Equal(t, uint64(200), turboRatioLimitGroups[4]) - require.Equal(t, uint64(200), turboRatioLimitGroups[5]) - require.Equal(t, uint64(200), turboRatioLimitGroups[6]) - require.Equal(t, uint64(200), turboRatioLimitGroups[7]) - require.Equal(t, uint64(200), turboRatioLimitGroups[8]) - require.Equal(t, uint64(200), turboRatioLimitGroups[9]) - require.Equal(t, uint64(200), turboRatioLimitGroups[10]) - require.Equal(t, uint64(200), turboRatioLimitGroups[11]) - require.Equal(t, uint64(200), turboRatioLimitGroups[12]) - require.Equal(t, uint64(200), turboRatioLimitGroups[13]) - require.Equal(t, uint64(200), turboRatioLimitGroups[14]) - require.Equal(t, uint64(200), turboRatioLimitGroups[15]) - require.Equal(t, uint64(200), turboRatioLimitGroups[16]) - require.Equal(t, uint64(200), turboRatioLimitGroups[17]) - require.Equal(t, uint64(200), turboRatioLimitGroups[18]) - - coreCounts = uint64(0x1211) - msr = uint64(0xfffe) - turboRatioLimitGroups = make(map[int]uint64) - calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups) - require.Len(t, 
turboRatioLimitGroups, 2) - require.Equal(t, uint64(25400), turboRatioLimitGroups[17]) - require.Equal(t, uint64(25500), turboRatioLimitGroups[18]) -} - -func getPreparedCPUData(cores []string) map[string]*msrData { - msrDataMap := make(map[string]*msrData) - - for _, core := range cores { - msrDataMap[core] = &msrData{ - mperf: 43079, - aperf: 82001, - timeStampCounter: 15514, - c3: 52829, - c6: 86930, - c7: 25340, - throttleTemp: 88150, - temp: 40827, - mperfDelta: 23515, - aperfDelta: 33866, - timeStampCounterDelta: 13686000, - c3Delta: 20003, - c6Delta: 44518, - c7Delta: 20979, - } + // Metrics relying on time-related MSR offset reads. + cpuC3StateResidency, + cpuC6StateResidency, + cpuBusyFrequency, + }, + parsed: []cpuMetricType{ + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + }, } - return msrDataMap -} + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + CPUMetrics: tc.metrics, + } -func getGlobalMetrics(maxPower float64, socketCurrentEnergy float64, dramCurrentEnergy float64) []struct { - fields map[string]interface{} - tags map[string]string -} { - return []struct { - fields map[string]interface{} - tags map[string]string - }{ - getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "0"), - getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "1"), - getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "0"), - getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "1"), - getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "0"), - getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "1"), + p.parseCPUPerfMetrics() + require.Equal(t, tc.parsed, 
p.parsedCPUPerfMetrics) + }) } } -func getPowerCoreMetric(name string, value interface{}, coreID string, packageID string, cpuID string) struct { - fields map[string]interface{} - tags map[string]string -} { - return getPowerMetric(name, value, map[string]string{"package_id": packageID, "core_id": coreID, "cpu_id": cpuID}) -} - -func getPowerGlobalMetric(name string, value interface{}, socketID string) struct { - fields map[string]interface{} - tags map[string]string -} { - return getPowerMetric(name, value, map[string]string{"package_id": socketID}) -} +func TestParsePackageRaplMetrics(t *testing.T) { + testCases := []parsePackageMetricTestCase{ + { + name: "EmptySlice", + metrics: []packageMetricType{}, + parsed: []packageMetricType{}, + }, + { + name: "NotFound", + metrics: []packageMetricType{ + // Metrics not relying on rapl. + packageTurboLimit, + packageCPUBaseFrequency, + packageUncoreFrequency, + }, + parsed: []packageMetricType{}, + }, + { + name: "Found", + metrics: []packageMetricType{ + // Metrics not relying on rapl. + packageTurboLimit, + packageCPUBaseFrequency, + packageUncoreFrequency, -func getPowerUncoreFreqMetric(typeFreq string, limitMin interface{}, limitMax interface{}, current interface{}, socketID string, die string) struct { - fields map[string]interface{} - tags map[string]string -} { - var ret struct { - fields map[string]interface{} - tags map[string]string + // Metrics relying on rapl. 
+ packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + parsed: []packageMetricType{ + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + }, } - ret.tags = make(map[string]string) - ret.fields = make(map[string]interface{}) - ret.tags["package_id"] = socketID - ret.tags["die"] = die - ret.tags["type"] = typeFreq - ret.fields["uncore_frequency_limit_mhz_min"] = limitMin - ret.fields["uncore_frequency_limit_mhz_max"] = limitMax - if typeFreq == "current" { - ret.fields["uncore_frequency_mhz_cur"] = current + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + PackageMetrics: tc.metrics, + } + + p.parsePackageRaplMetrics() + require.Equal(t, tc.parsed, p.parsedPackageRaplMetrics) + }) } - return ret } -func getPowerMetric(name string, value interface{}, tags map[string]string) struct { - fields map[string]interface{} - tags map[string]string -} { - return struct { - fields map[string]interface{} - tags map[string]string - }{ - map[string]interface{}{ - name: value, +func TestParsePackageMsrMetrics(t *testing.T) { + testCases := []parsePackageMetricTestCase{ + { + name: "EmptySlice", + metrics: []packageMetricType{}, + parsed: []packageMetricType{}, }, - tags, - } -} + { + name: "NotFound", + metrics: []packageMetricType{ + // Metrics not relying on msr. + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + parsed: []packageMetricType{}, + }, + { + name: "Found", + metrics: []packageMetricType{ + // Metrics not relying on msr. 
+ packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, -func prepareCPUInfoForSingleCPU(power *PowerStat, cpuID string, coreID string, packageID string) { - power.cpuInfo = make(map[string]*cpuInfo) - power.cpuInfo[cpuID] = &cpuInfo{ - physicalID: packageID, - coreID: coreID, - cpuID: cpuID, + // Metrics relying uniquely on msr. + packageTurboLimit, + packageCPUBaseFrequency, + }, + parsed: []packageMetricType{ + packageTurboLimit, + packageCPUBaseFrequency, + }, + }, } -} -func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) { - power.cpuInfo = make(map[string]*cpuInfo) - currentCPU := 0 - for _, packageID := range packageIDs { - for _, coreID := range coreIDs { - cpuID := strconv.Itoa(currentCPU) - power.cpuInfo[cpuID] = &cpuInfo{ - physicalID: packageID, - cpuID: cpuID, - coreID: coreID, + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := &PowerStat{ + PackageMetrics: tc.metrics, } - currentCPU++ - } - } -} - -func enableCoreMetrics(power *PowerStat) { - power.cpuC0StateResidency = true - power.cpuC1StateResidency = true - power.cpuC6StateResidency = true - power.cpuTemperature = true - power.cpuBusyFrequency = true - power.cpuFrequency = true - power.cpuBusyCycles = true -} - -func disableCoreMetrics(power *PowerStat) { - power.cpuC0StateResidency = false - power.cpuC1StateResidency = false - power.cpuC6StateResidency = false - power.cpuBusyCycles = false - power.cpuTemperature = false - power.cpuBusyFrequency = false - power.cpuFrequency = false -} -func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData { - raplDataMap := make(map[string]*raplData, len(socketIDs)) - for _, socketID := range socketIDs { - raplDataMap[socketID] = &raplData{ - socketCurrentEnergy: socketCurrentEnergy, - dramCurrentEnergy: dramCurrentEnergy, - } + p.parsePackageMsrMetrics() + require.Equal(t, tc.parsed, 
p.parsedPackageMsrMetrics) + }) } +} - return raplDataMap -} - -func getPowerWithMockedServices() (*PowerStat, *MockServices) { - var mockServices MockServices - mockServices.fs = &mockFileService{} - mockServices.msr = &mockMsrService{} - mockServices.rapl = &mockRaplService{} - p := newPowerStat(mockServices.fs) - p.Log = testutil.Logger{Name: "PowerPluginTest"} - p.rapl = mockServices.rapl - p.msr = mockServices.msr - p.packageCurrentPowerConsumption = true - p.packageCurrentDramPowerConsumption = true - p.packageThermalDesignPower = true - - return p, &mockServices -} - -func TestGetBusClock(t *testing.T) { - tests := []struct { - name string - modelCPU uint64 - socketID string - msrFSBFreqValue uint64 - readSingleMsrErrFSB error - cpuBusClockValue float64 +func TestParseCoreRange(t *testing.T) { + testCases := []struct { + name string + coreRange string + cores []int + err error }{ { - name: "Error_withUnknownCPUmodel", - socketID: "0", - modelCPU: 0xFF, - cpuBusClockValue: 0, - }, - { - name: "OK_withFBS100", - socketID: "0", - modelCPU: 106, - msrFSBFreqValue: 1, - cpuBusClockValue: 100.0, + name: "InvalidFormat", + coreRange: "1,3", + cores: nil, + err: errors.New("invalid core range format"), }, { - name: "OK_withFBS133", - socketID: "0", - modelCPU: 0x1F, - cpuBusClockValue: 133, + name: "LowerBoundNonNumeric", + coreRange: "a-10", + cores: nil, + err: errors.New("failed to parse low bounds' core range"), }, { - name: "Error_withFBSCalculated", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 0, - readSingleMsrErrFSB: errors.New("something is wrong"), + name: "MissingLowerBound", + coreRange: "-10", + cores: nil, + err: errors.New("failed to parse low bounds' core range"), }, { - name: "OK_withFBSCalculated83.3", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 0, - cpuBusClockValue: 83.3, + name: "HigherBoundNonNumeric", + coreRange: "0-a", + cores: nil, + err: errors.New("failed to parse high bounds' core range"), }, { - name: 
"OK_withFBSCalculated100", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 1, - cpuBusClockValue: 100, + name: "MissingHigherBound", + coreRange: "0-", + cores: nil, + err: errors.New("failed to parse high bounds' core range"), }, { - name: "OK_withFBSCalculated133.3", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 2, - cpuBusClockValue: 133.3, + name: "InvalidBounds", + coreRange: "10-1", + cores: nil, + err: errors.New("high bound of core range cannot be less than low bound"), }, { - name: "OK_withFBSCalculated116.7", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 3, - cpuBusClockValue: 116.7, + name: "SingleCore", + coreRange: "1-1", + cores: []int{1}, }, { - name: "OK_withFBSCalculated80", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 4, - cpuBusClockValue: 80, - }, - { - name: "OK_withFBSCalculatedUnknownFSBFreq", - socketID: "0", - modelCPU: 0x37, - msrFSBFreqValue: 5, - cpuBusClockValue: 116.7, + name: "CoreRange", + coreRange: "5-10", + cores: []int{5, 6, 7, 8, 9, 10}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - p, mockServices := getPowerWithMockedServices() - busClockCalculate := []uint64{0x37, 0x4D} - p.cpuInfo = map[string]*cpuInfo{ - tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID, model: strconv.FormatUint(tt.modelCPU, 10)}, - } - if contains(busClockCalculate, tt.modelCPU) { - mockServices.msr.On("readSingleMsr", mock.Anything, msrFSBFreqString).Return(tt.msrFSBFreqValue, tt.readSingleMsrErrFSB) - } - defer mockServices.msr.AssertExpectations(t) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + coresOut, err := parseCoreRange(tc.coreRange) - value := p.getBusClock(tt.socketID) - require.Equal(t, tt.cpuBusClockValue, value) + require.Equal(t, tc.cores, coresOut) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } }) } } -func TestFillCPUBusClock(t *testing.T) { - tests := []struct { - name string - 
modelCPU uint64 - busClockValue float64 - packageCPUBaseFrequencySet bool +func TestParseGroupCores(t *testing.T) { + testCases := []struct { + name string + coreGroup string + cores []int + err error }{ { - name: "NotSet_0", - modelCPU: 0xFF, - busClockValue: 0, + name: "FailedToParseCoreRange", + coreGroup: "1-a,7,9,11", + cores: nil, + err: errors.New("failed to parse core range \"1-a\""), + }, + { + name: "FailedToParseSingleCore", + coreGroup: "1-5,7,b,11", + cores: nil, + err: errors.New("failed to parse single core"), }, { - name: "Set_100", - modelCPU: 0x2A, - busClockValue: 100, - packageCPUBaseFrequencySet: true, + name: "Ok", + coreGroup: "1-5,7,9,11", + cores: []int{1, 2, 3, 4, 5, 7, 9, 11}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - p, _ := getPowerWithMockedServices() - p.packageCPUBaseFrequency = true - p.cpuInfo = map[string]*cpuInfo{ - "0": {cpuID: "0", physicalID: "0", model: strconv.FormatUint(tt.modelCPU, 10)}, - } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + coresOut, err := parseGroupCores(tc.coreGroup) - p.fillCPUBusClock() - require.Equal(t, tt.busClockValue, p.cpuBusClockValue) - require.Equal(t, tt.packageCPUBaseFrequencySet, p.packageCPUBaseFrequency) + require.Equal(t, tc.cores, coresOut) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } }) } } -func TestAddCPUBaseFreq(t *testing.T) { - tests := []struct { - name string - socketID string - readSingleMsrErrRatio error - msrPlatformInfoValue uint64 - setupPowerstat func(t *testing.T) - clockBusValue float64 - nonTurboRatio float64 - metricExpected bool +func TestHasDuplicate(t *testing.T) { + t.Run("Int", func(t *testing.T) { + t.Run("False", func(t *testing.T) { + nums := []int{0, 1, 2, 3} + require.False(t, hasDuplicate(nums)) + }) + + t.Run("True", func(t *testing.T) { + nums := []uint32{0, 1, 2, 3, 4, 5, 6, 1} + require.True(t, hasDuplicate(nums)) + }) + }) + + 
t.Run("String", func(t *testing.T) { + t.Run("False", func(t *testing.T) { + strs := []string{"1", "2", "3", "4"} + require.False(t, hasDuplicate(strs)) + }) + + t.Run("True", func(t *testing.T) { + strs := []string{"1", "2", "3", "1"} + require.True(t, hasDuplicate(strs)) + }) + }) +} + +func TestParseCores(t *testing.T) { + testCases := []struct { + name string + coreGroups []string + cores []int + err error }{ { - name: "Error_reading_msr", - socketID: "0", - clockBusValue: 100, - readSingleMsrErrRatio: errors.New("can't read msr"), - metricExpected: false, + name: "InvalidCoreGroup", + coreGroups: []string{"1-4,11", "10-b"}, + cores: nil, + err: errors.New("failed to parse core group"), + }, + { + name: "FoundDuplicates", + coreGroups: []string{"1-4,11", "10-12"}, + cores: nil, + err: errors.New("core values cannot be duplicated"), }, { - name: "NoMetric_Ratio_is_0", - socketID: "0", - msrPlatformInfoValue: 0x8008082FF2810000, - clockBusValue: 100, - nonTurboRatio: 0, - metricExpected: false, + name: "CoresIsNil", + coreGroups: nil, + cores: []int{}, }, { - name: "OK_Ratio_is_24", - socketID: "0", - msrPlatformInfoValue: 0x8008082FF2811800, - clockBusValue: 100, - nonTurboRatio: 24, - metricExpected: true, + name: "CoresIsEmpty", + coreGroups: []string{}, + cores: []int{}, + }, + { + name: "Ok", + coreGroups: []string{"1-4,6", "8", "10-12", "15,20"}, + cores: []int{1, 2, 3, 4, 6, 8, 10, 11, 12, 15, 20}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var acc testutil.Accumulator - p, mockServices := getPowerWithMockedServices() - - p.cpuInfo = map[string]*cpuInfo{ - tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID}, - } - p.cpuBusClockValue = tt.clockBusValue - mockServices.msr.On("readSingleMsr", mock.Anything, msrPlatformInfoString).Return(tt.msrPlatformInfoValue, tt.readSingleMsrErrRatio) - defer mockServices.msr.AssertExpectations(t) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + coresOut, err := 
parseCores(tc.coreGroups) - p.addCPUBaseFreq(tt.socketID, &acc) - actual := acc.GetTelegrafMetrics() - if !tt.metricExpected { - require.Empty(t, actual) - return + require.Equal(t, tc.cores, coresOut) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) } - - require.Len(t, actual, 1) - expected := []telegraf.Metric{ - testutil.MustMetric( - "powerstat_package", - map[string]string{ - "package_id": tt.socketID, - }, - map[string]interface{}{ - "cpu_base_frequency_mhz": uint64(tt.nonTurboRatio * tt.clockBusValue), - }, - time.Unix(0, 0), - telegraf.Gauge, - ), - } - testutil.RequireMetricsEqual(t, expected, actual, testutil.IgnoreTime()) }) } } + +func TestParseConfig(t *testing.T) { + t.Run("BothCPUOptionsProvided", func(t *testing.T) { + p := &PowerStat{ + IncludedCPUs: []string{"0-10,20-22"}, + ExcludedCPUs: []string{"0"}, + } + + require.ErrorContains(t, p.parseConfig(), "both 'included_cpus' and 'excluded_cpus' configured; provide only one or none of the two") + }) + + t.Run("FailedToParseIncludedCPUs", func(t *testing.T) { + p := &PowerStat{ + // has duplicates + IncludedCPUs: []string{"1-4,11", "10-12"}, + } + + require.ErrorContains(t, p.parseConfig(), "failed to parse included CPUs") + }) + + t.Run("FailedToParseExcludedCPUs", func(t *testing.T) { + p := &PowerStat{ + // has non-numeric CPU ID + ExcludedCPUs: []string{"1-4,b"}, + } + + require.ErrorContains(t, p.parseConfig(), "failed to parse excluded CPUs") + }) + + t.Run("FailedToParseCPUMetrics", func(t *testing.T) { + p := &PowerStat{ + // has duplicates + CPUMetrics: []cpuMetricType{ + cpuFrequency, + cpuTemperature, + cpuFrequency, // duplicate + }, + } + + require.ErrorContains(t, p.parseConfig(), "failed to parse cpu metrics") + }) + + t.Run("EventDefinitionsNotProvidedForPerf", func(t *testing.T) { + p := &PowerStat{ + // has duplicates + CPUMetrics: []cpuMetricType{ + cpuC0SubstateC01Percent, + }, + } + + require.ErrorContains(t, 
p.parseConfig(), "'event_definitions' contains an empty path") + }) + + t.Run("EventDefinitionsDoesNotExist", func(t *testing.T) { + p := &PowerStat{ + // has duplicates + CPUMetrics: []cpuMetricType{ + cpuC0SubstateC02Percent, + }, + EventDefinitions: "./testdata/doesNotExist.json", + } + + require.ErrorContains(t, p.parseConfig(), "'event_definitions' file \"./testdata/doesNotExist.json\" does not exist") + }) + + t.Run("NoMetricsProvided", func(t *testing.T) { + p := &PowerStat{ + // Disable default package metrics. + PackageMetrics: []packageMetricType{}, + } + + require.ErrorContains(t, p.parseConfig(), "no metrics were found in the configuration file") + }) + + t.Run("DisablePackageMetrics", func(t *testing.T) { + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + cpuBusyFrequency, + }, + // Disable default package metrics. + PackageMetrics: []packageMetricType{}, + } + + require.NoError(t, p.parseConfig()) + require.Empty(t, p.PackageMetrics) + require.Len(t, p.CPUMetrics, 1) + }) + + t.Run("DefaultPackageMetrics", func(t *testing.T) { + p := &PowerStat{ + PackageMetrics: nil, // default package metrics + } + + require.NoError(t, p.parseConfig()) + require.Equal(t, + []packageMetricType{ + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + p.PackageMetrics) + }) + + t.Run("IncludedCPUs", func(t *testing.T) { + p := &PowerStat{ + IncludedCPUs: []string{"0-5"}, + } + + require.NoError(t, p.parseConfig()) + require.Equal(t, []int{0, 1, 2, 3, 4, 5}, p.parsedIncludedCores) + require.Nil(t, p.parsedExcludedCores) + }) + + t.Run("ExcludedCPUs", func(t *testing.T) { + p := &PowerStat{ + ExcludedCPUs: []string{"2-6", "8", "10"}, + } + + require.NoError(t, p.parseConfig()) + require.Equal(t, []int{2, 3, 4, 5, 6, 8, 10}, p.parsedExcludedCores) + require.Nil(t, p.parsedIncludedCores) + }) + + t.Run("MetricsWithIncludedCPUs", func(t *testing.T) { + p := &PowerStat{ + IncludedCPUs: []string{"0-3,6"}, + CPUMetrics: 
[]cpuMetricType{ + cpuC0StateResidency, + cpuC1StateResidency, + cpuC3StateResidency, + }, + PackageMetrics: []packageMetricType{ + packageUncoreFrequency, + packageTurboLimit, + }, + } + + require.NoError(t, p.parseConfig()) + require.Equal(t, []int{0, 1, 2, 3, 6}, p.parsedIncludedCores) + + // Check flags + require.True(t, p.needsMsrCPU) + require.True(t, p.needsTimeRelatedMsr) + require.False(t, p.needsCoreFreq) + require.False(t, p.needsPerf) + }) +} + +type mockOptGenerator struct { + mock.Mock +} + +func (m *mockOptGenerator) generate(cfg optConfig) []ptel.Option { + args := m.Called(cfg) + return args.Get(0).([]ptel.Option) +} + +func TestSampleConfig(t *testing.T) { + p := &PowerStat{} + require.NotZero(t, p.SampleConfig()) +} + +func TestInit(t *testing.T) { + t.Run("FailedToDisableUnsupportedMetrics", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/cpu_model_missing") + + p := &PowerStat{} + + require.ErrorContains(t, p.Init(), "error occurred while parsing CPU model") + }) + + t.Run("FailedToParseConfigWithDuplicates", func(t *testing.T) { + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + // has duplicates + IncludedCPUs: []string{"1-4,11", "10-12"}, + Log: logger, + } + + require.ErrorContains(t, p.Init(), "failed to parse included CPUs") + require.Empty(t, logger.Warnings()) + }) + + t.Run("FailedToParseConfigWithNegativeTimeout", func(t *testing.T) { + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + // negative value + MsrReadTimeout: -2, + Log: logger, + } + + require.ErrorContains(t, p.Init(), "msr_read_timeout should be positive number or equal to 0 (to disable timeouts)") + require.Empty(t, logger.Warnings()) + }) +} + +func TestStart(t *testing.T) { + t.Run("FailedToStart", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata") + + acc := &testutil.Accumulator{} + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + // has CPU ID out of bounds + parsedIncludedCores: []int{0, 9}, + Log: logger, + + option: 
&optGenerator{}, + } + + require.ErrorContains(t, p.Start(acc), "failed to initialize metric fetcher interface") + require.Empty(t, logger.Warnings()) + }) + + t.Run("WithWarning", func(t *testing.T) { + acc := &testutil.Accumulator{} + logger := &testutil.CaptureLogger{} + + mOptGenerator := &mockOptGenerator{} + mOptGenerator.On("generate", mock.AnythingOfType("optConfig")).Return( + []ptel.Option{ + ptel.WithRapl("/dummy/path"), + }, + ) + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + packageCurrentPowerConsumption, // needs rapl + }, + Log: logger, + + option: mOptGenerator, + } + + require.NoError(t, p.Start(acc)) + require.Len(t, logger.Warnings(), 1) + require.Contains(t, logger.Warnings()[0], "Plugin started with errors") + }) +} + +func TestGather(t *testing.T) { + t.Run("WithoutMetrics", func(t *testing.T) { + acc := &testutil.Accumulator{} + + p := &PowerStat{ + PackageMetrics: []packageMetricType{}, + } + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + }) + + t.Run("WithDefaultPackageMetrics", func(t *testing.T) { + packageID := 0 + + packagePower := 10.0 + dramPower := 5.0 + thermalDesignPower := 20.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return([]int{packageID}).Once() + + // mock getting current package power consumption metric. + mFetcher.On("GetCurrentPackagePowerConsumptionWatts", packageID).Return(packagePower, nil).Once() + + // mock getting current dram power consumption metric. + mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(dramPower, nil).Once() + + // mock getting package thermal design power metric. 
+ mFetcher.On("GetPackageThermalDesignPowerWatts", packageID).Return(thermalDesignPower, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 3) + require.True(t, acc.HasField("powerstat_package", "current_power_consumption_watts")) + require.True(t, acc.HasField("powerstat_package", "current_dram_power_consumption_watts")) + require.True(t, acc.HasField("powerstat_package", "thermal_design_power_watts")) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithPackageMetrics", func(t *testing.T) { + packageIDs := []int{0, 1, 2, 3} + + baseFreq := uint64(200) + packagePower := 30.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return(packageIDs).Once() + + // mock getting CPU base frequency metric. + mFetcher.On("GetCPUBaseFrequency", mock.AnythingOfType("int")).Return(baseFreq, nil).Times(len(packageIDs)) + + // mock getting current package power consumption metric. 
+ mFetcher.On("GetCurrentPackagePowerConsumptionWatts", mock.AnythingOfType("int")).Return(packagePower, nil).Times(len(packageIDs)) + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + packageCurrentPowerConsumption, + packageCPUBaseFrequency, + }, + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 8) + require.True(t, acc.HasField("powerstat_package", "cpu_base_frequency_mhz")) + require.True(t, acc.HasField("powerstat_package", "current_power_consumption_watts")) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithCPUMetrics", func(t *testing.T) { + cpuIDs := []int{0, 1, 2, 3} + + cpuFreq := 123.5 + cpuTemp := uint64(20) + cpuBusyFreq := 456.7 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers and coreFreq. + mFetcher.On("GetMsrCPUIDs").Return(cpuIDs).Once() + + // mock getting core ID for CPU IDs. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock getting package ID for CPU IDs. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock updating msr time-related metrics for CPU IDs. + mFetcher.On("UpdatePerCPUMetrics", mock.AnythingOfType("int")).Return(nil).Times(len(cpuIDs)) + + // mock getting CPU frequency for CPU IDs. + mFetcher.On("GetCPUFrequency", mock.AnythingOfType("int")).Return(cpuFreq, nil).Times(len(cpuIDs)) + + // mock getting CPU temperature metric for CPU IDs. + mFetcher.On("GetCPUTemperature", mock.AnythingOfType("int")).Return(cpuTemp, nil).Times(len(cpuIDs)) + + // mock getting CPU busy frequency metric for CPU IDs. 
+ mFetcher.On("GetCPUBusyFrequencyMhz", mock.AnythingOfType("int")).Return(cpuBusyFreq, nil).Times(len(cpuIDs)) + + p := &PowerStat{ + // Disables package metrics + PackageMetrics: []packageMetricType{}, + CPUMetrics: []cpuMetricType{ + cpuFrequency, + cpuTemperature, + cpuBusyFrequency, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 12) + require.True(t, acc.HasField("powerstat_core", "cpu_frequency_mhz")) + require.True(t, acc.HasField("powerstat_core", "cpu_temperature_celsius")) + require.True(t, acc.HasField("powerstat_core", "cpu_busy_frequency_mhz")) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithPerfMetrics", func(t *testing.T) { + cpuIDs := []int{0, 1, 2} + + c01 := 0.1 + c02 := 1.2 + c0Wait := 2.3 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting the CPU IDs reported by GetPerfCPUIDs. + mFetcher.On("GetPerfCPUIDs").Return(cpuIDs).Once() + + // mock getting core ID for CPU IDs. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock getting package ID for CPU IDs. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", mock.AnythingOfType("int")).Return(c01, nil).Times(len(cpuIDs)) + + // mock getting CPU C02 metric. + mFetcher.On("GetCPUC0SubstateC02Percent", mock.AnythingOfType("int")).Return(c02, nil).Times(len(cpuIDs)) + + // mock getting CPU C0Wait metric.
+ mFetcher.On("GetCPUC0SubstateC0WaitPercent", mock.AnythingOfType("int")).Return(c0Wait, nil).Times(len(cpuIDs)) + + p := &PowerStat{ + // Disables package metrics + PackageMetrics: []packageMetricType{}, + CPUMetrics: []cpuMetricType{ + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 9) + require.True(t, acc.HasField("powerstat_core", "cpu_c0_substate_c01_percent")) + require.True(t, acc.HasField("powerstat_core", "cpu_c0_substate_c02_percent")) + require.True(t, acc.HasField("powerstat_core", "cpu_c0_substate_c0_wait_percent")) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithPerfAndMsrCPUMetrics", func(t *testing.T) { + cpuIDsMsr := []int{0, 1, 2, 3} + cpuIDsPerf := []int{0, 1} + + c1 := 0.5 + c6 := 1.5 + c01 := 0.1 + c02 := 1.2 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers. + mFetcher.On("GetMsrCPUIDs").Return(cpuIDsMsr).Once() + + // mock getting core ID for CPU IDs. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDsMsr) + len(cpuIDsPerf)) + + // mock getting package ID for CPU IDs. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDsMsr) + len(cpuIDsPerf)) + + // mock updating msr time-related metrics for CPU IDs. + mFetcher.On("UpdatePerCPUMetrics", mock.AnythingOfType("int")).Return(nil).Times(len(cpuIDsMsr)) + + // mock getting CPU C1 state residency metric for CPU IDs. + mFetcher.On("GetCPUC1StateResidency", mock.AnythingOfType("int")).Return(c1, nil).Times(len(cpuIDsMsr)) + + // mock getting CPU C6 state residency metric for CPU IDs. 
+ mFetcher.On("GetCPUC6StateResidency", mock.AnythingOfType("int")).Return(c6, nil).Times(len(cpuIDsMsr)) + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting the CPU IDs reported by GetPerfCPUIDs. + mFetcher.On("GetPerfCPUIDs").Return(cpuIDsPerf).Once() + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", mock.AnythingOfType("int")).Return(c01, nil).Times(len(cpuIDsPerf)) + + // mock getting CPU C02 metric. + mFetcher.On("GetCPUC0SubstateC02Percent", mock.AnythingOfType("int")).Return(c02, nil).Times(len(cpuIDsPerf)) + + p := &PowerStat{ + // Disables package metrics + PackageMetrics: []packageMetricType{}, + CPUMetrics: []cpuMetricType{ + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC1StateResidency, + cpuC6StateResidency, + }, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 12) + require.True(t, acc.HasField("powerstat_core", "cpu_c0_substate_c01_percent")) + require.True(t, acc.HasField("powerstat_core", "cpu_c0_substate_c02_percent")) + require.True(t, acc.HasField("powerstat_core", "cpu_c1_state_residency_percent")) + require.True(t, acc.HasField("powerstat_core", "cpu_c6_state_residency_percent")) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithCPUAndPackageMetrics", func(t *testing.T) { + cpuIDs := []int{10, 12} + packageIDs := []int{0, 1, 2, 3} + dieIDs := []int{0, 1} + + c7 := 0.12 + + initMin := 200.0 + initMax := 1200.0 + currMin := 300.0 + currMax := 1300.0 + curr := 800.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers and coreFreq. + mFetcher.On("GetMsrCPUIDs").Return(cpuIDs).Once() + + // mock getting core ID for CPU IDs.
+ mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock getting package ID for CPU IDs. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Times(len(cpuIDs)) + + // mock updating msr time-related metrics for CPU IDs. + mFetcher.On("UpdatePerCPUMetrics", mock.AnythingOfType("int")).Return(nil).Times(len(cpuIDs)) + + // mock getting C7 state residency metric for CPU IDs. + mFetcher.On("GetCPUC7StateResidency", mock.AnythingOfType("int")).Return(c7, nil).Times(len(cpuIDs)) + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return(packageIDs).Once() + + // mock getting die IDs for package ID. + mFetcher.On("GetPackageDieIDs", mock.AnythingOfType("int")).Return(dieIDs, nil).Times(len(packageIDs)) + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(initMin, nil). + Times(len(packageIDs) * len(dieIDs)) + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(initMax, nil). + Times(len(packageIDs) * len(dieIDs)) + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(currMin, nil). + Times(len(packageIDs) * len(dieIDs)) + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(currMax, nil). + Times(len(packageIDs) * len(dieIDs)) + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(curr, nil).Times(len(packageIDs) * len(dieIDs)) + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + packageUncoreFrequency, + }, + CPUMetrics: []cpuMetricType{ + cpuC7StateResidency, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + require.NoError(t, p.Gather(acc)) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 18) + require.True(t, acc.HasField("powerstat_core", "cpu_c7_state_residency_percent")) + require.True(t, acc.HasField("powerstat_package", "uncore_frequency_limit_mhz_min")) + require.True(t, acc.HasField("powerstat_package", "uncore_frequency_limit_mhz_max")) + require.True(t, acc.HasField("powerstat_package", "uncore_frequency_mhz_cur")) + require.True(t, acc.HasTag("powerstat_package", "type")) + mFetcher.AssertExpectations(t) + }) +} + +func TestStop(t *testing.T) { + t.Run("NoErrorWithoutPerf", func(t *testing.T) { + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + Log: logger, + + needsPerf: false, + } + + p.Stop() + + require.Empty(t, logger.Errors()) + }) + + t.Run("FailedToDeactivatePerfEvents", func(t *testing.T) { + logger := &testutil.CaptureLogger{} + + mFetcher := &fetcherMock{} + + // mock deactivating perf events. + mFetcher.On("DeactivatePerfEvents").Return(errors.New("mock error")).Once() + + p := &PowerStat{ + Log: logger, + + fetcher: mFetcher, + + needsPerf: true, + } + + p.Stop() + + require.Len(t, logger.Errors(), 1) + require.Contains(t, logger.Errors()[0], "Failed to deactivate perf events") + }) + + t.Run("NoErrorWithPerf", func(t *testing.T) { + logger := &testutil.CaptureLogger{} + + mFetcher := &fetcherMock{} + + // mock deactivating perf events. 
+ mFetcher.On("DeactivatePerfEvents").Return(nil).Once() + + p := &PowerStat{ + Log: logger, + + fetcher: mFetcher, + + needsPerf: true, + } + + p.Stop() + + require.Empty(t, logger.Errors()) + }) +} + +func TestDisableUnsupportedMetrics(t *testing.T) { + t.Run("ModelMissing", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/cpu_model_missing") + + p := &PowerStat{} + + err := p.disableUnsupportedMetrics() + + require.ErrorContains(t, err, "error occurred while parsing CPU model") + }) + + t.Run("MsrFlagNotFound", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/msr_flag_not_found") + + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics relying on msr flag + cpuC0StateResidency, + cpuC1StateResidency, + cpuC6StateResidency, + cpuBusyFrequency, + cpuBusyCycles, + cpuTemperature, + }, + PackageMetrics: []packageMetricType{ + // Metrics relying on msr flag + packageCPUBaseFrequency, + packageTurboLimit, + + // Metrics not relying on msr flag + packageCurrentPowerConsumption, + packageThermalDesignPower, + }, + + Log: logger, + } + + err := p.disableUnsupportedMetrics() + + require.NoError(t, err) + require.Empty(t, p.CPUMetrics) + require.Len(t, p.PackageMetrics, 2) + require.Contains(t, p.PackageMetrics, packageCurrentPowerConsumption) + require.Contains(t, p.PackageMetrics, packageThermalDesignPower) + require.Len(t, logger.Warnings(), 8) + }) + + t.Run("AperfMperfFlagNotFound", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/aperfmperf_flag_not_found") + + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics relying on aperfmperf flag + cpuC0StateResidency, + cpuC1StateResidency, + cpuBusyFrequency, + cpuBusyCycles, + + // Metrics not relying on aperfmperf flag + cpuTemperature, + }, + + Log: logger, + } + + err := p.disableUnsupportedMetrics() + + require.NoError(t, err) + require.Len(t, p.CPUMetrics, 1) + require.Contains(t, p.CPUMetrics,
cpuTemperature) + require.Len(t, logger.Warnings(), 4) + }) + + t.Run("DtsFlagNotFound", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/dts_flag_not_found") + + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics relying on dts flag + cpuTemperature, + + // Metrics not relying on dts flag + cpuBusyFrequency, + }, + PackageMetrics: []packageMetricType{}, + + Log: logger, + } + + err := p.disableUnsupportedMetrics() + + require.NoError(t, err) + require.Len(t, p.CPUMetrics, 1) + require.Contains(t, p.CPUMetrics, cpuBusyFrequency) + require.Len(t, logger.Warnings(), 1) + }) + + t.Run("ModelNotSupported", func(t *testing.T) { + t.Setenv("HOST_PROC", "./testdata/model_not_supported") + + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics not supported by CPU + cpuTemperature, + cpuC1StateResidency, + cpuC3StateResidency, + cpuC6StateResidency, + cpuC7StateResidency, + }, + PackageMetrics: []packageMetricType{ + // Metrics not supported by CPU + packageCPUBaseFrequency, + + packageUncoreFrequency, + }, + + Log: logger, + } + + err := p.disableUnsupportedMetrics() + + require.NoError(t, err) + require.Empty(t, p.CPUMetrics) + require.Contains(t, p.PackageMetrics, packageUncoreFrequency) + require.Len(t, logger.Warnings(), 6) + }) +} + +func TestDisableCPUMetric(t *testing.T) { + t.Run("NoMetricsRemoved", func(t *testing.T) { + expStartLen := 1 + expEndLen := 1 + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{cpuC1StateResidency}, + Log: logger, + } + + require.Len(t, p.CPUMetrics, expStartLen) + p.disableCPUMetric(cpuC3StateResidency) + require.Len(t, p.CPUMetrics, expEndLen) + + require.Empty(t, logger.Warnings()) + }) + t.Run("TwoMetricsRemoved", func(t *testing.T) { + expStartLen := 3 + expEndLen := 1 + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{cpuC1StateResidency, 
cpuC3StateResidency, cpuC6StateResidency}, + Log: logger, + } + + require.Len(t, p.CPUMetrics, expStartLen) + p.disableCPUMetric(cpuC3StateResidency) + p.disableCPUMetric(cpuC1StateResidency) + require.Len(t, p.CPUMetrics, expEndLen) + + require.Len(t, logger.Warnings(), 2) + require.Contains(t, logger.Warnings()[0], "\"cpu_c3_state_residency\" is not supported by CPU, metric will not be gathered") + require.Contains(t, logger.Warnings()[1], "\"cpu_c1_state_residency\" is not supported by CPU, metric will not be gathered") + }) +} + +func TestDisablePackageMetric(t *testing.T) { + t.Run("NoMetricsRemoved", func(t *testing.T) { + expStartLen := 1 + expEndLen := 1 + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + PackageMetrics: []packageMetricType{packageCurrentPowerConsumption}, + Log: logger, + } + + require.Len(t, p.PackageMetrics, expStartLen) + p.disablePackageMetric(packageCPUBaseFrequency) + require.Len(t, p.PackageMetrics, expEndLen) + + require.Empty(t, logger.Warnings()) + }) + t.Run("TwoMetricsRemoved", func(t *testing.T) { + expStartLen := 3 + expEndLen := 1 + logger := &testutil.CaptureLogger{} + + p := &PowerStat{ + PackageMetrics: []packageMetricType{packageCurrentPowerConsumption, packageTurboLimit, packageCPUBaseFrequency}, + Log: logger, + } + + require.Len(t, p.PackageMetrics, expStartLen) + p.disablePackageMetric(packageCPUBaseFrequency) + p.disablePackageMetric(packageTurboLimit) + require.Len(t, p.PackageMetrics, expEndLen) + + require.Len(t, logger.Warnings(), 2) + require.Contains(t, logger.Warnings()[0], "\"cpu_base_frequency\" is not supported by CPU, metric will not be gathered") + require.Contains(t, logger.Warnings()[1], "\"max_turbo_frequency\" is not supported by CPU, metric will not be gathered") + }) +} + +type fetcherMock struct { + mock.Mock +} + +func (m *fetcherMock) GetMsrCPUIDs() []int { + args := m.Called() + if args.Get(0) == nil { + return nil + } + return args.Get(0).([]int) +} + +func (m *fetcherMock) 
GetPerfCPUIDs() []int { + args := m.Called() + if args.Get(0) == nil { + return nil + } + return args.Get(0).([]int) +} + +func (m *fetcherMock) GetPackageIDs() []int { + args := m.Called() + if args.Get(0) == nil { + return nil + } + return args.Get(0).([]int) +} + +func (m *fetcherMock) GetCPUPackageID(cpuID int) (int, error) { + args := m.Called(cpuID) + return args.Int(0), args.Error(1) +} + +func (m *fetcherMock) GetCPUCoreID(cpuID int) (int, error) { + args := m.Called(cpuID) + return args.Int(0), args.Error(1) +} + +func (m *fetcherMock) GetPackageDieIDs(packageID int) ([]int, error) { + args := m.Called(packageID) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]int), args.Error(1) +} + +func (m *fetcherMock) GetCPUFrequency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) UpdatePerCPUMetrics(cpuID int) error { + args := m.Called(cpuID) + return args.Error(0) +} + +func (m *fetcherMock) GetCPUTemperature(cpuID int) (uint64, error) { + args := m.Called(cpuID) + return args.Get(0).(uint64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC0StateResidency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC1StateResidency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC3StateResidency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC6StateResidency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC7StateResidency(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUBusyFrequencyMhz(cpuID int) (float64, error) { + args := 
m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) ReadPerfEvents() error { + args := m.Called() + return args.Error(0) +} + +func (m *fetcherMock) DeactivatePerfEvents() error { + args := m.Called() + return args.Error(0) +} + +func (m *fetcherMock) GetCPUC0SubstateC01Percent(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC0SubstateC02Percent(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUC0SubstateC0WaitPercent(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCPUBaseFrequency(packageID int) (uint64, error) { + args := m.Called(packageID) + return args.Get(0).(uint64), args.Error(1) +} + +func (m *fetcherMock) GetInitialUncoreFrequencyMin(packageID, dieID int) (float64, error) { + args := m.Called(packageID, dieID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCustomizedUncoreFrequencyMin(packageID, dieID int) (float64, error) { + args := m.Called(packageID, dieID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetInitialUncoreFrequencyMax(packageID, dieID int) (float64, error) { + args := m.Called(packageID, dieID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCustomizedUncoreFrequencyMax(packageID, dieID int) (float64, error) { + args := m.Called(packageID, dieID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCurrentUncoreFrequency(packageID, dieID int) (float64, error) { + args := m.Called(packageID, dieID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetCurrentPackagePowerConsumptionWatts(packageID int) (float64, error) { + args := m.Called(packageID) + return args.Get(0).(float64), args.Error(1) +} + +func (m 
*fetcherMock) GetCurrentDramPowerConsumptionWatts(packageID int) (float64, error) { + args := m.Called(packageID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetPackageThermalDesignPowerWatts(packageID int) (float64, error) { + args := m.Called(packageID) + return args.Get(0).(float64), args.Error(1) +} + +func (m *fetcherMock) GetMaxTurboFreqList(packageID int) ([]ptel.MaxTurboFreq, error) { + args := m.Called(packageID) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]ptel.MaxTurboFreq), args.Error(1) +} + +func TestAddCPUMetrics(t *testing.T) { + // Disable package metrics when parseConfig method is called. + packageMetrics := []packageMetricType{} + + t.Run("NoAvailableCPUs", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers. + mFetcher.On("GetMsrCPUIDs").Return(nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + p.addCPUMetrics(acc) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithDataCPUIDErrors", func(t *testing.T) { + t.Run("SingleCPUID", func(t *testing.T) { + cpuID := 0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers and coreFreq. + mFetcher.On("GetMsrCPUIDs").Return([]int{cpuID}).Once() + + // mock getting core ID for CPU ID. 
+ mFetcher.On("GetCPUCoreID", cpuID).Return(0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + p.addCPUMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get coreFreq and/or msr metrics for CPU ID %v", cpuID)) + require.Empty(t, acc.GetTelegrafMetrics()) + mFetcher.AssertExpectations(t) + }) + + t.Run("MultipleCPUIDs", func(t *testing.T) { + cpuID := 1 + coreID := 2 + packageID := 3 + cpuFreq := 500.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers and coreFreq. + mFetcher.On("GetMsrCPUIDs").Return([]int{0, cpuID}).Once() + + // mock getting core ID for CPU ID 0. + mFetcher.On("GetCPUCoreID", 0).Return(0, errors.New("mock error")).Once() + + // mock getting core ID for CPU ID 1. + mFetcher.On("GetCPUCoreID", cpuID).Return(coreID, nil).Once() + + // mock getting package ID for CPU ID 1. + mFetcher.On("GetCPUPackageID", cpuID).Return(packageID, nil).Once() + + // mock getting CPU frequency for CPU ID 1. + mFetcher.On("GetCPUFrequency", cpuID).Return(cpuFreq, nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metric which relies on coreFreq. + cpuFrequency, + + // Metrics which do not rely on coreFreq nor msr. 
+ cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), "failed to get coreFreq and/or msr metrics for CPU ID 0") + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_frequency_mhz": cpuFreq, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + }) + + t.Run("WithCoreFreqMetrics", func(t *testing.T) { + cpuFreq := 500.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to coreFreq. + mFetcher.On("GetMsrCPUIDs").Return([]int{0, 1}).Once() + + // mock getting corresponding core ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(0, nil).Twice() + + // mock getting corresponding package ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Twice() + + // mock getting CPU frequency for CPU ID 0. + mFetcher.On("GetCPUFrequency", 0).Return(cpuFreq, nil).Once() + + // mock getting CPU frequency for CPU ID 1. + mFetcher.On("GetCPUFrequency", 1).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metric which relies on coreFreq. 
+ cpuFrequency, + + // Metrics which do not rely on coreFreq nor msr + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID 1", cpuFrequency)) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_frequency_mhz": cpuFreq, + }, + // tags + map[string]string{ + "cpu_id": "0", + "core_id": "0", + "package_id": "1", + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithMsrMetrics", func(t *testing.T) { + t.Run("SingleRead", func(t *testing.T) { + cpuTemp := uint64(18) + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers. + mFetcher.On("GetMsrCPUIDs").Return([]int{0, 1}).Once() + + // mock getting corresponding core ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(0, nil).Twice() + + // mock getting corresponding package ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Twice() + + // mock getting CPU temperature metric for CPU ID 0. + mFetcher.On("GetCPUTemperature", 0).Return(cpuTemp, nil).Once() + + // mock getting CPU temperature metric for CPU ID 1. + mFetcher.On("GetCPUTemperature", 1).Return(uint64(0), errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which rely on single-read msr registers. 
+ cpuTemperature, + + // Metrics which do not rely on coreFreq nor msr + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID 1", cpuTemperature)) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_temperature_celsius": cpuTemp, + }, + // tags + map[string]string{ + "cpu_id": "0", + "core_id": "0", + "package_id": "1", + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("TimeRelated", func(t *testing.T) { + cpuBusyFreq := 750.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available CPU IDs with access to msr registers. + mFetcher.On("GetMsrCPUIDs").Return([]int{0, 1}).Once() + + // mock getting corresponding core ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUCoreID", mock.AnythingOfType("int")).Return(0, nil).Twice() + + // mock getting corresponding package ID to CPU IDs 0 and 1. + mFetcher.On("GetCPUPackageID", mock.AnythingOfType("int")).Return(1, nil).Twice() + + // mock updating msr time-related metrics for CPU ID 0. + mFetcher.On("UpdatePerCPUMetrics", 0).Return(errors.New("mock error")).Once() + + // mock updating msr time-related metrics for CPU ID 1. + mFetcher.On("UpdatePerCPUMetrics", 1).Return(nil).Once() + + // mock getting CPU busy frequency metric for CPU ID 1. + mFetcher.On("GetCPUBusyFrequencyMhz", 1).Return(cpuBusyFreq, nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which rely on time-related msr reads. 
+ cpuBusyFrequency, + + // Metrics which do not rely on coreFreq nor msr + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), "failed to update MSR time-related metrics for CPU ID 0") + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_busy_frequency_mhz": cpuBusyFreq, + }, + // tags + map[string]string{ + "cpu_id": "1", + "core_id": "0", + "package_id": "1", + }, + ) + mFetcher.AssertExpectations(t) + }) + }) +} + +func TestAddPerCPUMsrMetrics(t *testing.T) { + // Disable package metrics when parseConfig method is called. + packageMetrics := []packageMetricType{} + + t.Run("WithoutMsrMetrics", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + + acc := &testutil.Accumulator{} + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // metrics which do not rely on msr + cpuFrequency, + cpuC0SubstateC01Percent, + }, + } + + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + }) + + t.Run("WithSingleMsrReadMetrics", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + cpuMetrics := []cpuMetricType{ + // metric that relies on a single msr read. + cpuTemperature, + + // metrics that do not rely on msr. + cpuFrequency, + cpuC0SubstateC01Percent, + } + + t.Run("WithError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU temperature metric. 
+ mFetcher.On("GetCPUTemperature", cpuID).Return(uint64(0), errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.NoError(t, p.parseConfig()) + + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuTemperature, cpuID)) + require.Empty(t, p.logOnce) + require.Empty(t, acc.GetTelegrafMetrics()) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithModuleNotInitializedError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mErr := &ptel.ModuleNotInitializedError{Name: "msr"} + mFetcher := &fetcherMock{} + + // mock getting CPU temperature metric. + mFetcher.On("GetCPUTemperature", cpuID).Return(uint64(0), mErr).Twice() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.NoError(t, p.parseConfig()) + + // First call adds the error to the accumulator and logOnce map. + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q: %v", cpuTemperature, mErr)) + require.Empty(t, acc.GetTelegrafMetrics()) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, "msr_cpu_temperature") + + mFetcher.AssertExpectations(t) + }) + + t.Run("WithoutErrors", func(t *testing.T) { + cpuTemp := uint64(20) + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU temperature metric. 
+ mFetcher.On("GetCPUTemperature", cpuID).Return(cpuTemp, nil).Once() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_temperature_celsius": cpuTemp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + }) + + t.Run("WithTimeRelatedMsrMetrics", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + + c1State := 5.15 + c6State := 8.10 + + cpuMetrics := []cpuMetricType{ + // metrics that rely on a time-related msr. + cpuC1StateResidency, + cpuC6StateResidency, + + // metrics which do not rely on msr. + cpuFrequency, + cpuC0SubstateC01Percent, + } + + t.Run("FailedToUpdate", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock updating msr time-related metrics. 
+ mFetcher.On("UpdatePerCPUMetrics", cpuID).Return(errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to update MSR time-related metrics for CPU ID %v", cpuID)) + require.Empty(t, acc.GetTelegrafMetrics()) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToUpdateModuleNotInitializedError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mErr := &ptel.ModuleNotInitializedError{Name: "msr"} + mFetcher := &fetcherMock{} + + // mock updating msr time-related metrics. + mFetcher.On("UpdatePerCPUMetrics", cpuID).Return(mErr).Twice() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.NoError(t, p.parseConfig()) + + // First call adds the error to the accumulator and key to logOnce map. + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to update MSR time-related metrics: %v", mErr)) + require.Empty(t, acc.GetTelegrafMetrics()) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, "msr_time_related") + + mFetcher.AssertExpectations(t) + }) + + t.Run("WithoutErrors", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock updating msr time-related metrics. + mFetcher.On("UpdatePerCPUMetrics", cpuID).Return(nil).Once() + + // mock getting C1 state residency. 
+ mFetcher.On("GetCPUC1StateResidency", cpuID).Return(c1State, nil).Once() + + // mock getting C6 state residency. + mFetcher.On("GetCPUC6StateResidency", cpuID).Return(c6State, nil).Once() + + p := &PowerStat{ + CPUMetrics: cpuMetrics, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerCPUMsrMetrics(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c1_state_residency_percent": c1State, + }, + // flags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c6_state_residency_percent": c6State, + }, + // flags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + }) +} + +func TestAddCPUTimeRelatedMsrMetrics(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + + c0State := 3.0 + c1State := 2.0 + c3State := 1.5 + c6State := 1.0 + busyCycles := 0.5 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU C0 state residency value. + mFetcher.On("GetCPUC0StateResidency", cpuID).Return(c0State, nil).Once() + + // mock getting CPU C1 state residency value. + mFetcher.On("GetCPUC1StateResidency", cpuID).Return(c1State, nil).Once() + + // mock getting CPU C3 state residency value. + mFetcher.On("GetCPUC3StateResidency", cpuID).Return(c3State, nil).Once() + + // mock getting CPU C6 state residency value. 
+ mFetcher.On("GetCPUC6StateResidency", cpuID).Return(c6State, nil).Once() + + // mock getting CPU C0 state residency value, triggered when calling add CPU busy cycle metric. + mFetcher.On("GetCPUC0StateResidency", cpuID).Return(busyCycles, nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which are not time-related MSR. + cpuFrequency, + cpuTemperature, + cpuC0SubstateC01Percent, + + // Time-related MSR metrics. + cpuC0StateResidency, + cpuC1StateResidency, + cpuC3StateResidency, + cpuC6StateResidency, + cpuBusyCycles, + }, + PackageMetrics: []packageMetricType{}, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUTimeRelatedMsrMetrics(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 5) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_state_residency_percent": c0State, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c1_state_residency_percent": c1State, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c6_state_residency_percent": c6State, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + 
"cpu_c3_state_residency_percent": c3State, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_busy_cycles_percent": busyCycles, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) +} + +func TestAddCPUPerfMetrics(t *testing.T) { + // Disable package metrics when parseConfig method is called. + packageMetrics := []packageMetricType{} + + t.Run("FailedToReadPerfEvents", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + p.addCPUPerfMetrics(acc) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), "failed to read perf events") + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToReadPerfEventsModuleNotInitializedError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mErr := &ptel.ModuleNotInitializedError{Name: "perf"} + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(mErr).Twice() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + // First call adds the error to the accumulator and key to logOnce map. + p.addCPUPerfMetrics(acc) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. 
+ p.addCPUPerfMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to read perf events: %v", mErr)) + require.Empty(t, acc.GetTelegrafMetrics()) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, "perf_read") + + mFetcher.AssertExpectations(t) + }) + + t.Run("NoAvailableCPUs", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting available CPU IDs for perf events. + mFetcher.On("GetPerfCPUIDs").Return(nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which do not rely on perf. + cpuFrequency, + cpuTemperature, + cpuBusyCycles, + + // Metrics which rely on perf. + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + PackageMetrics: []packageMetricType{}, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUPerfMetrics(acc) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToGetDataCPUID", func(t *testing.T) { + t.Run("SingleCPUID", func(t *testing.T) { + cpuID := 0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting available CPU IDs for perf events. + mFetcher.On("GetPerfCPUIDs").Return([]int{cpuID}).Once() + + // mock getting corresponding core ID to CPU ID 0. + mFetcher.On("GetCPUCoreID", cpuID).Return(1, nil).Once() + + // mock getting corresponding package ID to CPU ID 0. + mFetcher.On("GetCPUPackageID", cpuID).Return(0, errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which do not rely on perf. 
+ cpuFrequency, + cpuTemperature, + cpuBusyCycles, + + // Metrics which rely on perf. + cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + PackageMetrics: []packageMetricType{}, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUPerfMetrics(acc) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get perf metrics for CPU ID %v", cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("MultipleCPUIDs", func(t *testing.T) { + cpuID := 0 + coreID := 2 + packageID := 3 + + c01Percent := 0.2 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting available CPU IDs for perf events. + mFetcher.On("GetPerfCPUIDs").Return([]int{cpuID, 1}).Once() + + // mock getting corresponding core ID to CPU ID 0. + mFetcher.On("GetCPUCoreID", cpuID).Return(coreID, nil).Once() + + // mock getting corresponding package ID to CPU ID 0. + mFetcher.On("GetCPUPackageID", cpuID).Return(packageID, nil).Once() + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", cpuID).Return(c01Percent, nil).Once() + + // mock getting corresponding core ID to CPU ID 1. + mFetcher.On("GetCPUCoreID", 1).Return(5, nil).Once() + + // mock getting corresponding package ID to CPU ID 1. + mFetcher.On("GetCPUPackageID", 1).Return(0, errors.New("mock error")).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which do not rely on perf. + cpuFrequency, + cpuTemperature, + cpuC6StateResidency, + + // Metrics which rely on perf. 
+ cpuC0SubstateC01Percent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUPerfMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), "failed to get perf metrics for CPU ID 1") + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c01_percent": c01Percent, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + }) + + t.Run("WithError", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + + c01Percent := 0.5 + c0Wait := 2.5 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock reading perf events. + mFetcher.On("ReadPerfEvents").Return(nil).Once() + + // mock getting available CPU IDs for perf events. + mFetcher.On("GetPerfCPUIDs").Return([]int{cpuID}).Once() + + // mock getting corresponding core ID to CPU ID 0. + mFetcher.On("GetCPUCoreID", cpuID).Return(coreID, nil).Once() + + // mock getting corresponding package ID to CPU ID 0. + mFetcher.On("GetCPUPackageID", cpuID).Return(packageID, nil).Once() + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", cpuID).Return(c01Percent, nil).Once() + + // mock getting CPU C02 metric. + mFetcher.On("GetCPUC0SubstateC02Percent", cpuID).Return(0.0, errors.New("mock error")).Once() + + // mock getting CPU C0Wait metric. + mFetcher.On("GetCPUC0SubstateC0WaitPercent", cpuID).Return(c0Wait, nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which do not rely on perf. + cpuFrequency, + cpuTemperature, + cpuC6StateResidency, + + // Metrics which rely on perf. 
+ cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + cpuC0SubstateC0WaitPercent, + }, + PackageMetrics: packageMetrics, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addCPUPerfMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC0SubstateC02Percent, cpuID)) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c01_percent": c01Percent, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c0_wait_percent": c0Wait, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + }) +} + +func TestAddPerCPUPerfMetrics(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + + c01Percent := 1.09 + c02Percent := 2.12 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", cpuID).Return(c01Percent, nil).Once() + + // mock getting CPU C02 metric. + mFetcher.On("GetCPUC0SubstateC02Percent", cpuID).Return(c02Percent, nil).Once() + + p := &PowerStat{ + CPUMetrics: []cpuMetricType{ + // Metrics which do not rely on perf. + cpuFrequency, + cpuTemperature, + cpuC6StateResidency, + + // Metrics which rely on perf. 
+ cpuC0SubstateC01Percent, + cpuC0SubstateC02Percent, + }, + PackageMetrics: []packageMetricType{}, + EventDefinitions: "./testdata/sapphirerapids_core.json", + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addPerCPUPerfMetrics(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c01_percent": c01Percent, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c02_percent": c02Percent, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) +} + +func TestGetDataCPUID(t *testing.T) { + t.Run("FailedToGetCoreID", func(t *testing.T) { + cpuID := 1 + + mFetcher := &fetcherMock{} + + // mock getting core ID corresponding to the CPU ID. + mFetcher.On("GetCPUCoreID", cpuID).Return(0, errors.New("mock error")).Once() + + coreID, packageID, err := getDataCPUID(mFetcher, cpuID) + + require.Equal(t, 0, coreID) + require.Equal(t, 0, packageID) + require.ErrorContains(t, err, fmt.Sprintf("failed to get core ID from CPU ID %v", cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToGetPackageID", func(t *testing.T) { + cpuID := 1 + + mFetcher := &fetcherMock{} + + // mock getting core ID corresponding to the CPU ID. + mFetcher.On("GetCPUCoreID", cpuID).Return(1, nil).Once() + + // mock getting package ID corresponding to the CPU ID. 
+ mFetcher.On("GetCPUPackageID", cpuID).Return(0, errors.New("mock error")).Once() + + coreID, packageID, err := getDataCPUID(mFetcher, cpuID) + + require.Equal(t, 0, coreID) + require.Equal(t, 0, packageID) + require.ErrorContains(t, err, fmt.Sprintf("failed to get package ID from CPU ID %v", cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + cpuID := 1 + + mFetcher := &fetcherMock{} + + // mock getting core ID corresponding to the CPU ID. + mFetcher.On("GetCPUCoreID", cpuID).Return(1, nil).Once() + + // mock getting package ID corresponding to the CPU ID. + mFetcher.On("GetCPUPackageID", cpuID).Return(2, nil).Once() + + coreID, packageID, err := getDataCPUID(mFetcher, cpuID) + + require.Equal(t, 1, coreID) + require.Equal(t, 2, packageID) + require.NoError(t, err) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddPackageMetrics(t *testing.T) { + t.Run("NoPackageIDs", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return(nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + p.addPackageMetrics(acc) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + }) + + t.Run("WithRaplMetrics", func(t *testing.T) { + tdp := 80.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return([]int{0, 1}).Once() + + // mock getting package thermal design power metric for CPU ID 0. + mFetcher.On("GetPackageThermalDesignPowerWatts", 0).Return(tdp, nil).Once() + + // mock getting package thermal design power metric for CPU ID 1. 
+ mFetcher.On("GetPackageThermalDesignPowerWatts", 1).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on rapl + packageThermalDesignPower, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPackageMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID 1", packageThermalDesignPower)) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "thermal_design_power_watts": tdp, + }, + // tags + map[string]string{ + "package_id": "0", + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithMsrMetrics", func(t *testing.T) { + baseFreq := uint64(400) + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return([]int{0, 1}).Once() + + // mock getting CPU base frequency metric, for package ID 0. + mFetcher.On("GetCPUBaseFrequency", 0).Return(uint64(0), errors.New("mock error")).Once() + + // mock getting CPU base frequency metric, for package ID 1. 
+ mFetcher.On("GetCPUBaseFrequency", 1).Return(baseFreq, nil).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on msr + packageCPUBaseFrequency, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPackageMetrics(acc) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID 0", packageCPUBaseFrequency)) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "cpu_base_frequency_mhz": baseFreq, + }, + // tags + map[string]string{ + "package_id": "1", + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithUncoreFreqMetric", func(t *testing.T) { + dieID := 0 + + initMin := 500.0 + initMax := 2500.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting available package IDs. + mFetcher.On("GetPackageIDs").Return([]int{0, 1}).Once() + + // mock getting die IDs for package ID. + mFetcher.On("GetPackageDieIDs", mock.AnythingOfType("int")).Return([]int{dieID}, nil).Twice() + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", mock.AnythingOfType("int"), dieID).Return(initMin, nil).Twice() + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", mock.AnythingOfType("int"), dieID).Return(initMax, nil).Twice() + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", mock.AnythingOfType("int"), dieID).Return(600.0, nil).Twice() + + // mock getting custom maximum uncore frequency limit. 
+ mFetcher.On("GetCustomizedUncoreFrequencyMax", mock.AnythingOfType("int"), dieID).Return(0.0, errors.New("mock error")).Twice() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + packageUncoreFrequency, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPackageMetrics(acc) + + require.Len(t, acc.Errors, 2) + require.ErrorContains(t, acc.Errors[0], fmt.Sprintf("failed to get current uncore frequency values for package ID 0 and die ID %v", dieID)) + require.ErrorContains(t, acc.Errors[1], fmt.Sprintf("failed to get current uncore frequency values for package ID 1 and die ID %v", dieID)) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": initMin, + "uncore_frequency_limit_mhz_max": initMax, + }, + // tags + map[string]string{ + "package_id": "0", + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": initMin, + "uncore_frequency_limit_mhz_max": initMax, + }, + // tags + map[string]string{ + "package_id": "1", + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddPerPackageRaplMetrics(t *testing.T) { + t.Run("WithoutRaplMetrics", func(t *testing.T) { + packageID := 0 + + acc := &testutil.Accumulator{} + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which do not rely on rapl + packageCPUBaseFrequency, + packageUncoreFrequency, + packageTurboLimit, + }, + } + + p.addPerPackageRaplMetrics(acc, packageID) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + }) + + t.Run("WithModuleNotInitializedError", func(t *testing.T) { + packageID := 0 + + acc := &testutil.Accumulator{} + + raplNotInitErr := 
&ptel.ModuleNotInitializedError{Name: "rapl"} + mError := fmt.Errorf("mock error: %w", raplNotInitErr) + mFetcher := &fetcherMock{} + + // mock getting current dram power consumption metric. + mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(0.0, mError).Twice() + + // mock getting package thermal design power metric. + mFetcher.On("GetPackageThermalDesignPowerWatts", packageID).Return(0.0, mError).Twice() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on rapl + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + + // metrics which do not rely on rapl + packageCPUBaseFrequency, + packageUncoreFrequency, + packageTurboLimit, + }, + + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.NoError(t, p.parseConfig()) + + // First call adds the error to the accumulator and logOnce map. + p.addPerPackageRaplMetrics(acc, packageID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. + p.addPerPackageRaplMetrics(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 2) + require.ErrorContains(t, acc.Errors[0], fmt.Sprintf("failed to get %q: %v", packageCurrentDramPowerConsumption, raplNotInitErr)) + require.ErrorContains(t, acc.Errors[1], fmt.Sprintf("failed to get %q: %v", packageThermalDesignPower, raplNotInitErr)) + + require.Len(t, p.logOnce, 2) + require.Contains(t, p.logOnce, "rapl_current_dram_power_consumption") + require.Contains(t, p.logOnce, "rapl_thermal_design_power") + mFetcher.AssertExpectations(t) + }) + + t.Run("WithErrors", func(t *testing.T) { + packageID := 0 + currPower := 30.0 + tdp := 80.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting current package power consumption metric. + mFetcher.On("GetCurrentPackagePowerConsumptionWatts", packageID).Return(currPower, nil).Once() + + // mock getting current dram power consumption metric. 
+ mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(0.0, errors.New("mock error")).Once() + + // mock getting package thermal design power metric. + mFetcher.On("GetPackageThermalDesignPowerWatts", packageID).Return(tdp, nil).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on rapl + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + + // metrics which do not rely on rapl + packageCPUBaseFrequency, + packageUncoreFrequency, + packageTurboLimit, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerPackageRaplMetrics(acc, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageCurrentDramPowerConsumption, packageID)) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "current_power_consumption_watts": currPower, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "thermal_design_power_watts": tdp, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithoutErrors", func(t *testing.T) { + packageID := 0 + currPower := 10.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting current dram power consumption metric. 
+ mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(currPower, nil).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on rapl + packageCurrentDramPowerConsumption, + + // metrics which do not rely on rapl + packageCPUBaseFrequency, + packageUncoreFrequency, + packageTurboLimit, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerPackageRaplMetrics(acc, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "current_dram_power_consumption_watts": currPower, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddPerPackageMsrMetrics(t *testing.T) { + t.Run("WithoutMsrMetrics", func(t *testing.T) { + packageID := 0 + + acc := &testutil.Accumulator{} + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which do not rely on msr + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + } + + p.addPerPackageMsrMetrics(acc, packageID) + + require.Empty(t, acc.Errors) + require.Empty(t, acc.GetTelegrafMetrics()) + }) + + t.Run("WithModuleNotInitializedError", func(t *testing.T) { + packageID := 0 + + acc := &testutil.Accumulator{} + + msrNotInitErr := &ptel.ModuleNotInitializedError{Name: "msr"} + mError := fmt.Errorf("mock error: %w", msrNotInitErr) + mFetcher := &fetcherMock{} + + // mock getting CPU base frequency metric. + mFetcher.On("GetCPUBaseFrequency", packageID).Return(uint64(400), mError).Twice() + + // mock getting max turbo frequency list. 
+ mFetcher.On("GetMaxTurboFreqList", packageID).Return(nil, mError).Twice() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on msr + packageCPUBaseFrequency, + packageTurboLimit, + + // metrics which do not rely on msr + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.NoError(t, p.parseConfig()) + + // First call adds the error to the accumulator and logOnce map. + p.addPerPackageMsrMetrics(acc, packageID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. + p.addPerPackageMsrMetrics(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 2) + require.ErrorContains(t, acc.Errors[0], fmt.Sprintf("failed to get %q: %v", packageCPUBaseFrequency, msrNotInitErr)) + require.ErrorContains(t, acc.Errors[1], fmt.Sprintf("failed to get %q: %v", packageTurboLimit, msrNotInitErr)) + + require.Len(t, p.logOnce, 2) + require.Contains(t, p.logOnce, "msr_cpu_base_frequency") + require.Contains(t, p.logOnce, "msr_max_turbo_frequency") + mFetcher.AssertExpectations(t) + }) + + t.Run("WithErrors", func(t *testing.T) { + packageID := 0 + baseFreq := uint64(400) + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU base frequency metric. + mFetcher.On("GetCPUBaseFrequency", packageID).Return(baseFreq, nil).Once() + + // mock getting max turbo frequency list. 
+ mFetcher.On("GetMaxTurboFreqList", packageID).Return(nil, errors.New("mock error")).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on msr + packageCPUBaseFrequency, + packageTurboLimit, + + // metrics which do not rely on msr + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerPackageMsrMetrics(acc, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageTurboLimit, packageID)) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "cpu_base_frequency_mhz": baseFreq, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithoutErrors", func(t *testing.T) { + packageID := 0 + baseFreq := uint64(400) + maxTurboFreqList := []ptel.MaxTurboFreq{ + { + Value: 1000, + ActiveCores: 10, + }, + } + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU base frequency metric. + mFetcher.On("GetCPUBaseFrequency", packageID).Return(baseFreq, nil).Once() + + // mock getting max turbo frequency list. 
+ mFetcher.On("GetMaxTurboFreqList", packageID).Return(maxTurboFreqList, nil).Once() + + p := &PowerStat{ + PackageMetrics: []packageMetricType{ + // metrics which rely on msr + packageCPUBaseFrequency, + packageTurboLimit, + + // metrics which do not rely on msr + packageCurrentPowerConsumption, + packageCurrentDramPowerConsumption, + packageThermalDesignPower, + }, + + fetcher: mFetcher, + } + + require.NoError(t, p.parseConfig()) + + p.addPerPackageMsrMetrics(acc, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "cpu_base_frequency_mhz": baseFreq, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": maxTurboFreqList[0].Value, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(maxTurboFreqList[0].ActiveCores)), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUFrequency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU frequency metric. 
+ mFetcher.On("GetCPUFrequency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUFrequency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuFrequency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + cpuFreq := 800.001 + cpuFreqExp := 800.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU frequency metric. + mFetcher.On("GetCPUFrequency", cpuID).Return(cpuFreq, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUFrequency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_frequency_mhz")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_frequency_mhz": cpuFreqExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUTemperature(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU temperature metric. 
+ mFetcher.On("GetCPUTemperature", cpuID).Return(uint64(0), errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUTemperature(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuTemperature, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + cpuTemp := uint64(25) + + mFetcher := &fetcherMock{} + + // mock getting cpu temperature metric. + mFetcher.On("GetCPUTemperature", cpuID).Return(cpuTemp, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUTemperature(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasUIntField("powerstat_core", "cpu_temperature_celsius")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_temperature_celsius": cpuTemp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC0StateResidency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C0 state residency metric. 
+ mFetcher.On("GetCPUC0StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0StateResidency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC0StateResidency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + cpuID := 0 + coreID := 1 + packageID := 0 + c0State := 10.1199 + c0StateExp := 10.12 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting CPU C0 state residency metric. + mFetcher.On("GetCPUC0StateResidency", cpuID).Return(c0State, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0StateResidency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c0_state_residency_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_state_residency_percent": c0StateExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC1StateResidency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C1 state residency metric. 
+ mFetcher.On("GetCPUC1StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC1StateResidency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC1StateResidency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c1State := 10.1144 + c1StateExp := 10.11 + + mFetcher := &fetcherMock{} + + // mock getting CPU C1 state residency metric. + mFetcher.On("GetCPUC1StateResidency", cpuID).Return(c1State, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC1StateResidency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c1_state_residency_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c1_state_residency_percent": c1StateExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC3StateResidency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C3 state residency metric. 
+ mFetcher.On("GetCPUC3StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC3StateResidency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC3StateResidency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c3State := 20.1178 + c3StateExp := 20.12 + + mFetcher := &fetcherMock{} + + // mock getting CPU C3 state residency metric. + mFetcher.On("GetCPUC3StateResidency", cpuID).Return(c3State, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC3StateResidency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c3_state_residency_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c3_state_residency_percent": c3StateExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC6StateResidency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C6 state residency metric. 
+ mFetcher.On("GetCPUC6StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC6StateResidency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC6StateResidency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c6State := 9.115 + c6StateExp := 9.12 + + mFetcher := &fetcherMock{} + + // mock getting CPU C6 state residency metric. + mFetcher.On("GetCPUC6StateResidency", cpuID).Return(c6State, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC6StateResidency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c6_state_residency_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c6_state_residency_percent": c6StateExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC7StateResidency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C7 state residency metric. 
+ mFetcher.On("GetCPUC7StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC7StateResidency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC7StateResidency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c7State := 9.1149 + c7StateExp := 9.11 + + mFetcher := &fetcherMock{} + + // mock getting CPU C7 state residency metric. + mFetcher.On("GetCPUC7StateResidency", cpuID).Return(c7State, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC7StateResidency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c7_state_residency_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c7_state_residency_percent": c7StateExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUBusyFrequency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU busy frequency metric. 
+ mFetcher.On("GetCPUBusyFrequencyMhz", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBusyFrequency(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuBusyFrequency, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + cpuBusyFreq := 800.119 + cpuBusyFreqExp := 800.12 + + mFetcher := &fetcherMock{} + + // mock getting CPU busy frequency metric. + mFetcher.On("GetCPUBusyFrequencyMhz", cpuID).Return(cpuBusyFreq, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBusyFrequency(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_busy_frequency_mhz")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_busy_frequency_mhz": cpuBusyFreqExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUBusyCycles(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU busy cycles metric. 
+ mFetcher.On("GetCPUC0StateResidency", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBusyCycles(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuBusyCycles, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + cpuBusyCycles := 10.1149 + cpuBusyCyclesExp := 10.11 + + mFetcher := &fetcherMock{} + + // mock getting CPU C0 state residency metric. + mFetcher.On("GetCPUC0StateResidency", cpuID).Return(cpuBusyCycles, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBusyCycles(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_busy_cycles_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_busy_cycles_percent": cpuBusyCyclesExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC0SubstateC01Percent(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C01 metric. 
+ mFetcher.On("GetCPUC0SubstateC01Percent", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC01Percent(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC0SubstateC01Percent, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c01Percent := 5.9229 + c01PercentExp := 5.92 + + mFetcher := &fetcherMock{} + + // mock getting CPU C01 metric. + mFetcher.On("GetCPUC0SubstateC01Percent", cpuID).Return(c01Percent, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC01Percent(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c0_substate_c01_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c01_percent": c01PercentExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC0SubstateC02Percent(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C02 metric. 
+ mFetcher.On("GetCPUC0SubstateC02Percent", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC02Percent(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC0SubstateC02Percent, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c02Percent := 0.001 + c02PercentExp := 0.0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C02 metric. + mFetcher.On("GetCPUC0SubstateC02Percent", cpuID).Return(c02Percent, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC02Percent(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c0_substate_c02_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c02_percent": c02PercentExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUC0SubstateC0WaitPercent(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C0Wait metric. 
+ mFetcher.On("GetCPUC0SubstateC0WaitPercent", cpuID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC0WaitPercent(acc, cpuID, coreID, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for CPU ID %v", cpuC0SubstateC0WaitPercent, cpuID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + cpuID := 0 + coreID := 1 + packageID := 0 + c0WaitPercent := 0.995 + c0WaitPercentExp := 1.0 + + mFetcher := &fetcherMock{} + + // mock getting CPU C0Wait metric. + mFetcher.On("GetCPUC0SubstateC0WaitPercent", cpuID).Return(c0WaitPercent, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUC0SubstateC0WaitPercent(acc, cpuID, coreID, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_core", "cpu_c0_substate_c0_wait_percent")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_core", + // fields + map[string]interface{}{ + "cpu_c0_substate_c0_wait_percent": c0WaitPercentExp, + }, + // tags + map[string]string{ + "cpu_id": strconv.Itoa(cpuID), + "core_id": strconv.Itoa(coreID), + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCurrentPackagePowerConsumption(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting current package power consumption metric. 
+ mFetcher.On("GetCurrentPackagePowerConsumptionWatts", packageID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCurrentPackagePower(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageCurrentPowerConsumption, packageID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + currPower := float64(30.1999) + currPowerRounded := float64(30.2) + + mFetcher := &fetcherMock{} + + // mock getting current package power consumption metric. + mFetcher.On("GetCurrentPackagePowerConsumptionWatts", packageID).Return(currPower, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCurrentPackagePower(acc, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_package", "current_power_consumption_watts")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "current_power_consumption_watts": currPowerRounded, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCurrentDramPowerConsumption(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting current dram power consumption metric. 
+ mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCurrentDramPower(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageCurrentDramPowerConsumption, packageID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + currPower := float64(30.8235) + currPowerRounded := float64(30.82) + + mFetcher := &fetcherMock{} + + // mock getting current dram power consumption metric. + mFetcher.On("GetCurrentDramPowerConsumptionWatts", packageID).Return(currPower, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCurrentDramPower(acc, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_package", "current_dram_power_consumption_watts")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "current_dram_power_consumption_watts": currPowerRounded, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddThermalDesignPower(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting package thermal design power metric. 
+ mFetcher.On("GetPackageThermalDesignPowerWatts", packageID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addThermalDesignPower(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageThermalDesignPower, packageID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Rounded", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + tdp := float64(80.1999) + tdpRounded := float64(80.2) + + mFetcher := &fetcherMock{} + + // mock getting package thermal design power metric. + mFetcher.On("GetPackageThermalDesignPowerWatts", packageID).Return(tdp, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addThermalDesignPower(acc, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasFloatField("powerstat_package", "thermal_design_power_watts")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "thermal_design_power_watts": tdpRounded, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddCPUBaseFrequency(t *testing.T) { + t.Run("FailedToGetMetric", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + + mFetcher := &fetcherMock{} + + // mock getting CPU base frequency metric. 
+ mFetcher.On("GetCPUBaseFrequency", packageID).Return(uint64(0), errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBaseFrequency(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageCPUBaseFrequency, packageID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + acc := &testutil.Accumulator{} + + packageID := 0 + baseFreq := uint64(700) + + mFetcher := &fetcherMock{} + + // mock getting CPU base frequency metric. + mFetcher.On("GetCPUBaseFrequency", packageID).Return(baseFreq, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addCPUBaseFrequency(acc, packageID) + + require.Len(t, acc.GetTelegrafMetrics(), 1) + require.True(t, acc.HasUIntField("powerstat_package", "cpu_base_frequency_mhz")) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "cpu_base_frequency_mhz": baseFreq, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddUncoreFrequency(t *testing.T) { + packageID, dieID := 1, 0 + + t.Run("FailedToGetDieIDs", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting die IDs for package ID. 
+ mFetcher.On("GetPackageDieIDs", packageID).Return(nil, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequency(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains( + t, + acc.FirstError(), + fmt.Sprintf("failed to get die IDs for package ID %v", packageID), + ) + }) + + t.Run("FailedToGetInitialLimits", func(t *testing.T) { + currMin := 500.0 + currMax := 2500.0 + curr := 1000.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting die IDs for package ID. + mFetcher.On("GetPackageDieIDs", packageID).Return([]int{dieID}, nil).Once() + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(800.0, nil).Once() + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(currMin, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(currMax, nil).Once() + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(curr, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequency(acc, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains( + t, + acc.FirstError(), + fmt.Sprintf("failed to get initial uncore frequency limits for package ID %v and die ID %v", packageID, dieID), + ) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": currMin, + "uncore_frequency_limit_mhz_max": currMax, + "uncore_frequency_mhz_cur": uint64(curr), + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "current", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToGetCurrentValues", func(t *testing.T) { + initMin := 300.0 + initMax := 1200.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting die IDs for package ID. + mFetcher.On("GetPackageDieIDs", packageID).Return([]int{dieID}, nil).Once() + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(initMin, nil).Once() + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(initMax, nil).Once() + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(500.0, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(1300.0, nil).Once() + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequency(acc, packageID) + + require.Len(t, acc.Errors, 1) + require.ErrorContains( + t, + acc.FirstError(), + fmt.Sprintf("failed to get current uncore frequency values for package ID %v and die ID %v", packageID, dieID), + ) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": initMin, + "uncore_frequency_limit_mhz_max": initMax, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + initMin := 300.0 + initMax := 1200.0 + currMin := 500.0 + currMax := 2500.0 + curr := 1000.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting die IDs for package ID. + mFetcher.On("GetPackageDieIDs", packageID).Return([]int{dieID}, nil).Once() + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(initMin, nil).Once() + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(initMax, nil).Once() + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(currMin, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(currMax, nil).Once() + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(curr, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequency(acc, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": initMin, + "uncore_frequency_limit_mhz_max": initMax, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": currMin, + "uncore_frequency_limit_mhz_max": currMax, + "uncore_frequency_mhz_cur": uint64(curr), + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "current", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddUncoreFrequencyInitialLimits(t *testing.T) { + packageID, dieID := 0, 0 + + t.Run("WithModuleNotInitializedError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + uncoreFreqErr := &ptel.ModuleNotInitializedError{Name: "uncore_frequency"} + mError := fmt.Errorf("mock error: %w", uncoreFreqErr) + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(0.0, mError).Twice() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + // First call adds the error to the accumulator and logOnce map. + p.addUncoreFrequencyInitialLimits(acc, packageID, dieID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. 
+ p.addUncoreFrequencyInitialLimits(acc, packageID, dieID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q initial limits", packageUncoreFrequency)) + require.ErrorContains(t, acc.FirstError(), uncoreFreqErr.Error()) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, fmt.Sprintf("%s_%s_initial", "uncore_frequency", packageUncoreFrequency)) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequencyInitialLimits(acc, packageID, dieID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains( + t, + acc.FirstError(), + fmt.Sprintf("failed to get initial uncore frequency limits for package ID %v and die ID %v", packageID, dieID), + ) + require.Empty(t, p.logOnce) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + initMin := 300.0 + initMax := 1200.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(initMin, nil).Once() + + // mock getting initial maximum uncore frequency limit. 
+ mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(initMax, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequencyInitialLimits(acc, packageID, dieID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": initMin, + "uncore_frequency_limit_mhz_max": initMax, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "initial", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddUncoreFrequencyCurrentValues(t *testing.T) { + packageID, dieID := 0, 0 + + t.Run("WithModuleNotInitializedError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + uncoreFreqErr := &ptel.ModuleNotInitializedError{Name: "uncore_frequency"} + mError := fmt.Errorf("mock error: %w", uncoreFreqErr) + mFetcher := &fetcherMock{} + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(0.0, mError).Twice() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + // First call adds the error to the accumulator and logOnce map. + p.addUncoreFrequencyCurrentValues(acc, packageID, dieID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. 
+ p.addUncoreFrequencyCurrentValues(acc, packageID, dieID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q current value and limits", packageUncoreFrequency)) + require.ErrorContains(t, acc.FirstError(), uncoreFreqErr.Error()) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, fmt.Sprintf("%s_%s_current", "uncore_frequency", packageUncoreFrequency)) + mFetcher.AssertExpectations(t) + }) + + t.Run("WithError", func(t *testing.T) { + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequencyCurrentValues(acc, packageID, dieID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains( + t, + acc.FirstError(), + fmt.Sprintf("failed to get current uncore frequency values for package ID %v and die ID %v", packageID, dieID), + ) + require.Empty(t, p.logOnce) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + currMin := 500.0 + currMax := 2500.0 + curr := 1000.0 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(currMin, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(currMax, nil).Once() + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(curr, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addUncoreFrequencyCurrentValues(acc, packageID, dieID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 1) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "uncore_frequency_limit_mhz_min": currMin, + "uncore_frequency_limit_mhz_max": currMax, + "uncore_frequency_mhz_cur": uint64(curr), + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "type": "current", + "die": strconv.Itoa(dieID), + }, + ) + mFetcher.AssertExpectations(t) + }) +} + +func TestGetUncoreFreqInitialLimits(t *testing.T) { + packageID, dieID := 0, 0 + + t.Run("FailsToGetInitialMinLimit", func(t *testing.T) { + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + initMin, initMax, err := getUncoreFreqInitialLimits(mFetcher, packageID, dieID) + + require.ErrorContains(t, err, "failed to get initial minimum uncore frequency limit") + require.Zero(t, initMin) + require.Zero(t, initMax) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailsToGetInitialMaxLimit", func(t *testing.T) { + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(800.0, nil).Once() + + // mock getting initial maximum uncore frequency limit. 
+ mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + initMin, initMax, err := getUncoreFreqInitialLimits(mFetcher, packageID, dieID) + + require.ErrorContains(t, err, "failed to get initial maximum uncore frequency limit") + require.Zero(t, initMin) + require.Zero(t, initMax) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + initMinExp := 300.0 + initMaxExp := 1500.0 + + mFetcher := &fetcherMock{} + + // mock getting initial minimum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMin", packageID, dieID).Return(initMinExp, nil).Once() + + // mock getting initial maximum uncore frequency limit. + mFetcher.On("GetInitialUncoreFrequencyMax", packageID, dieID).Return(initMaxExp, nil).Once() + + initMin, initMax, err := getUncoreFreqInitialLimits(mFetcher, packageID, dieID) + + require.NoError(t, err) + require.Equal(t, initMinExp, initMin) + require.Equal(t, initMaxExp, initMax) + mFetcher.AssertExpectations(t) + }) +} + +func TestGetUncoreFreqCurrentValues(t *testing.T) { + packageID, dieID := 0, 0 + + t.Run("FailsToGetCurrentMinLimit", func(t *testing.T) { + mFetcher := &fetcherMock{} + + // mock getting current minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + values, err := getUncoreFreqCurrentValues(mFetcher, packageID, dieID) + + require.ErrorContains(t, err, "failed to get current minimum uncore frequency limit") + require.Equal(t, uncoreFreqValues{}, values) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailsToGetCurrentMaxLimit", func(t *testing.T) { + mFetcher := &fetcherMock{} + + // mock getting current minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(1000.0, nil).Once() + + // mock getting current maximum uncore frequency limit. 
+ mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + values, err := getUncoreFreqCurrentValues(mFetcher, packageID, dieID) + + require.ErrorContains(t, err, "failed to get current maximum uncore frequency limit") + require.Equal(t, uncoreFreqValues{}, values) + mFetcher.AssertExpectations(t) + }) + + t.Run("FailsToGetCurrentValue", func(t *testing.T) { + mFetcher := &fetcherMock{} + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(1000.0, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(2000.0, nil).Once() + + // mock getting current uncore frequency value. + mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(0.0, errors.New("mock error")).Once() + + values, err := getUncoreFreqCurrentValues(mFetcher, packageID, dieID) + + require.ErrorContains(t, err, "failed to get current uncore frequency") + require.Equal(t, uncoreFreqValues{}, values) + mFetcher.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + minUncore := 500.0 + maxUncore := 1500.0 + current := 750.0 + + uncoreFreqValExp := uncoreFreqValues{ + currMin: minUncore, + currMax: maxUncore, + curr: current, + } + + mFetcher := &fetcherMock{} + + // mock getting custom minimum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMin", packageID, dieID).Return(minUncore, nil).Once() + + // mock getting custom maximum uncore frequency limit. + mFetcher.On("GetCustomizedUncoreFrequencyMax", packageID, dieID).Return(maxUncore, nil).Once() + + // mock getting current uncore frequency value. 
+ mFetcher.On("GetCurrentUncoreFrequency", packageID, dieID).Return(current, nil).Once() + + values, err := getUncoreFreqCurrentValues(mFetcher, packageID, dieID) + + require.NoError(t, err) + require.Equal(t, uncoreFreqValExp, values) + mFetcher.AssertExpectations(t) + }) +} + +func TestAddMaxTurboFreqLimits(t *testing.T) { + t.Run("FailedToGetMetricModuleNotInitializedError", func(t *testing.T) { + packageID := 1 + + acc := &testutil.Accumulator{} + + mErr := &ptel.ModuleNotInitializedError{Name: "msr"} + mFetcher := &fetcherMock{} + + // mock getting max turbo frequency list. + mFetcher.On("GetMaxTurboFreqList", packageID).Return(nil, mErr).Twice() + + p := &PowerStat{ + fetcher: mFetcher, + + logOnce: map[string]struct{}{}, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + // First call adds the error to the accumulator and key to logOnce map. + p.addMaxTurboFreqLimits(acc, packageID) + + // Second call detects previous error in logOnce map and skips adding it to the accumulator. + p.addMaxTurboFreqLimits(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q: %v", packageTurboLimit, mErr)) + + require.Len(t, p.logOnce, 1) + require.Contains(t, p.logOnce, "msr_max_turbo_frequency") + + mFetcher.AssertExpectations(t) + }) + + t.Run("FailedToGetMetric", func(t *testing.T) { + packageID := 1 + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting max turbo frequency list. 
+ mFetcher.On("GetMaxTurboFreqList", packageID).Return(nil, errors.New("mock error")).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addMaxTurboFreqLimits(acc, packageID) + + require.Empty(t, acc.GetTelegrafMetrics()) + require.Len(t, acc.Errors, 1) + require.ErrorContains(t, acc.FirstError(), fmt.Sprintf("failed to get %q for package ID %v", packageTurboLimit, packageID)) + mFetcher.AssertExpectations(t) + }) + + t.Run("CPUIsHybrid", func(t *testing.T) { + packageID := 1 + + maxTurboFreqList := []ptel.MaxTurboFreq{ + { + Value: 1000, + ActiveCores: 10, + Secondary: true, + }, + { + Value: 2000, + ActiveCores: 20, + }, + } + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting max turbo frequency list. + mFetcher.On("GetMaxTurboFreqList", packageID).Return(maxTurboFreqList, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addMaxTurboFreqLimits(acc, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": maxTurboFreqList[0].Value, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(maxTurboFreqList[0].ActiveCores)), + "hybrid": "secondary", + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": maxTurboFreqList[1].Value, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(maxTurboFreqList[1].ActiveCores)), + "hybrid": "primary", + }, + ) + mFetcher.AssertExpectations(t) + }) + + t.Run("CPUIsNotHybrid", func(t *testing.T) { + packageID := 1 + + maxTurboFreqList := []ptel.MaxTurboFreq{ + { + Value: 1000, + ActiveCores: 10, + 
}, + { + Value: 2000, + ActiveCores: 20, + }, + } + + acc := &testutil.Accumulator{} + + mFetcher := &fetcherMock{} + + // mock getting max turbo frequency list. + mFetcher.On("GetMaxTurboFreqList", packageID).Return(maxTurboFreqList, nil).Once() + + p := &PowerStat{ + fetcher: mFetcher, + } + + require.Empty(t, acc.GetTelegrafMetrics()) + + p.addMaxTurboFreqLimits(acc, packageID) + + require.Empty(t, acc.Errors) + require.Len(t, acc.GetTelegrafMetrics(), 2) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": maxTurboFreqList[0].Value, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(maxTurboFreqList[0].ActiveCores)), + }, + ) + acc.AssertContainsTaggedFields( + t, + // measurement + "powerstat_package", + // fields + map[string]interface{}{ + "max_turbo_frequency_mhz": maxTurboFreqList[1].Value, + }, + // tags + map[string]string{ + "package_id": strconv.Itoa(packageID), + "active_cores": strconv.Itoa(int(maxTurboFreqList[1].ActiveCores)), + }, + ) + mFetcher.AssertExpectations(t) + }) +} diff --git a/plugins/inputs/intel_powerstat/metrics.go b/plugins/inputs/intel_powerstat/metrics.go new file mode 100644 index 0000000000000..166459ee1322f --- /dev/null +++ b/plugins/inputs/intel_powerstat/metrics.go @@ -0,0 +1,322 @@ +//go:build linux && amd64 + +package intel_powerstat + +import ( + "errors" + "fmt" + "math" + "strconv" + + ptel "github.com/intel/powertelemetry" + + "github.com/influxdata/telegraf" +) + +// cpuMetricType is an enum type to identify core metrics. +type cpuMetricType int + +// cpuMetricType enum defines supported core metrics. 
+const ( + // metric relying on cpuFreq + cpuFrequency cpuMetricType = iota + + // metric relying on msr + cpuTemperature + + // metrics relying on msr with storage + cpuC0StateResidency + cpuC1StateResidency + cpuC3StateResidency + cpuC6StateResidency + cpuC7StateResidency + cpuBusyCycles // alias of cpuC0StateResidency + cpuBusyFrequency + + // metrics relying on perf + cpuC0SubstateC01Percent + cpuC0SubstateC02Percent + cpuC0SubstateC0WaitPercent +) + +// Helper method to return a string representation of a core metric. +func (m cpuMetricType) String() string { + switch m { + case cpuFrequency: + return "cpu_frequency" + case cpuTemperature: + return "cpu_temperature" + case cpuBusyFrequency: + return "cpu_busy_frequency" + case cpuC0StateResidency: + return "cpu_c0_state_residency" + case cpuC1StateResidency: + return "cpu_c1_state_residency" + case cpuC3StateResidency: + return "cpu_c3_state_residency" + case cpuC6StateResidency: + return "cpu_c6_state_residency" + case cpuC7StateResidency: + return "cpu_c7_state_residency" + case cpuBusyCycles: + return "cpu_busy_cycles" + case cpuC0SubstateC01Percent: + return "cpu_c0_substate_c01" + case cpuC0SubstateC02Percent: + return "cpu_c0_substate_c02" + case cpuC0SubstateC0WaitPercent: + return "cpu_c0_substate_c0_wait" + } + return "" +} + +// UnmarshalText parses the cpu metric from the TOML config file +func (m *cpuMetricType) UnmarshalText(data []byte) (err error) { + parsedMetric, err := cpuMetricTypeFromString(string(data)) + if err != nil { + return err + } + *m = parsedMetric + return nil +} + +func cpuMetricTypeFromString(metric string) (cpuMetricType, error) { + switch metric { + case "cpu_frequency": + return cpuFrequency, nil + case "cpu_temperature": + return cpuTemperature, nil + case "cpu_busy_frequency": + return cpuBusyFrequency, nil + case "cpu_c0_state_residency": + return cpuC0StateResidency, nil + case "cpu_c1_state_residency": + return cpuC1StateResidency, nil + case "cpu_c3_state_residency": + 
return cpuC3StateResidency, nil + case "cpu_c6_state_residency": + return cpuC6StateResidency, nil + case "cpu_c7_state_residency": + return cpuC7StateResidency, nil + case "cpu_busy_cycles": + return cpuBusyCycles, nil + case "cpu_c0_substate_c01": + return cpuC0SubstateC01Percent, nil + case "cpu_c0_substate_c02": + return cpuC0SubstateC02Percent, nil + case "cpu_c0_substate_c0_wait": + return cpuC0SubstateC0WaitPercent, nil + } + + return -1, fmt.Errorf("invalid cpu metric specified: %q", metric) +} + +// packageMetricType is an enum type to identify package metrics. +type packageMetricType int + +// packageMetricType enum defines supported package metrics. +const ( + // metrics relying on rapl + packageCurrentPowerConsumption packageMetricType = iota + packageCurrentDramPowerConsumption + packageThermalDesignPower + + // metrics relying on msr + packageCPUBaseFrequency + + // hybrid metric relying on uncoreFreq as a primary mechanism and on msr as fallback mechanism. + packageUncoreFrequency + + // metrics relying on msr + packageTurboLimit +) + +// Helper method to return a string representation of a package metric. 
+func (m packageMetricType) String() string { + switch m { + case packageCurrentPowerConsumption: + return "current_power_consumption" + case packageCurrentDramPowerConsumption: + return "current_dram_power_consumption" + case packageThermalDesignPower: + return "thermal_design_power" + case packageCPUBaseFrequency: + return "cpu_base_frequency" + case packageUncoreFrequency: + return "uncore_frequency" + case packageTurboLimit: + return "max_turbo_frequency" + } + return "" +} + +// UnmarshalText parses the package metric from the TOML config file +func (m *packageMetricType) UnmarshalText(data []byte) (err error) { + parsedMetric, err := packageMetricTypeFromString(string(data)) + if err != nil { + return err + } + *m = parsedMetric + return nil +} + +func packageMetricTypeFromString(metric string) (packageMetricType, error) { + switch metric { + case "current_power_consumption": + return packageCurrentPowerConsumption, nil + case "current_dram_power_consumption": + return packageCurrentDramPowerConsumption, nil + case "thermal_design_power": + return packageThermalDesignPower, nil + case "cpu_base_frequency": + return packageCPUBaseFrequency, nil + case "uncore_frequency": + return packageUncoreFrequency, nil + case "max_turbo_frequency": + return packageTurboLimit, nil + } + + return -1, fmt.Errorf("invalid package metric specified: %q", metric) +} + +// numeric is a type constraint definition. +type numeric interface { + float64 | uint64 +} + +// metricInfoProvider provides measurement name, fields, and tags needed by the accumulator to add a metric. +type metricInfoProvider interface { + // measurement returns a string with the name of measurement. + measurement() string + + // fields returns a map of string keys with metric name and metric values. + fields() (map[string]interface{}, error) + + // tags returns a map of string key and string value to add additional metric-specific information. 
+ tags() map[string]string + + // name returns the name of a metric. + name() string +} + +// addMetric takes a metricInfoProvider interface and adds metric information to an accumulator. +func addMetric(acc telegraf.Accumulator, m metricInfoProvider, logOnceMap map[string]struct{}) { + fields, err := m.fields() + if err == nil { + acc.AddGauge( + m.measurement(), + fields, + m.tags(), + ) + return + } + + // Always add to the accumulator errors not related to module not initialized. + var moduleErr *ptel.ModuleNotInitializedError + if !errors.As(err, &moduleErr) { + acc.AddError(err) + return + } + + // Add only once module not initialized error related to module and metric name. + logErrorOnce( + acc, + logOnceMap, + fmt.Sprintf("%s_%s", moduleErr.Name, m.name()), + fmt.Errorf("failed to get %q: %w", m.name(), moduleErr), + ) +} + +// metricCommon has metric information common to different types. +type metricCommon struct { + metric interface{} + units string +} + +func (m *metricCommon) name() string { + switch m.metric.(type) { + case cpuMetricType: + return m.metric.(cpuMetricType).String() + case packageMetricType: + return m.metric.(packageMetricType).String() + default: + return "" + } +} + +func (m *metricCommon) measurement() string { + switch m.metric.(type) { + case cpuMetricType: + return "powerstat_core" + case packageMetricType: + return "powerstat_package" + default: + return "" + } +} + +// cpuMetric is a generic type that has the information to identify a CPU-related metric, +// as well as function to retrieve its value at any time. Implements metricAdder interface. 
+type cpuMetric[T numeric] struct { + metricCommon + + cpuID int + coreID int + packageID int + fetchFn func(cpuID int) (T, error) +} + +func (m *cpuMetric[T]) fields() (map[string]interface{}, error) { + val, err := m.fetchFn(m.cpuID) + if err != nil { + return nil, fmt.Errorf("failed to get %q for CPU ID %v: %w", m.metric, m.cpuID, err) + } + + return map[string]interface{}{ + fmt.Sprintf("%s_%s", m.metric, m.units): round(val), + }, nil +} + +func (m *cpuMetric[T]) tags() map[string]string { + return map[string]string{ + "core_id": strconv.Itoa(m.coreID), + "cpu_id": strconv.Itoa(m.cpuID), + "package_id": strconv.Itoa(m.packageID), + } +} + +// packageMetric is a generic type that has the information to identify a package-related metric, +// as well as the function to retrieve its value at any time. Implements metricAdder interface. +type packageMetric[T numeric] struct { + metricCommon + + packageID int + fetchFn func(packageID int) (T, error) +} + +//nolint:revive // Confusing-naming caused by a generic type that implements this interface method. +func (m *packageMetric[T]) fields() (map[string]interface{}, error) { + val, err := m.fetchFn(m.packageID) + if err != nil { + return nil, fmt.Errorf("failed to get %q for package ID %v: %w", m.metric, m.packageID, err) + } + + return map[string]interface{}{ + fmt.Sprintf("%s_%s", m.metric, m.units): round(val), + }, nil +} + +//nolint:revive // Confusing-naming caused by a generic type that implements this interface method. +func (m *packageMetric[T]) tags() map[string]string { + return map[string]string{ + "package_id": strconv.Itoa(m.packageID), + } +} + +// round returns the result of rounding the argument, only if it's a 64 bit floating-point type. 
+func round[T numeric](val T) T { + if v, ok := any(val).(float64); ok { + val = T(math.Round(v*100) / 100) + } + return val +} diff --git a/plugins/inputs/intel_powerstat/metrics_test.go b/plugins/inputs/intel_powerstat/metrics_test.go new file mode 100644 index 0000000000000..fd6fa79fbe7c4 --- /dev/null +++ b/plugins/inputs/intel_powerstat/metrics_test.go @@ -0,0 +1,151 @@ +//go:build linux && amd64 + +package intel_powerstat + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCoreMetric_String(t *testing.T) { + testCases := []struct { + name string + metricName string + }{ + { + name: "CPUFrequency", + metricName: "cpu_frequency", + }, + { + name: "CPUTemperature", + metricName: "cpu_temperature", + }, + { + name: "CPUC0StateResidency", + metricName: "cpu_c0_state_residency", + }, + { + name: "CPUC1StateResidency", + metricName: "cpu_c1_state_residency", + }, + { + name: "CPUC3StateResidency", + metricName: "cpu_c3_state_residency", + }, + { + name: "CPUC6StateResidency", + metricName: "cpu_c6_state_residency", + }, + { + name: "CPUC7StateResidency", + metricName: "cpu_c7_state_residency", + }, + { + name: "CPUBusyCycles", + metricName: "cpu_busy_cycles", + }, + { + name: "CPUBusyFrequency", + metricName: "cpu_busy_frequency", + }, + { + name: "CPUC0SubstateC01Percent", + metricName: "cpu_c0_substate_c01", + }, + { + name: "CPUC0SubstateC02Percent", + metricName: "cpu_c0_substate_c02", + }, + { + name: "CPUC0SubstateC0WaitPercent", + metricName: "cpu_c0_substate_c0_wait", + }, + { + name: "Invalid", + metricName: "", + }, + } + + for i, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + metric := cpuMetricType(i) + require.Equal(t, tc.metricName, metric.String()) + }) + } +} + +func TestPackageMetric_String(t *testing.T) { + testCases := []struct { + name string + metricName string + }{ + { + name: "PackageCurrentPowerConsumption", + metricName: "current_power_consumption", + }, + { + name: 
"PackageCurrentDramPowerConsumption", + metricName: "current_dram_power_consumption", + }, + { + name: "PackageThermalDesignPower", + metricName: "thermal_design_power", + }, + { + name: "PackageCPUBaseFrequency", + metricName: "cpu_base_frequency", + }, + { + name: "PackageUncoreFrequency", + metricName: "uncore_frequency", + }, + { + name: "PackageTurboLimit", + metricName: "max_turbo_frequency", + }, + { + name: "Invalid", + metricName: "", + }, + } + + for i, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + metric := packageMetricType(i) + require.Equal(t, tc.metricName, metric.String()) + }) + } +} + +func TestCPUMetricTypeFromString(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + for m := cpuMetricType(0); m < cpuC0SubstateC0WaitPercent+1; m++ { + val, err := cpuMetricTypeFromString(m.String()) + require.NoError(t, err) + require.Equal(t, m, val) + } + }) + + t.Run("Invalid", func(t *testing.T) { + val, err := cpuMetricTypeFromString("invalid") + require.Error(t, err) + require.Equal(t, cpuMetricType(-1), val) + }) +} + +func TestPackageMetricTypeFromString(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + for m := packageMetricType(0); m < packageTurboLimit+1; m++ { + val, err := packageMetricTypeFromString(m.String()) + require.NoError(t, err) + require.Equal(t, m, val) + } + }) + + t.Run("Invalid", func(t *testing.T) { + val, err := packageMetricTypeFromString("invalid") + require.Error(t, err) + require.Equal(t, packageMetricType(-1), val) + }) +} diff --git a/plugins/inputs/intel_powerstat/msr.go b/plugins/inputs/intel_powerstat/msr.go deleted file mode 100644 index 8d30f954e2830..0000000000000 --- a/plugins/inputs/intel_powerstat/msr.go +++ /dev/null @@ -1,327 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "context" - "fmt" - "io" - "os" - "path/filepath" - "strings" - - "golang.org/x/sync/errgroup" - - "github.com/influxdata/telegraf" -) - -const ( - systemCPUPath = "/sys/devices/system/cpu/" - 
cpuCurrentFreqPartialPath = "/sys/devices/system/cpu/cpu%s/cpufreq/scaling_cur_freq" - msrPartialPath = "/dev/cpu/%s/msr" - uncoreFreqPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_%s_die_%s/%s%s_freq_khz" - c3StateResidencyLocation = 0x3FC - c6StateResidencyLocation = 0x3FD - c7StateResidencyLocation = 0x3FE - maximumFrequencyClockCountLocation = 0xE7 - actualFrequencyClockCountLocation = 0xE8 - throttleTemperatureLocation = 0x1A2 - temperatureLocation = 0x19C - timestampCounterLocation = 0x10 - turboRatioLimitLocation = 0x1AD - turboRatioLimit1Location = 0x1AE - turboRatioLimit2Location = 0x1AF - atomCoreTurboRatiosLocation = 0x66C - uncorePerfStatusLocation = 0x621 - platformInfo = 0xCE - fsbFreq = 0xCD -) - -const ( - msrTurboRatioLimitString = "MSR_TURBO_RATIO_LIMIT" - msrTurboRatioLimit1String = "MSR_TURBO_RATIO_LIMIT1" - msrTurboRatioLimit2String = "MSR_TURBO_RATIO_LIMIT2" - msrAtomCoreTurboRatiosString = "MSR_ATOM_CORE_TURBO_RATIOS" - msrUncorePerfStatusString = "MSR_UNCORE_PERF_STATUS" - msrPlatformInfoString = "MSR_PLATFORM_INFO" - msrFSBFreqString = "MSR_FSB_FREQ" -) - -// msrService is responsible for interactions with MSR. 
-type msrService interface { - getCPUCoresData() map[string]*msrData - retrieveCPUFrequencyForCore(core string) (float64, error) - retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) - openAndReadMsr(core string) error - readSingleMsr(core string, msr string) (uint64, error) - isMsrLoaded() bool -} - -type msrServiceImpl struct { - cpuCoresData map[string]*msrData - msrOffsets []int64 - fs fileService - log telegraf.Logger -} - -func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData { - return m.cpuCoresData -} - -func (m *msrServiceImpl) isMsrLoaded() bool { - for cpuID := range m.getCPUCoresData() { - err := m.openAndReadMsr(cpuID) - if err == nil { - return true - } - } - return false -} -func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) { - cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core) - err := checkFile(cpuFreqPath) - if err != nil { - return 0, err - } - cpuFreqFile, err := os.Open(cpuFreqPath) - if err != nil { - return 0, fmt.Errorf("error opening scaling_cur_freq file on path %q: %w", cpuFreqPath, err) - } - defer cpuFreqFile.Close() - - cpuFreq, _, err := m.fs.readFileToFloat64(cpuFreqFile) - return convertKiloHertzToMegaHertz(cpuFreq), err -} - -func (m *msrServiceImpl) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) { - uncoreFreqPath, err := createUncoreFreqPath(socketID, typeFreq, kind, die) - if err != nil { - return 0, fmt.Errorf("unable to create uncore freq read path for socketID %q, and frequency type %q: %w", socketID, typeFreq, err) - } - err = checkFile(uncoreFreqPath) - if err != nil { - return 0, err - } - uncoreFreqFile, err := os.Open(uncoreFreqPath) - if err != nil { - return 0, fmt.Errorf("error opening uncore frequncy file on %q: %w", uncoreFreqPath, err) - } - defer uncoreFreqFile.Close() - - uncoreFreq, _, err := m.fs.readFileToFloat64(uncoreFreqFile) - return 
convertKiloHertzToMegaHertz(uncoreFreq), err -} - -func createUncoreFreqPath(socketID string, typeFreq string, kind string, die string) (string, error) { - if socketID >= "0" && socketID <= "9" { - socketID = fmt.Sprintf("0%s", socketID) - } - if die >= "0" && die <= "9" { - die = fmt.Sprintf("0%s", die) - } - var prefix string - - switch typeFreq { - case "initial": - prefix = "initial_" - case "current": - prefix = "" - default: - return "", fmt.Errorf("unknown frequency type %s, only 'initial' and 'current' are supported", typeFreq) - } - - if kind != "min" && kind != "max" { - return "", fmt.Errorf("unknown frequency type %s, only 'min' and 'max' are supported", kind) - } - return fmt.Sprintf(uncoreFreqPath, socketID, die, prefix, kind), nil -} - -func (m *msrServiceImpl) openAndReadMsr(core string) error { - path := fmt.Sprintf(msrPartialPath, core) - err := checkFile(path) - if err != nil { - return err - } - msrFile, err := os.Open(path) - if err != nil { - return fmt.Errorf("error opening MSR file on path %q: %w", path, err) - } - defer msrFile.Close() - - err = m.readDataFromMsr(core, msrFile) - if err != nil { - return fmt.Errorf("error reading data from MSR for core %q: %w", core, err) - } - return nil -} - -func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) { - path := fmt.Sprintf(msrPartialPath, core) - err := checkFile(path) - if err != nil { - return 0, err - } - msrFile, err := os.Open(path) - if err != nil { - return 0, fmt.Errorf("error opening MSR file on path %q: %w", path, err) - } - defer msrFile.Close() - - var msrAddress int64 - switch msr { - case msrTurboRatioLimitString: - msrAddress = turboRatioLimitLocation - case msrTurboRatioLimit1String: - msrAddress = turboRatioLimit1Location - case msrTurboRatioLimit2String: - msrAddress = turboRatioLimit2Location - case msrAtomCoreTurboRatiosString: - msrAddress = atomCoreTurboRatiosLocation - case msrUncorePerfStatusString: - msrAddress = uncorePerfStatusLocation - 
case msrPlatformInfoString: - msrAddress = platformInfo - case msrFSBFreqString: - msrAddress = fsbFreq - default: - return 0, fmt.Errorf("incorrect name of MSR %s", msr) - } - - value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress) - if err != nil { - return 0, err - } - - return value, nil -} - -func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error { - g, ctx := errgroup.WithContext(context.Background()) - - // Create and populate a map that contains msr offsets along with their respective channels - msrOffsetsWithChannels := make(map[int64]chan uint64) - for _, offset := range m.msrOffsets { - msrOffsetsWithChannels[offset] = make(chan uint64) - } - - // Start a goroutine for each msr offset - for offset, channel := range msrOffsetsWithChannels { - // Wrap around function to avoid race on loop counter - func(off int64, ch chan uint64) { - g.Go(func() error { - defer close(ch) - - err := m.readValueFromFileAtOffset(ctx, ch, reader, off) - if err != nil { - return fmt.Errorf("error reading MSR file: %w", err) - } - - return nil - }) - }(offset, channel) - } - - newC3 := <-msrOffsetsWithChannels[c3StateResidencyLocation] - newC6 := <-msrOffsetsWithChannels[c6StateResidencyLocation] - newC7 := <-msrOffsetsWithChannels[c7StateResidencyLocation] - newMperf := <-msrOffsetsWithChannels[maximumFrequencyClockCountLocation] - newAperf := <-msrOffsetsWithChannels[actualFrequencyClockCountLocation] - newTsc := <-msrOffsetsWithChannels[timestampCounterLocation] - newThrottleTemp := <-msrOffsetsWithChannels[throttleTemperatureLocation] - newTemp := <-msrOffsetsWithChannels[temperatureLocation] - - if err := g.Wait(); err != nil { - return fmt.Errorf("received error during reading MSR values in goroutines: %w", err) - } - - m.cpuCoresData[core].c3Delta = newC3 - m.cpuCoresData[core].c3 - m.cpuCoresData[core].c6Delta = newC6 - m.cpuCoresData[core].c6 - m.cpuCoresData[core].c7Delta = newC7 - m.cpuCoresData[core].c7 - 
m.cpuCoresData[core].mperfDelta = newMperf - m.cpuCoresData[core].mperf - m.cpuCoresData[core].aperfDelta = newAperf - m.cpuCoresData[core].aperf - m.cpuCoresData[core].timeStampCounterDelta = newTsc - m.cpuCoresData[core].timeStampCounter - - m.cpuCoresData[core].c3 = newC3 - m.cpuCoresData[core].c6 = newC6 - m.cpuCoresData[core].c7 = newC7 - m.cpuCoresData[core].mperf = newMperf - m.cpuCoresData[core].aperf = newAperf - m.cpuCoresData[core].timeStampCounter = newTsc - // MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16. - m.cpuCoresData[core].throttleTemp = int64((newThrottleTemp >> 16) & 0xFF) - // MSR (19Ch) IA32_THERM_STATUS bits 22:16. - m.cpuCoresData[core].temp = int64((newTemp >> 16) & 0x7F) - - return nil -} - -func (m *msrServiceImpl) readValueFromFileAtOffset(ctx context.Context, ch chan uint64, reader io.ReaderAt, offset int64) error { - value, err := m.fs.readFileAtOffsetToUint64(reader, offset) - if err != nil { - return err - } - - // Detect context cancellation and return an error if other goroutine fails - select { - case <-ctx.Done(): - return ctx.Err() - case ch <- value: - } - - return nil -} - -// setCPUCores initialize cpuCoresData map. 
-func (m *msrServiceImpl) setCPUCores() error { - m.cpuCoresData = make(map[string]*msrData) - cpuPrefix := "cpu" - cpuCore := fmt.Sprintf("%s%s", cpuPrefix, "[0-9]*") - cpuCorePattern := fmt.Sprintf("%s/%s", systemCPUPath, cpuCore) - cpuPaths, err := m.fs.getStringsMatchingPatternOnPath(cpuCorePattern) - if err != nil { - return err - } - if len(cpuPaths) == 0 { - m.log.Debugf("CPU core data wasn't found using pattern: %s", cpuCorePattern) - return nil - } - - for _, cpuPath := range cpuPaths { - core := strings.TrimPrefix(filepath.Base(cpuPath), cpuPrefix) - m.cpuCoresData[core] = &msrData{ - mperf: 0, - aperf: 0, - timeStampCounter: 0, - c3: 0, - c6: 0, - c7: 0, - throttleTemp: 0, - temp: 0, - mperfDelta: 0, - aperfDelta: 0, - timeStampCounterDelta: 0, - c3Delta: 0, - c6Delta: 0, - c7Delta: 0, - } - } - - return nil -} - -func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl { - msrService := &msrServiceImpl{ - fs: fs, - log: logger, - } - err := msrService.setCPUCores() - if err != nil { - // This error does not prevent plugin from working thus it is not returned. - msrService.log.Error(err) - } - - msrService.msrOffsets = []int64{c3StateResidencyLocation, c6StateResidencyLocation, c7StateResidencyLocation, - maximumFrequencyClockCountLocation, actualFrequencyClockCountLocation, timestampCounterLocation, - throttleTemperatureLocation, temperatureLocation} - return msrService -} diff --git a/plugins/inputs/intel_powerstat/msr_mock_test.go b/plugins/inputs/intel_powerstat/msr_mock_test.go deleted file mode 100644 index f4b99f0e6dfcf..0000000000000 --- a/plugins/inputs/intel_powerstat/msr_mock_test.go +++ /dev/null @@ -1,134 +0,0 @@ -//go:build linux - -// Code generated by mockery v2.12.3. DO NOT EDIT. 
- -package intel_powerstat - -import mock "github.com/stretchr/testify/mock" - -// mockMsrService is an autogenerated mock type for the mockMsrService type -type mockMsrService struct { - mock.Mock -} - -// isMsrLoaded provides a mock function with given fields: -func (_m *mockMsrService) isMsrLoaded() bool { - ret := _m.Called() - - var r0 bool - if rf, ok := ret.Get(0).(func() bool); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(bool) - } - - return r0 -} - -// getCPUCoresData provides a mock function with given fields: -func (_m *mockMsrService) getCPUCoresData() map[string]*msrData { - ret := _m.Called() - - var r0 map[string]*msrData - if rf, ok := ret.Get(0).(func() map[string]*msrData); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(map[string]*msrData) - } - } - - return r0 -} - -// openAndReadMsr provides a mock function with given fields: core -func (_m *mockMsrService) openAndReadMsr(core string) error { - ret := _m.Called(core) - - var r0 error - if rf, ok := ret.Get(0).(func(string) error); ok { - r0 = rf(core) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// readSingleMsr provides a mock function with given fields: core, msr -func (_m *mockMsrService) readSingleMsr(core string, msr string) (uint64, error) { - ret := _m.Called(core, msr) - - var r0 uint64 - if rf, ok := ret.Get(0).(func(string, string) uint64); ok { - r0 = rf(core, msr) - } else { - r0 = ret.Get(0).(uint64) - } - - var r1 error - if rf, ok := ret.Get(1).(func(string, string) error); ok { - r1 = rf(core, msr) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// retrieveCPUFrequencyForCore provides a mock function with given fields: core -func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) { - ret := _m.Called(core) - - var r0 float64 - if rf, ok := ret.Get(0).(func(string) float64); ok { - r0 = rf(core) - } else { - r0 = ret.Get(0).(float64) - } - - var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok 
{ - r1 = rf(core) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// retrieveUncoreFrequency provides a mock function with given fields: socketID, typeFreq, kind, die -func (_m *mockMsrService) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) { - ret := _m.Called(socketID, typeFreq, kind, die) - - var r0 float64 - if rf, ok := ret.Get(0).(func(string, string, string, string) float64); ok { - r0 = rf(socketID, typeFreq, kind, die) - } else { - r0 = ret.Get(0).(float64) - } - - var r1 error - if rf, ok := ret.Get(1).(func(string, string, string, string) error); ok { - r1 = rf(socketID, typeFreq, kind, die) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -type newmockMsrServiceT interface { - mock.TestingT - Cleanup(func()) -} - -// newmockMsrService creates a new instance of mockMsrService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func newmockMsrService(t newmockMsrServiceT) *mockMsrService { - mock := &mockMsrService{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/plugins/inputs/intel_powerstat/msr_test.go b/plugins/inputs/intel_powerstat/msr_test.go deleted file mode 100644 index c9e61187779e1..0000000000000 --- a/plugins/inputs/intel_powerstat/msr_test.go +++ /dev/null @@ -1,188 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "context" - "errors" - "strings" - "testing" - - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/influxdata/telegraf/testutil" -) - -func TestReadDataFromMsrPositive(t *testing.T) { - firstValue := uint64(1000000) - secondValue := uint64(5000000) - delta := secondValue - firstValue - cpuCores := []string{"cpu0", "cpu1"} - msr, fsMock := getMsrServiceWithMockedFs() - prepareTestData(fsMock, cpuCores, msr, t) - cores := trimCPUFromCores(cpuCores) - - methodCallNumberForFirstValue := 
len(msr.msrOffsets) * len(cores) - methodCallNumberForSecondValue := methodCallNumberForFirstValue * 2 - - fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). - Return(firstValue, nil).Times(methodCallNumberForFirstValue) - for _, core := range cores { - require.NoError(t, msr.readDataFromMsr(core, nil)) - } - fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForFirstValue) - verifyCPUCoresData(cores, t, msr, firstValue, false, 0) - - fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). - Return(secondValue, nil).Times(methodCallNumberForFirstValue) - for _, core := range cores { - require.NoError(t, msr.readDataFromMsr(core, nil)) - } - fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForSecondValue) - verifyCPUCoresData(cores, t, msr, secondValue, true, delta) -} - -func trimCPUFromCores(cpuCores []string) []string { - cores := make([]string, 0) - for _, core := range cpuCores { - cores = append(cores, strings.TrimPrefix(core, "cpu")) - } - return cores -} - -func TestReadDataFromMsrNegative(t *testing.T) { - firstValue := uint64(1000000) - cpuCores := []string{"cpu0", "cpu1"} - msr, fsMock := getMsrServiceWithMockedFs() - - prepareTestData(fsMock, cpuCores, msr, t) - cores := trimCPUFromCores(cpuCores) - - methodCallNumberPerCore := len(msr.msrOffsets) - - // Normal execution for first core. - fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). - Return(firstValue, nil).Times(methodCallNumberPerCore). - // Fail to read file for second core. - On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). 
- Return(uint64(0), errors.New("error reading file")).Times(methodCallNumberPerCore) - - require.NoError(t, msr.readDataFromMsr(cores[0], nil)) - require.Error(t, msr.readDataFromMsr(cores[1], nil)) -} - -func TestReadValueFromFileAtOffset(t *testing.T) { - cores := []string{"cpu0", "cpu1"} - msr, fsMock := getMsrServiceWithMockedFs() - ctx := context.Background() - testChannel := make(chan uint64, 1) - defer close(testChannel) - zero := uint64(0) - - prepareTestData(fsMock, cores, msr, t) - - fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). - Return(zero, errors.New("error reading file")).Once() - require.Error(t, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0)) - - fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything). - Return(zero, nil).Once() - require.NoError(t, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0)) - require.Equal(t, zero, <-testChannel) -} - -func TestCreateUncoreFreqPath(t *testing.T) { - path, err := createUncoreFreqPath("0", "initial", "min", "0") - expectedPath := "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_min_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "initial", "max", "0") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "current", "min", "0") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/min_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "current", "max", "0") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("9", "current", "max", "0") - expectedPath = 
"/sys/devices/system/cpu/intel_uncore_frequency/package_09_die_00/max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("99", "current", "max", "0") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_99_die_00/max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "current", "max", "9") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_09/max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "current", "max", "99") - expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_99/max_freq_khz" - require.NoError(t, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "foo", "max", "0") - expectedPath = "" - expectedError := errors.New("unknown frequency type foo, only 'initial' and 'current' are supported") - require.Equal(t, expectedError, err) - require.Equal(t, expectedPath, path) - - path, err = createUncoreFreqPath("0", "current", "bar", "0") - expectedPath = "" - expectedError = errors.New("unknown frequency type bar, only 'min' and 'max' are supported") - require.Equal(t, expectedError, err) - require.Equal(t, expectedPath, path) -} - -func prepareTestData(fsMock *mockFileService, cores []string, msr *msrServiceImpl, t *testing.T) { - // Prepare MSR offsets and CPUCoresData for test. - fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). 
- Return(cores, nil).Once() - require.NoError(t, msr.setCPUCores()) - fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) -} - -func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expectedValue uint64, verifyDelta bool, delta uint64) { - for _, core := range cores { - require.Equal(t, expectedValue, msr.cpuCoresData[core].c3) - require.Equal(t, expectedValue, msr.cpuCoresData[core].c6) - require.Equal(t, expectedValue, msr.cpuCoresData[core].c7) - require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf) - require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf) - require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter) - require.Equal(t, int64((expectedValue>>16)&0xFF), msr.cpuCoresData[core].throttleTemp) - require.Equal(t, int64((expectedValue>>16)&0x7F), msr.cpuCoresData[core].temp) - - if verifyDelta { - require.Equal(t, delta, msr.cpuCoresData[core].c3Delta) - require.Equal(t, delta, msr.cpuCoresData[core].c6Delta) - require.Equal(t, delta, msr.cpuCoresData[core].c7Delta) - require.Equal(t, delta, msr.cpuCoresData[core].mperfDelta) - require.Equal(t, delta, msr.cpuCoresData[core].aperfDelta) - require.Equal(t, delta, msr.cpuCoresData[core].timeStampCounterDelta) - } - } -} - -func getMsrServiceWithMockedFs() (*msrServiceImpl, *mockFileService) { - cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"} - logger := testutil.Logger{Name: "PowerPluginTest"} - fsMock := &mockFileService{} - fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). 
- Return(cores, nil).Once() - msr := newMsrServiceWithFs(logger, fsMock) - - return msr, fsMock -} diff --git a/plugins/inputs/intel_powerstat/options.go b/plugins/inputs/intel_powerstat/options.go new file mode 100644 index 0000000000000..7e422b26bc01f --- /dev/null +++ b/plugins/inputs/intel_powerstat/options.go @@ -0,0 +1,184 @@ +//go:build linux && amd64 + +package intel_powerstat + +import ( + "slices" + "time" + + ptel "github.com/intel/powertelemetry" + + "github.com/influxdata/telegraf" +) + +// optConfig represents plugin configuration fields needed to generate options. +type optConfig struct { + cpuMetrics []cpuMetricType + packageMetrics []packageMetricType + includedCPUs []int + excludedCPUs []int + perfEventFile string + msrReadTimeout time.Duration + log telegraf.Logger +} + +// optionGenerator takes a struct with the plugin configuration, and generates options +// needed to gather metrics. +type optionGenerator interface { + generate(cfg optConfig) []ptel.Option +} + +// optGenerator implements optionGenerator interface. +type optGenerator struct{} + +// generate takes plugin configuration options and generates options needed +// to gather requested metrics. 
+func (g *optGenerator) generate(cfg optConfig) []ptel.Option { + opts := make([]ptel.Option, 0) + if len(cfg.includedCPUs) != 0 { + opts = append(opts, ptel.WithIncludedCPUs(cfg.includedCPUs)) + } + + if len(cfg.excludedCPUs) != 0 { + opts = append(opts, ptel.WithExcludedCPUs(cfg.excludedCPUs)) + } + + if needsMsrCPU(cfg.cpuMetrics) || needsMsrPackage(cfg.packageMetrics) { + if cfg.msrReadTimeout == 0 { + opts = append(opts, ptel.WithMsr()) + } else { + opts = append(opts, ptel.WithMsrTimeout(cfg.msrReadTimeout)) + } + } + + if needsRapl(cfg.packageMetrics) { + opts = append(opts, ptel.WithRapl()) + } + + if needsCoreFreq(cfg.cpuMetrics) { + opts = append(opts, ptel.WithCoreFrequency()) + } + + if needsUncoreFreq(cfg.packageMetrics) { + opts = append(opts, ptel.WithUncoreFrequency()) + } + + if needsPerf(cfg.cpuMetrics) { + opts = append(opts, ptel.WithPerf(cfg.perfEventFile)) + } + + if cfg.log != nil { + opts = append(opts, ptel.WithLogger(cfg.log)) + } + + return opts +} + +// needsMsr takes a slice of strings, representing supported metrics, and +// returns true if any relies on msr registers. +func needsMsrCPU(metrics []cpuMetricType) bool { + for _, m := range metrics { + switch m { + case cpuTemperature: + case cpuC0StateResidency: + case cpuC1StateResidency: + case cpuC3StateResidency: + case cpuC6StateResidency: + case cpuC7StateResidency: + case cpuBusyCycles: + case cpuBusyFrequency: + default: + continue + } + + return true + } + return false +} + +// needsMsrPackage takes a slice of strings, representing supported metrics, and +// returns true if any relies on msr registers. +func needsMsrPackage(metrics []packageMetricType) bool { + for _, m := range metrics { + switch m { + case packageCPUBaseFrequency: + case packageTurboLimit: + case packageUncoreFrequency: + // Fallback mechanism retrieves this metric from MSR registers. 
+ default: + continue + } + + return true + } + return false +} + +// needsTimeRelatedMsr takes a slice of strings, representing supported metrics, and +// returns true if any relies on time-related reads of msr registers. +func needsTimeRelatedMsr(metrics []cpuMetricType) bool { + for _, m := range metrics { + switch m { + case cpuC0StateResidency: + case cpuC1StateResidency: + case cpuC3StateResidency: + case cpuC6StateResidency: + case cpuC7StateResidency: + case cpuBusyCycles: + case cpuBusyFrequency: + default: + continue + } + + return true + } + return false +} + +// needsRapl takes a slice of strings, representing supported metrics, and +// returns true if any relies on intel-rapl control zone. +func needsRapl(metrics []packageMetricType) bool { + for _, m := range metrics { + switch m { + case packageCurrentPowerConsumption: + case packageCurrentDramPowerConsumption: + case packageThermalDesignPower: + default: + continue + } + + return true + } + return false +} + +// needsCoreFreq takes a slice of strings, representing supported metrics, and +// returns true if any relies on sysfs "/sys/devices/system/cpu/" with global and +// individual CPU attributes. +func needsCoreFreq(metrics []cpuMetricType) bool { + return slices.Contains(metrics, cpuFrequency) +} + +// needsUncoreFreq takes a slice of strings, representing supported metrics, and returns +// true if any relies on sysfs interface "/sys/devices/system/cpu/intel_uncore_frequency/" +// provided by intel_uncore_frequency kernel module. +func needsUncoreFreq(metrics []packageMetricType) bool { + return slices.Contains(metrics, packageUncoreFrequency) +} + +// needsPerf takes a slice of strings, representing supported metrics, and +// returns true if any relies on perf_events interface. 
+func needsPerf(metrics []cpuMetricType) bool { + for _, m := range metrics { + switch m { + case cpuC0SubstateC01Percent: + case cpuC0SubstateC02Percent: + case cpuC0SubstateC0WaitPercent: + default: + continue + } + + return true + } + return false +} diff --git a/plugins/inputs/intel_powerstat/options_test.go b/plugins/inputs/intel_powerstat/options_test.go new file mode 100644 index 0000000000000..a6def747a25ff --- /dev/null +++ b/plugins/inputs/intel_powerstat/options_test.go @@ -0,0 +1,305 @@ +//go:build linux && amd64 + +package intel_powerstat + +import ( + "reflect" + "runtime" + "strings" + "testing" + "time" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/require" +) + +func TestGenerate(t *testing.T) { + t.Run("NoCPUsSpecified", func(t *testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + cpuMetrics: []cpuMetricType{ + cpuFrequency, // needs coreFreq + cpuC0SubstateC01Percent, // needs perf + }, + packageMetrics: []packageMetricType{ + packageCurrentPowerConsumption, // needs rapl + packageUncoreFrequency, // needs uncoreFreq and msr + }, + }) + + require.Len(t, opts, 5) + }) + + t.Run("ExcludedCPUs", func(t *testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + excludedCPUs: []int{0, 1, 2, 3}, + cpuMetrics: []cpuMetricType{ + // needs msr + cpuTemperature, + }, + packageMetrics: []packageMetricType{ + // needs rapl + packageCurrentPowerConsumption, + }, + }) + + require.Len(t, opts, 3) + }) + + t.Run("IncludedCPUs", func(t *testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + includedCPUs: []int{0, 1, 2, 3}, + cpuMetrics: []cpuMetricType{ + cpuFrequency, // needs coreFreq + cpuC0SubstateC0WaitPercent, // needs perf + }, + packageMetrics: []packageMetricType{ + packageTurboLimit, // needs msr + packageCurrentDramPowerConsumption, // needs rapl + packageUncoreFrequency, // needs uncoreFreq + }, + }) + + require.Len(t, opts, 6) + }) + + t.Run("WithMsrTimeout", func(t 
*testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + cpuMetrics: []cpuMetricType{ + cpuTemperature, + }, + msrReadTimeout: time.Second, + }) + + require.Len(t, opts, 1) + + withMsrTimeoutUsed := false + for _, opt := range opts { + if strings.Contains(runtime.FuncForPC(reflect.ValueOf(opt).Pointer()).Name(), ".WithMsrTimeout.") { + withMsrTimeoutUsed = true + continue + } + } + require.True(t, withMsrTimeoutUsed, "WithMsrTimeout wasn't included in the generated options") + }) + + t.Run("WithMsr", func(t *testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + cpuMetrics: []cpuMetricType{ + cpuC7StateResidency, + }, + msrReadTimeout: 0, //timeout disabled + }) + + require.Len(t, opts, 1) + + withMsrUsed := false + for _, opt := range opts { + if strings.Contains(runtime.FuncForPC(reflect.ValueOf(opt).Pointer()).Name(), ".WithMsr.") { + withMsrUsed = true + continue + } + } + require.True(t, withMsrUsed, "WithMsr wasn't included in the generated options") + }) + + t.Run("WithLogger", func(t *testing.T) { + g := &optGenerator{} + opts := g.generate(optConfig{ + cpuMetrics: []cpuMetricType{ + cpuC3StateResidency, + }, + log: &testutil.Logger{}, + }) + + require.Len(t, opts, 2) + + withLoggerUsed := false + for _, opt := range opts { + if strings.Contains(runtime.FuncForPC(reflect.ValueOf(opt).Pointer()).Name(), ".WithLogger.") { + withLoggerUsed = true + continue + } + } + require.True(t, withLoggerUsed, "WithLogger wasn't included in the generated options") + }) +} + +func TestNeedsMsrPackage(t *testing.T) { + packageMetrics := []packageMetricType{ + packageThermalDesignPower, // needs rapl + packageCurrentDramPowerConsumption, // needs rapl + packageMetricType(420), + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsMsrPackage(packageMetrics)) + }) + + t.Run("True", func(t *testing.T) { + t.Run("CPUBaseFreq", func(t *testing.T) { + packageMetrics[len(packageMetrics)-1] = packageCPUBaseFrequency + require.True(t, 
needsMsrPackage(packageMetrics)) + }) + + t.Run("PackageTurboLimit", func(t *testing.T) { + packageMetrics[len(packageMetrics)-1] = packageTurboLimit + require.True(t, needsMsrPackage(packageMetrics)) + }) + + t.Run("PackageUncoreFrequency", func(t *testing.T) { + packageMetrics[len(packageMetrics)-1] = packageUncoreFrequency + require.True(t, needsMsrPackage(packageMetrics)) + }) + }) +} + +func TestNeedsMsrCPU(t *testing.T) { + cpuMetrics := []cpuMetricType{ + cpuFrequency, // needs cpuFreq + cpuC0SubstateC01Percent, // needs perf + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("True", func(t *testing.T) { + t.Run("CPUTemperature", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuTemperature + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUC0StateResidency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuC0StateResidency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUC1StateResidency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuC1StateResidency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUC3StateResidency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuC3StateResidency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUC6StateResidency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuC6StateResidency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUC7StateResidency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuC7StateResidency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUBusyCycles", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuBusyCycles + require.True(t, needsMsrCPU(cpuMetrics)) + }) + + t.Run("CPUBusyFrequency", func(t *testing.T) { + cpuMetrics[len(cpuMetrics)-1] = cpuBusyFrequency + require.True(t, needsMsrCPU(cpuMetrics)) + }) + }) +} + +func TestNeedsRapl(t *testing.T) { + metrics := []packageMetricType{ + packageCPUBaseFrequency, // 
needs msr + packageUncoreFrequency, // needs uncoreFreq + packageMetricType(420), + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsRapl(metrics)) + }) + + t.Run("True", func(t *testing.T) { + t.Run("PackageCurrentPowerConsumption", func(t *testing.T) { + metrics[len(metrics)-1] = packageCurrentPowerConsumption + require.True(t, needsRapl(metrics)) + }) + + t.Run("PackageCurrentDramPowerConsumption", func(t *testing.T) { + metrics[len(metrics)-1] = packageCurrentDramPowerConsumption + require.True(t, needsRapl(metrics)) + }) + + t.Run("PackageThermalDesignPower", func(t *testing.T) { + metrics[len(metrics)-1] = packageThermalDesignPower + require.True(t, needsRapl(metrics)) + }) + }) +} + +func TestNeedsCoreFreq(t *testing.T) { + metrics := []cpuMetricType{ + cpuTemperature, // needs msr + cpuC1StateResidency, // needs msr + cpuC0SubstateC01Percent, // needs perf + cpuMetricType(420), + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsCoreFreq(metrics)) + }) + + t.Run("True", func(t *testing.T) { + metrics[len(metrics)-1] = cpuFrequency + require.True(t, needsCoreFreq(metrics)) + }) +} + +func TestNeedsUncoreFreq(t *testing.T) { + metrics := []packageMetricType{ + packageCPUBaseFrequency, // needs msr + packageThermalDesignPower, // needs rapl + packageMetricType(420), + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsUncoreFreq(metrics)) + }) + + t.Run("True", func(t *testing.T) { + metrics[len(metrics)-1] = packageUncoreFrequency + require.True(t, needsUncoreFreq(metrics)) + }) +} + +func TestNeedsPerf(t *testing.T) { + metrics := []cpuMetricType{ + cpuFrequency, // needs cpuFreq + cpuC1StateResidency, // needs msr + cpuMetricType(420), + } + + t.Run("False", func(t *testing.T) { + require.False(t, needsPerf(metrics)) + }) + + t.Run("True", func(t *testing.T) { + t.Run("CPUC0SubstateC01Percent", func(t *testing.T) { + metrics[len(metrics)-1] = cpuC0SubstateC01Percent + require.True(t, needsPerf(metrics)) + }) + 
+ t.Run("CPUC0SubstateC02Percent", func(t *testing.T) { + metrics[len(metrics)-1] = cpuC0SubstateC02Percent + require.True(t, needsPerf(metrics)) + }) + + t.Run("CPUC0SubstateC0WaitPercent", func(t *testing.T) { + metrics[len(metrics)-1] = cpuC0SubstateC0WaitPercent + require.True(t, needsPerf(metrics)) + }) + }) +} diff --git a/plugins/inputs/intel_powerstat/rapl.go b/plugins/inputs/intel_powerstat/rapl.go deleted file mode 100644 index 29e1fd0e94afc..0000000000000 --- a/plugins/inputs/intel_powerstat/rapl.go +++ /dev/null @@ -1,265 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "fmt" - "io" - "os" - "path/filepath" - "strings" - - "github.com/influxdata/telegraf" -) - -const ( - intelRaplPath = "/sys/devices/virtual/powercap/intel-rapl" - intelRaplSocketPartialPath = "%s/intel-rapl:%s" - energyUjPartialPath = "%s/energy_uj" - maxEnergyRangeUjPartialPath = "%s/max_energy_range_uj" - maxPowerUwPartialPath = "%s/constraint_0_max_power_uw" - intelRaplDramPartialPath = "%s/intel-rapl:%s/%s" - intelRaplDramNamePartialPath = "%s/name" -) - -// raplService is responsible for interactions with RAPL. -type raplService interface { - initializeRaplData() - getRaplData() map[string]*raplData - retrieveAndCalculateData(socketID string) error - getConstraintMaxPowerWatts(socketID string) (float64, error) -} - -type raplServiceImpl struct { - log telegraf.Logger - data map[string]*raplData - dramFolders map[string]string - fs fileService - logOnce map[string]error -} - -// initializeRaplData looks for RAPL folders and initializes data map with fetched information. 
-func (r *raplServiceImpl) initializeRaplData() { - r.prepareData() - r.findDramFolders() -} - -func (r *raplServiceImpl) getRaplData() map[string]*raplData { - return r.data -} - -func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error { - socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID) - socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath) - err := checkFile(socketEnergyUjPath) - if err != nil { - return err - } - socketEnergyUjFile, err := os.Open(socketEnergyUjPath) - if err != nil { - return fmt.Errorf("error opening socket energy_uj file on path %q: %w", socketEnergyUjPath, err) - } - defer socketEnergyUjFile.Close() - - dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID]) - dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath) - err = checkFile(dramEnergyUjPath) - if err != nil { - return err - } - dramEnergyUjFile, err := os.Open(dramEnergyUjPath) - if err != nil { - return fmt.Errorf("error opening dram energy_uj file on path %q: %w", dramEnergyUjPath, err) - } - defer dramEnergyUjFile.Close() - - socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath) - err = checkFile(socketMaxEnergyUjPath) - if err != nil { - return err - } - socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath) - if err != nil { - return fmt.Errorf("error opening socket max_energy_range_uj file on path %q: %w", socketMaxEnergyUjPath, err) - } - defer socketMaxEnergyUjFile.Close() - - dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath) - err = checkFile(dramMaxEnergyUjPath) - if err != nil { - return err - } - dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath) - if err != nil { - return fmt.Errorf("error opening dram max_energy_range_uj file on path %q: %w", dramMaxEnergyUjPath, err) - } - defer dramMaxEnergyUjFile.Close() - - return r.calculateData(socketID, socketEnergyUjFile, 
dramEnergyUjFile, socketMaxEnergyUjFile, dramMaxEnergyUjFile) -} - -func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) { - socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID) - socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath) - err := checkFile(socketMaxPowerPath) - if err != nil { - return 0, err - } - socketMaxPowerFile, err := os.Open(socketMaxPowerPath) - if err != nil { - return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %q: %w", socketMaxPowerPath, err) - } - defer socketMaxPowerFile.Close() - - socketMaxPower, _, err := r.fs.readFileToFloat64(socketMaxPowerFile) - return convertMicroWattToWatt(socketMaxPower), err -} - -func (r *raplServiceImpl) prepareData() { - intelRaplPrefix := "intel-rapl:" - intelRapl := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*") - raplPattern := fmt.Sprintf("%s/%s", intelRaplPath, intelRapl) - - raplPaths, err := r.fs.getStringsMatchingPatternOnPath(raplPattern) - if err != nil { - r.log.Errorf("error while preparing RAPL data: %v", err) - r.data = make(map[string]*raplData) - return - } - if len(raplPaths) == 0 { - r.log.Debugf("RAPL data wasn't found using pattern: %s", raplPattern) - r.data = make(map[string]*raplData) - return - } - - // If RAPL exists initialize data map (if it wasn't initialized before). 
- if len(r.data) == 0 { - for _, raplPath := range raplPaths { - socketID := strings.TrimPrefix(filepath.Base(raplPath), intelRaplPrefix) - r.data[socketID] = &raplData{ - socketCurrentEnergy: 0, - dramCurrentEnergy: 0, - socketEnergy: 0, - dramEnergy: 0, - readDate: 0, - } - } - } -} - -func (r *raplServiceImpl) findDramFolders() { - intelRaplPrefix := "intel-rapl:" - intelRaplDram := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*[0-9]*") - // Clean existing map - r.dramFolders = make(map[string]string) - - for socketID := range r.data { - path := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID) - raplFoldersPattern := fmt.Sprintf("%s/%s", path, intelRaplDram) - pathsToRaplFolders, err := r.fs.getStringsMatchingPatternOnPath(raplFoldersPattern) - if err != nil { - r.log.Errorf("error during lookup for rapl dram: %v", err) - continue - } - if len(pathsToRaplFolders) == 0 { - r.log.Debugf("RAPL folders weren't found using pattern: %s", raplFoldersPattern) - continue - } - - raplFolders := make([]string, 0) - for _, folderPath := range pathsToRaplFolders { - raplFolders = append(raplFolders, filepath.Base(folderPath)) - } - - r.findDramFolder(raplFolders, socketID) - } -} - -func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) { - if r.logOnce == nil { - r.logOnce = make(map[string]error) - } - - for _, raplFolder := range raplFolders { - potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder) - nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath) - read, err := r.fs.readFile(nameFilePath) - if err != nil { - if val := r.logOnce[nameFilePath]; val == nil || val.Error() != err.Error() { - r.log.Errorf("error reading file on path %q: %v", nameFilePath, err) - r.logOnce[nameFilePath] = err - } - continue - } - r.logOnce[nameFilePath] = nil - // Remove new line character - trimmedString := strings.TrimRight(string(read), "\n") - if trimmedString == "dram" { - // There 
should be only one DRAM folder per socket - r.dramFolders[socketID] = raplFolder - return - } - } -} - -func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.Reader, dramEnergyUjFile io.Reader, - socketMaxEnergyUjFile io.Reader, dramMaxEnergyUjFile io.Reader, -) error { - newSocketEnergy, _, err := r.readEnergyInJoules(socketEnergyUjFile) - if err != nil { - return err - } - - newDramEnergy, readDate, err := r.readEnergyInJoules(dramEnergyUjFile) - if err != nil { - return err - } - - interval := convertNanoSecondsToSeconds(readDate - r.data[socketID].readDate) - r.data[socketID].readDate = readDate - if interval == 0 { - return fmt.Errorf("interval between last two Telegraf cycles is 0") - } - - if newSocketEnergy >= r.data[socketID].socketEnergy { - r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval - } else { - socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile) - if err != nil { - return err - } - // When socket energy_uj counter reaches maximum value defined in max_energy_range_uj file it - // starts counting from 0. - r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval - } - - if newDramEnergy >= r.data[socketID].dramEnergy { - r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval - } else { - dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile) - if err != nil { - return err - } - // When dram energy_uj counter reaches maximum value defined in max_energy_range_uj file it - // starts counting from 0. 
- r.data[socketID].dramCurrentEnergy = (dramMaxEnergy - r.data[socketID].dramEnergy + newDramEnergy) / interval - } - r.data[socketID].socketEnergy = newSocketEnergy - r.data[socketID].dramEnergy = newDramEnergy - - return nil -} - -func (r *raplServiceImpl) readEnergyInJoules(reader io.Reader) (float64, int64, error) { - currentEnergy, readDate, err := r.fs.readFileToFloat64(reader) - return convertMicroJoulesToJoules(currentEnergy), readDate, err -} - -func newRaplServiceWithFs(logger telegraf.Logger, fs fileService) *raplServiceImpl { - return &raplServiceImpl{ - log: logger, - data: make(map[string]*raplData), - dramFolders: make(map[string]string), - fs: fs, - } -} diff --git a/plugins/inputs/intel_powerstat/rapl_mock_test.go b/plugins/inputs/intel_powerstat/rapl_mock_test.go deleted file mode 100644 index 4e23fb1baf463..0000000000000 --- a/plugins/inputs/intel_powerstat/rapl_mock_test.go +++ /dev/null @@ -1,83 +0,0 @@ -//go:build linux - -// Code generated by mockery v2.12.3. DO NOT EDIT. 
- -package intel_powerstat - -import mock "github.com/stretchr/testify/mock" - -// mockRaplService is an autogenerated mock type for the mockRaplService type -type mockRaplService struct { - mock.Mock -} - -// getConstraintMaxPowerWatts provides a mock function with given fields: socketID -func (_m *mockRaplService) getConstraintMaxPowerWatts(socketID string) (float64, error) { - ret := _m.Called(socketID) - - var r0 float64 - if rf, ok := ret.Get(0).(func(string) float64); ok { - r0 = rf(socketID) - } else { - r0 = ret.Get(0).(float64) - } - - var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok { - r1 = rf(socketID) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// getRaplData provides a mock function with given fields: -func (_m *mockRaplService) getRaplData() map[string]*raplData { - ret := _m.Called() - - var r0 map[string]*raplData - if rf, ok := ret.Get(0).(func() map[string]*raplData); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(map[string]*raplData) - } - } - - return r0 -} - -// initializeRaplData provides a mock function with given fields: -func (_m *mockRaplService) initializeRaplData() { - _m.Called() -} - -// retrieveAndCalculateData provides a mock function with given fields: socketID -func (_m *mockRaplService) retrieveAndCalculateData(socketID string) error { - ret := _m.Called(socketID) - - var r0 error - if rf, ok := ret.Get(0).(func(string) error); ok { - r0 = rf(socketID) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -type newmockRaplServiceT interface { - mock.TestingT - Cleanup(func()) -} - -// newmockRaplService creates a new instance of mockRaplService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func newmockRaplService(t newmockRaplServiceT) *mockRaplService { - mock := &mockRaplService{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/plugins/inputs/intel_powerstat/rapl_test.go b/plugins/inputs/intel_powerstat/rapl_test.go deleted file mode 100644 index 74a86a5251ded..0000000000000 --- a/plugins/inputs/intel_powerstat/rapl_test.go +++ /dev/null @@ -1,115 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "errors" - "fmt" - "strings" - "testing" - - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/influxdata/telegraf/testutil" -) - -func TestPrepareData(t *testing.T) { - sockets := []string{"intel-rapl:0", "intel-rapl:1"} - rapl, fsMock := getRaplWithMockedFs() - fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).Return(sockets, nil).Twice() - rapl.prepareData() - fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) - require.Equal(t, len(sockets), len(rapl.getRaplData())) - - // Verify no data is wiped in the next calls - socketEnergy := 74563813417.0 - socketID := "0" - rapl.data[socketID].socketEnergy = socketEnergy - - rapl.prepareData() - fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) - require.Equal(t, len(sockets), len(rapl.getRaplData())) - require.Equal(t, socketEnergy, rapl.data[socketID].socketEnergy) - - // Verify data is wiped once there is no RAPL folders - fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). 
- Return(nil, errors.New("missing RAPL")).Once() - rapl.prepareData() - fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) - require.Empty(t, rapl.getRaplData()) -} - -func TestFindDramFolders(t *testing.T) { - sockets := []string{"0", "1"} - raplFolders := []string{"intel-rapl:0:1", "intel-rapl:0:2", "intel-rapl:0:3"} - rapl, fsMock := getRaplWithMockedFs() - - for _, socketID := range sockets { - rapl.data[socketID] = &raplData{} - } - - firstPath := fmt.Sprintf(intelRaplDramNamePartialPath, - fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "0", raplFolders[2])) - secondPath := fmt.Sprintf(intelRaplDramNamePartialPath, - fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "1", raplFolders[1])) - - fsMock. - On("getStringsMatchingPatternOnPath", mock.Anything).Return(raplFolders, nil).Twice(). - On("readFile", firstPath).Return([]byte("dram"), nil).Once(). - On("readFile", secondPath).Return([]byte("dram"), nil).Once(). - On("readFile", mock.Anything).Return([]byte("random"), nil) - - rapl.findDramFolders() - - require.Equal(t, len(sockets), len(rapl.dramFolders)) - require.Equal(t, raplFolders[2], rapl.dramFolders["0"]) - require.Equal(t, raplFolders[1], rapl.dramFolders["1"]) - fsMock.AssertNumberOfCalls(t, "readFile", 5) -} - -func TestCalculateDataOverflowCases(t *testing.T) { - socketID := "1" - rapl, fsMock := getRaplWithMockedFs() - - rapl.data[socketID] = &raplData{} - rapl.data[socketID].socketEnergy = convertMicroJoulesToJoules(23424123.1) - rapl.data[socketID].dramEnergy = convertMicroJoulesToJoules(345611233.2) - rapl.data[socketID].readDate = 54123 - - interval := int64(54343) - convertedInterval := convertNanoSecondsToSeconds(interval - rapl.data[socketID].readDate) - - newEnergy := 3343443.4 - maxEnergy := 234324546456.6 - convertedNewEnergy := convertMicroJoulesToJoules(newEnergy) - convertedMaxNewEnergy := convertMicroJoulesToJoules(maxEnergy) - - maxDramEnergy := 981230834098.3 - newDramEnergy := 4533311.1 - 
convertedMaxDramEnergy := convertMicroJoulesToJoules(maxDramEnergy) - convertedDramEnergy := convertMicroJoulesToJoules(newDramEnergy) - - expectedCurrentEnergy := (convertedMaxNewEnergy - rapl.data[socketID].socketEnergy + convertedNewEnergy) / convertedInterval - expectedDramCurrentEnergy := (convertedMaxDramEnergy - rapl.data[socketID].dramEnergy + convertedDramEnergy) / convertedInterval - - fsMock. - On("readFileToFloat64", mock.Anything).Return(newEnergy, int64(12321), nil).Once(). - On("readFileToFloat64", mock.Anything).Return(newDramEnergy, interval, nil).Once(). - On("readFileToFloat64", mock.Anything).Return(maxEnergy, int64(64534), nil).Once(). - On("readFileToFloat64", mock.Anything).Return(maxDramEnergy, int64(98342), nil).Once() - - require.NoError(t, rapl.calculateData(socketID, strings.NewReader(mock.Anything), strings.NewReader(mock.Anything), - strings.NewReader(mock.Anything), strings.NewReader(mock.Anything))) - - require.Equal(t, expectedCurrentEnergy, rapl.data[socketID].socketCurrentEnergy) - require.Equal(t, expectedDramCurrentEnergy, rapl.data[socketID].dramCurrentEnergy) -} - -func getRaplWithMockedFs() (*raplServiceImpl, *mockFileService) { - logger := testutil.Logger{Name: "PowerPluginTest"} - fsMock := &mockFileService{} - rapl := newRaplServiceWithFs(logger, fsMock) - - return rapl, fsMock -} diff --git a/plugins/inputs/intel_powerstat/sample.conf b/plugins/inputs/intel_powerstat/sample.conf index fdad448fd36ab..74f4704535497 100644 --- a/plugins/inputs/intel_powerstat/sample.conf +++ b/plugins/inputs/intel_powerstat/sample.conf @@ -1,15 +1,15 @@ # Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) # and per-CPU metrics like temperature, power and utilization. Please see the # plugin readme for details on software and hardware compatability. -# This plugin ONLY supports Linux +# This plugin ONLY supports Linux. 
[[inputs.intel_powerstat]] ## The user can choose which package metrics are monitored by the plugin with ## the package_metrics setting: ## - The default, will collect "current_power_consumption", - ## "current_dram_power_consumption" and "thermal_design_power" - ## - Leaving this setting empty means no package metrics will be collected + ## "current_dram_power_consumption" and "thermal_design_power". + ## - Leaving this setting empty means no package metrics will be collected. ## - Finally, a user can specify individual metrics to capture from the - ## supported options list + ## supported options list. ## Supported options: ## "current_power_consumption", "current_dram_power_consumption", ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency", @@ -22,7 +22,29 @@ ## by the plugin. ## Supported options: ## "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", - ## "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", - ## "cpu_busy_frequency" - ## ATTENTION: cpu_busy_cycles is DEPRECATED - use cpu_c0_state_residency + ## "cpu_c3_state_residency", "cpu_c6_state_residency", "cpu_c7_state_residency", + ## "cpu_temperature", "cpu_busy_frequency", "cpu_c0_substate_c01", + ## "cpu_c0_substate_c02", "cpu_c0_substate_c0_wait" # cpu_metrics = [] + + ## Optionally the user can choose for which CPUs metrics configured in cpu_metrics array should be gathered. + ## Can't be combined with excluded_cpus. + ## Empty or missing array means CPU metrics are gathered for all CPUs. + ## e.g. ["0-3", "4,5,6"] or ["1-3,4"] + # included_cpus = [] + + ## Optionally the user can choose which CPUs should be excluded from gathering metrics configured in cpu_metrics array. + ## Can't be combined with included_cpus. + ## Empty or missing array means CPU metrics are gathered for all CPUs. + ## e.g. ["0-3", "4,5,6"] or ["1-3,4"] + # excluded_cpus = [] + + ## Filesystem location of JSON file that contains PMU event definitions. 
+ ## Mandatory only for perf-related metrics (cpu_c0_substate_c01, cpu_c0_substate_c02, cpu_c0_substate_c0_wait). + # event_definitions = "" + + ## The user can set the timeout duration for MSR reading. + ## Enabling this timeout can be useful in situations where, on heavily loaded systems, + ## the code waits too long for a kernel response to MSR read requests. + ## 0 disables the timeout (default). + # msr_read_timeout = "0ms" diff --git a/plugins/inputs/intel_powerstat/testdata/aperfmperf_flag_not_found/cpuinfo b/plugins/inputs/intel_powerstat/testdata/aperfmperf_flag_not_found/cpuinfo new file mode 100644 index 0000000000000..3d35ba4068366 --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/aperfmperf_flag_not_found/cpuinfo @@ -0,0 +1,27 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni 
avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/cpu_model_missing/cpuinfo b/plugins/inputs/intel_powerstat/testdata/cpu_model_missing/cpuinfo new file mode 100644 index 0000000000000..4cae6dcc4a876 --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/cpu_model_missing/cpuinfo @@ -0,0 +1,26 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba 
ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/cpuinfo b/plugins/inputs/intel_powerstat/testdata/cpuinfo new file mode 100644 index 0000000000000..e263f39c802c0 --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/cpuinfo @@ -0,0 +1,111 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology 
nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: + +processor : 1 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 1 +cpu cores : 56 +apicid : 2 +initial apicid : 2 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm 
constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: + +processor : 2 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 2 +cpu cores : 56 +apicid : 4 +initial apicid : 4 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi 
mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: + +processor : 3 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 3 +cpu cores : 56 +apicid : 6 +initial apicid : 6 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce 
cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/dts_flag_not_found/cpuinfo b/plugins/inputs/intel_powerstat/testdata/dts_flag_not_found/cpuinfo new file mode 100644 index 0000000000000..8436d080f8bc4 --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/dts_flag_not_found/cpuinfo @@ -0,0 +1,27 @@ +processor : 0 +vendor_id : 
GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits 
virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/model_not_supported/cpuinfo b/plugins/inputs/intel_powerstat/testdata/model_not_supported/cpuinfo new file mode 100644 index 0000000000000..e31e22a756d8a --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/model_not_supported/cpuinfo @@ -0,0 +1,27 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 14 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer 
posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/msr_flag_not_found/cpuinfo b/plugins/inputs/intel_powerstat/testdata/msr_flag_not_found/cpuinfo new file mode 100644 index 0000000000000..5285e7480b036 --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/msr_flag_not_found/cpuinfo @@ -0,0 +1,27 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 143 +model name : Intel(R) Xeon(R) Platinum 8480+ +stepping : 8 +microcode : 0xab0000c0 +cpu MHz : 2000.000 +cache size : 107520 KB +physical id : 0 +siblings : 112 +core id : 0 +cpu cores : 56 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 32 +wp : yes +flags : fpu vme de pse tsc pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp 
hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities +vmx flags : vnmi preemption_timer posted_intr invvpid ept_x_only ept_ad ept_1gb flexpriority apicv tsc_offset vtpr mtf vapic ept vpid unrestricted_guest vapic_reg vid ple shadow_vmcs pml ept_mode_based_exec tsc_scaling usr_wait_pause +bugs : spectre_v1 spectre_v2 spec_store_bypass swapgs eibrs_pbrsb +bogomips : 4000.00 +clflush size : 64 +cache_alignment : 64 +address sizes : 52 bits physical, 57 bits virtual +power management: diff --git a/plugins/inputs/intel_powerstat/testdata/sapphirerapids_core.json b/plugins/inputs/intel_powerstat/testdata/sapphirerapids_core.json new file mode 100644 index 0000000000000..6451550ec2a5c --- /dev/null +++ b/plugins/inputs/intel_powerstat/testdata/sapphirerapids_core.json @@ -0,0 +1,299 @@ +{ + "Header": { + "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", + "Info": "Performance Monitoring Events for 4th Generation Intel(R) Xeon(R) Processor Scalable Family based on Sapphire Rapids microarchitecture - V1.15", + "DatePublished": "06/28/2023", + "Version": "1.15", + "Legend": "" + }, + "Events": [ + { + "EventCode": "0x00", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Core cycles when the thread is not in halt state", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. 
For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", + "Counter": "Fixed counter 1", + "PEBScounters": "33", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x00", + "UMask": "0x03", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. 
Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "Counter": "Fixed counter 2", + "PEBScounters": "34", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x00", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "BriefDescription": "Thread cycles when thread is not in halt state", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x01", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. 
Software should ignore this case.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "25003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x08", + "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", + "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. 
To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", + "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x10", + "EventName": "CPU_CLK_UNHALTED.C01", + "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. 
This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x20", + "EventName": "CPU_CLK_UNHALTED.C02", + "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE", + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", + 
"BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "1", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x70", + "EventName": "CPU_CLK_UNHALTED.C0_WAIT", + "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + } + ] +} \ No newline at end of file diff --git a/plugins/inputs/intel_powerstat/unit_converter.go b/plugins/inputs/intel_powerstat/unit_converter.go deleted file mode 100644 index 229255218c548..0000000000000 --- a/plugins/inputs/intel_powerstat/unit_converter.go +++ /dev/null @@ -1,49 +0,0 @@ -//go:build linux - -package intel_powerstat - -import ( - "math" - "strconv" -) - -const ( - microJouleToJoule = 1.0 / 1000000 - microWattToWatt = 1.0 / 1000000 - kiloHertzToMegaHertz = 1.0 / 1000 - nanoSecondsToSeconds = 1.0 / 1000000000 - cyclesToHertz = 1.0 / 1000000 -) - -func convertMicroJoulesToJoules(mJ float64) float64 { - return mJ * microJouleToJoule -} - -func convertMicroWattToWatt(mW float64) float64 
{ - return mW * microWattToWatt -} - -func convertKiloHertzToMegaHertz(kiloHertz float64) float64 { - return kiloHertz * kiloHertzToMegaHertz -} - -func convertNanoSecondsToSeconds(ns int64) float64 { - return float64(ns) * nanoSecondsToSeconds -} - -func convertProcessorCyclesToHertz(pc uint64) float64 { - return float64(pc) * cyclesToHertz -} - -func roundFloatToNearestTwoDecimalPlaces(n float64) float64 { - return math.Round(n*100) / 100 -} - -func convertIntegerArrayToStringArray(array []int64) []string { - stringArray := make([]string, 0) - for _, value := range array { - stringArray = append(stringArray, strconv.FormatInt(value, 10)) - } - - return stringArray -}