From 6d7c1cec4e3b9e4801432bd9e96d5bcec5e7bb29 Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Fri, 2 Jun 2023 16:58:20 +0200 Subject: [PATCH 01/11] WIP add system metrics to yaml --- Makefile | 2 +- docs/system/runtime-environment-metrics.md | 12 +- docs/system/system-metrics.md | 796 +++++++++++++++++---- model/metrics/system-metrics.yaml | 456 ++++++++++++ 4 files changed, 1119 insertions(+), 147 deletions(-) create mode 100644 model/metrics/system-metrics.yaml diff --git a/Makefile b/Makefile index b02fdc0952..13bbc4924a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ MISSPELL = $(TOOLS_DIR)/$(MISSPELL_BINARY) # see https://github.com/open-telemetry/build-tools/releases for semconvgen updates # Keep links in model/README.md and .vscode/settings.json in sync! -SEMCONVGEN_VERSION=0.18.0 +SEMCONVGEN_VERSION=0.19.0 # TODO: add `yamllint` step to `all` after making sure it works on Mac. .PHONY: all diff --git a/docs/system/runtime-environment-metrics.md b/docs/system/runtime-environment-metrics.md index d28c355e90..65d2ce7ca7 100644 --- a/docs/system/runtime-environment-metrics.md +++ b/docs/system/runtime-environment-metrics.md @@ -299,7 +299,9 @@ Note that the JVM does not provide a definition of what "recent" means. | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `process.runtime.jvm.cpu.recent_utilization` | Gauge | `1` | Recent CPU utilization for the process as reported by the JVM. | +| `process.runtime.jvm.cpu.recent_utilization` | Gauge | `1` | Recent CPU utilization for the process as reported by the JVM. [1] | + +**[1]:** The value range is [0.0,1.0]. This utilization is not defined as being for the specific interval since last measurement (unlike `system.cpu.utilization`). [Reference](https://docs.oracle.com/en/java/javase/17/docs/api/jdk.management/com/sun/management/OperatingSystemMXBean.html#getProcessCpuLoad()). 
@@ -345,7 +347,9 @@ and [`com.ibm.lang.management.OperatingSystemMXBean#getSystemCpuLoad()`](https:/ | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `process.runtime.jvm.system.cpu.utilization` | Gauge | `1` | Recent CPU utilization for the whole system as reported by the JVM. | +| `process.runtime.jvm.system.cpu.utilization` | Gauge | `1` | Recent CPU utilization for the whole system as reported by the JVM. [1] | + +**[1]:** The value range is [0.0,1.0]. This utilization is not defined as being for the specific interval since last measurement (unlike `system.cpu.utilization`). [Reference](https://docs.oracle.com/en/java/javase/17/docs/api/jdk.management/com/sun/management/OperatingSystemMXBean.html#getCpuLoad()). @@ -359,7 +363,9 @@ This metric is obtained from [`OperatingSystemMXBean#getSystemLoadAverage()`](ht | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `process.runtime.jvm.system.cpu.load_1m` | Gauge | `1` | Average CPU load of the whole system for the last minute as reported by the JVM. | +| `process.runtime.jvm.system.cpu.load_1m` | Gauge | `1` | Average CPU load of the whole system for the last minute as reported by the JVM. [1] | + +**[1]:** The value range is [0,n], where n is the number of CPU cores - or a negative number if the value is not available. This utilization is not defined as being for the specific interval since last measurement (unlike `system.cpu.utilization`). [Reference](https://docs.oracle.com/en/java/javase/17/docs/api/java.management/java/lang/management/OperatingSystemMXBean.html#getSystemLoadAverage()). diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index 8aef8ad299..c9df78b7a6 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -15,152 +15,661 @@ instruments not explicitly defined in the specification. 
-- [Metric Instruments](#metric-instruments) - * [`system.cpu.` - Processor metrics](#systemcpu---processor-metrics) - * [`system.memory.` - Memory metrics](#systemmemory---memory-metrics) - * [`system.paging.` - Paging/swap metrics](#systempaging---pagingswap-metrics) - * [`system.disk.` - Disk controller metrics](#systemdisk---disk-controller-metrics) - * [`system.filesystem.` - Filesystem metrics](#systemfilesystem---filesystem-metrics) - * [`system.network.` - Network metrics](#systemnetwork---network-metrics) - * [`system.processes.` - Aggregate system process metrics](#systemprocesses---aggregate-system-process-metrics) - * [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) +- [Processor Metrics](#processor-metrics) + * [Metric: `system.cpu.time`](#metric-systemcputime) + * [Metric: `system.cpu.utilization`](#metric-systemcpuutilization) +- [Memory Metrics](#memory-metrics) + * [Metric: `system.memory.usage`](#metric-systemmemoryusage) + * [Metric: `system.memory.utilization`](#metric-systemmemoryutilization) +- [Paging/Swap Metrics](#pagingswap-metrics) + * [Metric: `system.paging.usage`](#metric-systempagingusage) + * [Metric: `system.paging.utilization`](#metric-systempagingutilization) + * [Metric: `system.paging.faults`](#metric-systempagingfaults) + * [Metric: `system.paging.operations`](#metric-systempagingoperations) +- [Disk Controller Metrics](#disk-controller-metrics) + * [Metric: `system.disk.io`](#metric-systemdiskio) + * [Metric: `system.disk.operations`](#metric-systemdiskoperations) + * [Metric: `system.disk.io_time`](#metric-systemdiskio_time) + * [Metric: `system.disk.operation_time`](#metric-systemdiskoperation_time) + * [Metric: `system.disk.merged`](#metric-systemdiskmerged) +- [Filesystem Metrics](#filesystem-metrics) + * [Metric: `system.filesystem.usage`](#metric-systemfilesystemusage) + * [Metric: `system.filesystem.utilization`](#metric-systemfilesystemutilization) +- [Network 
Metrics](#network-metrics) + * [Metric: `system.network.dropped`](#metric-systemnetworkdropped) + * [Metric: `system.network.packets`](#metric-systemnetworkpackets) + * [Metric: `system.network.errors`](#metric-systemnetworkerrors) + * [Metric: `system.network.io`](#metric-systemnetworkio) + * [Metric: `system.network.connections`](#metric-systemnetworkconnections) +- [Aggregate System Process Metrics](#aggregate-system-process-metrics) + * [Metric: `system.processes.count`](#metric-systemprocessescount) + * [Metric: `system.processes.created`](#metric-systemprocessescreated) +- [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) -## Metric Instruments - -### `system.cpu.` - Processor metrics - -**Description:** System level processor metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key(s) | Attribute Values | -| ---------------------- | -------------------------------------------------------------------------------------------------------- | ----- | ------------------------------------------------- | ---------- | ---------------- | ----------------------------------- | -| system.cpu.time | | s | Counter | Double | state | idle, user, system, interrupt, etc. | -| | | | | | cpu | CPU number [0..n-1] | -| system.cpu.utilization | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of CPUs | 1 | Gauge | Double | state | idle, user, system, interrupt, etc. | -| | | | | | cpu | CPU number (0..n) | - -### `system.memory.` - Memory metrics - -**Description:** System level memory metrics. This does not include [paging/swap -memory](#systempaging---pagingswap-metrics). 
- -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ------------------------- | ----------- | ----- | ------------------------------------------------- | ---------- | ------------- | ------------------------ | -| system.memory.usage | | By | UpDownCounter | Int64 | state | used, free, cached, etc. | -| system.memory.utilization | | 1 | Gauge | Double | state | used, free, cached, etc. | - -### `system.paging.` - Paging/swap metrics - -**Description:** System level paging/swap memory metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|---------------------------|-------------------------------------|--------------|---------------------------------------------------|------------|---------------|------------------| -| system.paging.usage | Unix swap or windows pagefile usage | By | UpDownCounter | Int64 | state | used, free | -| system.paging.utilization | | 1 | Gauge | Double | state | used, free | -| system.paging.faults | | {fault} | Counter | Int64 | type | major, minor | -| system.paging.operations | | {operation} | Counter | Int64 | type | major, minor | -| | | | | | direction | in, out | - -### `system.disk.` - Disk controller metrics - -**Description:** System level disk performance metrics. 
- -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|--------------------------------------------|-------------------------------------------------|--------------|---------------------------------------------------|------------|---------------|------------------| -| system.disk.io | | By | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.operations | | {operation} | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.io_time\[1\] | Time disk spent activated | s | Counter | Double | device | (identifier) | -| system.disk.operation_time\[2\] | Sum of the time each operation took to complete | s | Counter | Double | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.merged | | {operation} | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | - -1 The real elapsed time ("wall clock") -used in the I/O path (time from operations running in parallel are not -counted). Measured as: - -- Linux: Field 13 from -[procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) -- Windows: The complement of ["Disk\% Idle -Time"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained:~:text=%25%20Idle%20Time,Idle\)%20to%200%20(meaning%20always%20busy).) -performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` - -2 Because it is the sum of time each -request took, parallel-issued requests each contribute to make the count -grow. Measured as: - -- Linux: Fields 7 & 11 from -[procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) -- Windows: "Avg. 
Disk sec/Read" perf counter multiplied by "Disk Reads/sec" -perf counter (similar for Writes) - -### `system.filesystem.` - Filesystem metrics - -**Description:** System level filesystem metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ----------------------------- | ----------- | ----- | ------------------------------------------------- | ---------- | ------------- | -------------------- | -| system.filesystem.usage | | By | UpDownCounter | Int64 | device | (identifier) | -| | | | | | state | used, free, reserved | -| | | | | | type | ext4, tmpfs, etc. | -| | | | | | mode | rw, ro, etc. | -| | | | | | mountpoint | (path) | -| system.filesystem.utilization | | 1 | Gauge | Double | device | (identifier) | -| | | | | | state | used, free, reserved | -| | | | | | type | ext4, tmpfs, etc. | -| | | | | | mode | rw, ro, etc. | -| | | | | | mountpoint | (path) | - -### `system.network.` - Network metrics - -**Description:** System level network metrics. 
- -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|----------------------------------------|-------------------------------------------------------------------------------|---------------|---------------------------------------------------|------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| system.network.dropped\[1\] | Count of packets that are dropped or discarded even though there was no error | {packet} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.packets | | {packet} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.errors\[2\] | Count of network errors detected | {error} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.io | | By | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.connections | | {connection} | UpDownCounter | Int64 | device | (identifier) | -| | | | | | protocol | tcp, udp, [etc.](https://en.wikipedia.org/wiki/Transport_layer#Protocols) | -| | | | | | state | If specified, SHOULD be one of: close, close_wait, closing, delete, established, fin_wait_1, fin_wait_2, last_ack, listen, syn_recv, syn_sent, time_wait. A stateless protocol MUST NOT set this attribute. | - -1 Measured as: - -- Linux: the `drop` column in `/proc/dev/net` -([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). 
-- Windows: -[`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) -from -[`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). - -2 Measured as: - -- Linux: the `errs` column in `/proc/dev/net` -([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). -- Windows: -[`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) -from -[`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). - -### `system.processes.` - Aggregate system process metrics - -**Description:** System level aggregate process metrics. For metrics at the -individual process level, see [process metrics](process-metrics.md). - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics-general.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ------------------------ | --------------------------------------------------------- | ----------- | ------------------------------------------------- | ---------- | ------------- | ---------------------------------------------------------------------------------------------- | -| system.processes.count | Total number of processes in each state | {process} | UpDownCounter | Int64 | status | running, sleeping, [etc.](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | -| system.processes.created | Total number of processes created over uptime of the host | {process} | Counter | Int64 | - | - | - -### `system.{os}.` - OS Specific System Metrics +## Processor Metrics + +**Description:** System level processor metrics captured under `system.cpu`. + +### Metric: `system.cpu.time` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.time` | Counter | `s` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.cpu.cpu` | int | The CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | + +`system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `idle` | idle | +| `user` | user | +| `system` | system | +| `interrupt` | interrupt | + + +### Metric: `system.cpu.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.utilization` | Gauge | `1` | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of CPUs | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.cpu.cpu` | int | The CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | + +`system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `idle` | idle | +| `user` | user | +| `system` | system | +| `interrupt` | interrupt | + + +## Memory Metrics + +**Description:** System level memory metrics captured under `system.memory`. +This does not include [paging/swap memory](#pagingswap-metrics). + +### Metric: `system.memory.usage` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.memory.usage` | UpDownCounter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.memory.state` | string | The memory state | `free`; `cached` | Recommended | + +`system.memory.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `cached` | cached | + + +### Metric: `system.memory.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.memory.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.memory.state` | string | The memory state | `free`; `cached` | Recommended | + +`system.memory.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `cached` | cached | + + +## Paging/Swap Metrics + +**Description:** System level paging/swap memory metrics captured under `system.paging`. + +### Metric: `system.paging.usage` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.usage` | UpDownCounter | `By` | Unix swap or windows pagefile usage | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.state` | string | The memory paging state | `free` | Recommended | + +`system.paging.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | + + +### Metric: `system.paging.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.state` | string | The memory paging state | `free` | Recommended | + +`system.paging.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | + + +### Metric: `system.paging.faults` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.faults` | Counter | `{fault}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.type` | string | The memory paging type | `minor` | Recommended | + +`system.paging.type` MUST be one of the following: + +| Value | Description | +|---|---| +| `major` | major | +| `minor` | minor | + + +### Metric: `system.paging.operations` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.operations` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.direction` | string | The paging access direction | `in` | Recommended | +| `system.paging.type` | string | The memory paging type | `minor` | Recommended | + +`system.paging.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `in` | in | +| `out` | out | + +`system.paging.type` MUST be one of the following: + +| Value | Description | +|---|---| +| `major` | major | +| `minor` | minor | + + +## Disk Controller Metrics + +**Description:** System level disk performance metrics captured under `system.disk`. + +### Metric: `system.disk.io` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.io` | Counter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.operations` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.operations` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.io_time` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.io_time` | Counter | `s` | Time disk spent activated [1] | + +**[1]:** The real elapsed time ("wall clock") used in the I/O path (time from operations running in parallel is not counted). Measured as: + +- Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) +- Windows: The complement of + ["Disk\% Idle Time"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | + + +### Metric: `system.disk.operation_time` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.operation_time` | Counter | `s` | Sum of the time each operation took to complete [1] | + +**[1]:** Because it is the sum of time each request took, parallel-issued requests each contribute to make the count grow. Measured as: + +- Linux: Fields 7 & 11 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) +- Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" perf counter (similar for Writes) + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.merged` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.merged` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +## Filesystem Metrics + +**Description:** System level filesystem metrics captured under `system.filesystem`. + +### Metric: `system.filesystem.usage` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.filesystem.usage` | UpDownCounter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.filesystem.mode` | string | The filesystem mode | `rw, ro` | Recommended | +| `system.filesystem.mountpoint` | string | The filesystem mount path | `/mnt/data` | Recommended | +| `system.filesystem.state` | string | The filesystem state | `used` | Recommended | +| `system.filesystem.type` | string | The filesystem type | `ext4` | Recommended | + +`system.filesystem.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `reserved` | reserved | + +`system.filesystem.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `fat32` | fat32 | +| `exfat` | exfat | +| `ntfs` | ntfs | +| `refs` | refs | +| `hfsplus` | hfsplus | +| `ext4` | ext4 | + + +### Metric: `system.filesystem.utilization` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.filesystem.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.filesystem.mode` | string | The filesystem mode | `rw, ro` | Recommended | +| `system.filesystem.mountpoint` | string | The filesystem mount path | `/mnt/data` | Recommended | +| `system.filesystem.state` | string | The filesystem state | `used` | Recommended | +| `system.filesystem.type` | string | The filesystem type | `ext4` | Recommended | + +`system.filesystem.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `reserved` | reserved | + +`system.filesystem.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `fat32` | fat32 | +| `exfat` | exfat | +| `ntfs` | ntfs | +| `refs` | refs | +| `hfsplus` | hfsplus | +| `ext4` | ext4 | + + +## Network Metrics + +**Description:** System level network metrics captured under `system.network`. + +### Metric: `system.network.dropped` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.dropped` | Counter | `{packet}` | Count of packets that are dropped or discarded even though there was no error [1] | + +**[1]:** Measured as: + +- Linux: the `drop` column in `/proc/net/dev` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)) +- Windows: [`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2) + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.packets` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.packets` | Counter | `{packet}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.errors` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.errors` | Counter | `{error}` | Count of network errors detected [1] | + +**[1]:** Measured as: + +- Linux: the `errs` column in `/proc/net/dev` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). +- Windows: [`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.io` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.io` | Counter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.connections` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.connections` | UpDownCounter | `{connection}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| [`network.transport`](../general/general-attributes.md) | string | [OSI Transport Layer](https://osi-model.com/transport-layer/) or [Inter-process Communication method](https://en.wikipedia.org/wiki/Inter-process_communication). The value SHOULD be normalized to lowercase. | `tcp`; `udp` | Recommended | +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.state` | string | | `close_wait` | Recommended | + +`network.transport` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `tcp` | TCP | +| `udp` | UDP | +| `pipe` | Named or anonymous pipe. See note below. | +| `unix` | Unix domain socket | + +`system.network.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `close` | close | +| `close_wait` | close_wait | +| `closing` | closing | +| `delete` | delete | +| `established` | established | +| `fin_wait_1` | fin_wait_1 | +| `fin_wait_2` | fin_wait_2 | +| `last_ack` | last_ack | +| `listen` | listen | +| `syn_recv` | syn_recv | +| `syn_sent` | syn_sent | +| `time_wait` | time_wait | + +## Aggregate System Process Metrics + +**Description:** System level aggregate process metrics captured under `system.processes`. +For metrics at the individual process level, see [process metrics](process-metrics.md). + +### Metric: `system.processes.count` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.processes.count` | UpDownCounter | `{process}` | Total number of processes in each state | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `status` | string | [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running, sleeping` | Recommended | + +`status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `running` | running | +| `sleeping` | sleeping | +| `stopped` | stopped | +| `defunct` | defunct | + + +### Metric: `system.processes.created` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.processes.created` | Counter | `{process}` | Total number of processes created over uptime of the host | + + + + + +## `system.{os}.` - OS Specific System Metrics Instrument names for system level metrics that have different and conflicting meaning across multiple OSes should be prefixed with `system.{os}.` and @@ -189,3 +698,4 @@ An instrument for load average over 1 minute on Linux could be named an `{os}` prefix to split this metric across OSes. 
[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.21.0/specification/document-status.md +[MetricRecommended]: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.21.0/specification/metrics/metric-requirement-level.md#recommended diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml new file mode 100644 index 0000000000..1004049f5f --- /dev/null +++ b/model/metrics/system-metrics.yaml @@ -0,0 +1,456 @@ +groups: + # General system attributes + - id: attributes.system + prefix: system + type: attribute_group + brief: "Describes System metric attributes" + attributes: + - id: device + type: string + brief: "The device identifier" + examples: ["(identifier)"] + + # sytem.cpu.* metrics and attribute group + - id: attributes.system.cpu + prefix: system.cpu + type: attribute_group + brief: "Describes System CPU metric attributes" + attributes: + - id: state + type: + allow_custom_values: true + members: + - id: idle + value: 'idle' + - id: user + value: 'user' + - id: system + value: 'system' + - id: interrupt + value: 'interrupt' + brief: "The state of the CPU" + examples: ["idle", "interrupt"] + - id: cpu + type: int + brief: "The CPU number [0..n-1]" + examples: [1] + + - id: metric.system.cpu.time + type: metric + metric_name: system.cpu.time + brief: "" + instrument: counter + unit: "s" + attributes: + - ref: system.cpu.state + - ref: system.cpu.cpu + + - id: metric.system.cpu.utilization + type: metric + metric_name: system.cpu.utilization + brief: "Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of CPUs" + instrument: gauge + unit: "1" + attributes: + - ref: system.cpu.state + - ref: system.cpu.cpu + + # sytem.memory.* metrics and attribute group + - id: attributes.system.memory + prefix: system.memory + type: attribute_group + brief: "Describes System Memory metric attributes" + attributes: + - id: state + type: + 
allow_custom_values: true + members: + - id: used + value: 'used' + - id: free + value: 'free' + - id: cached + value: 'cached' + brief: "The memory state" + examples: ["free", "cached"] + + - id: metric.system.memory.usage + type: metric + metric_name: system.memory.usage + brief: "" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.memory.state + + - id: metric.system.memory.utilization + type: metric + metric_name: system.memory.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.memory.state + + # system.paging.* metrics and attribute group + - id: attributes.system.paging + prefix: system.paging + type: attribute_group + brief: "Describes System Memory Paging metric attributes" + attributes: + - id: state + type: + allow_custom_values: false + members: + - id: used + value: 'used' + - id: free + value: 'free' + brief: "The memory paging state" + examples: ["free"] + - id: type + type: + allow_custom_values: false + members: + - id: major + value: 'major' + - id: minor + value: 'minor' + brief: "The memory paging type" + examples: ["minor"] + - id: direction + type: + allow_custom_values: false + members: + - id: in + value: 'in' + - id: out + value: 'out' + brief: "The paging access direction" + examples: ["in"] + - id: metric.system.paging.usage + type: metric + metric_name: system.paging.usage + brief: "Unix swap or windows pagefile usage" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.paging.state + + - id: metric.system.paging.utilization + type: metric + metric_name: system.paging.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.paging.state + + - id: metric.system.paging.faults + type: metric + metric_name: system.paging.faults + brief: "" + instrument: counter + unit: "{fault}" + attributes: + - ref: system.paging.type + + - id: metric.system.paging.operations + type: metric + metric_name: system.paging.operations + brief: "" + instrument: 
counter + unit: "{operation}" + attributes: + - ref: system.paging.type + - ref: system.paging.direction + + # system.disk.* metrics and attribute group + - id: attributes.system.disk + prefix: system.disk + type: attribute_group + brief: "Describes System Disk metric attributes" + attributes: + - id: direction + type: + allow_custom_values: false + members: + - id: read + value: 'read' + - id: write + value: 'write' + brief: "The disk operation direction" + examples: ["read"] + + - id: metric.system.disk.io + type: metric + metric_name: system.disk.io + brief: "" + instrument: counter + unit: "By" + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.operations + type: metric + metric_name: system.disk.operations + brief: "" + instrument: counter + unit: "{operation}" + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.io_time + type: metric + metric_name: system.disk.io_time + brief: "Time disk spent activated" + instrument: counter + unit: "s" + note: | + The real elapsed time ("wall clock") used in the I/O path (time from operations running in parallel are not counted). Measured as: + + - Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) + - Windows: The complement of + ["Disk\% IdleTime"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + %20to%200%20(meaning%20always%20busy).) performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + attributes: + - ref: system.device + + - id: metric.system.disk.operation_time + type: metric + metric_name: system.disk.operation_time + brief: "Sum of the time each operation took to complete" + instrument: counter + unit: "s" + note: | + Because it is the sum of time each request took, parallel-issued requests each contribute to make the count grow. 
Measured as: + + - Linux: Fields 7 & 11 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) + - Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" perf counter (similar for Writes) + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.merged + type: metric + metric_name: system.disk.merged + brief: "" + instrument: counter + unit: "{operation}" + attributes: + - ref: system.device + - ref: system.disk.direction + + # system.filesystem.* metrics and attribute group + - id: attributes.system.filesystem + prefix: system.filesystem + type: attribute_group + brief: "Describes Filesystem metric attributes" + attributes: + - id: state + brief: "The filesystem state" + type: + allow_custom_values: false + members: + - id: used + value: 'used' + - id: free + value: 'free' + - id: reserved + value: 'reserved' + examples: ["used"] + - id: type + type: + allow_custom_values: true + members: + - id: fat32 + value: 'fat32' + - id: exfat + value: 'exfat' + - id: ntfs + value: 'ntfs' + - id: refs + value: 'refs' + - id: hfsplus + value: 'hfsplus' + - id: ext4 + value: 'ext4' + brief: "The filesystem type" + examples: ["ext4"] + - id: mode + type: string + brief: "The filesystem mode" + examples: ["rw, ro"] + - id: mountpoint + type: string + brief: "The filesystem mount path" + examples: ["/mnt/data"] + + - id: metric.system.filesystem.usage + type: metric + metric_name: system.filesystem.usage + brief: "" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.device + - ref: system.filesystem.state + - ref: system.filesystem.type + - ref: system.filesystem.mode + - ref: system.filesystem.mountpoint + + - id: metric.system.filesystem.utilization + type: metric + metric_name: system.filesystem.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.device + - ref: system.filesystem.state + - ref: system.filesystem.type + - ref: 
system.filesystem.mode + - ref: system.filesystem.mountpoint + + # system.network.* metrics and attribute group + - id: attributes.system.network + prefix: system.network + type: attribute_group + brief: "Describes Network metric attributes" + attributes: + - id: direction + type: + allow_custom_values: false + members: + - id: transmit + value: 'transmit' + - id: receive + value: 'receive' + brief: "" + examples: ["transmit"] + - id: state + type: + allow_custom_values: false + members: + - id: close + value: 'close' + - id: close_wait + value: 'close_wait' + - id: closing + value: 'closing' + - id: delete + value: 'delete' + - id: established + value: 'established' + - id: fin_wait_1 + value: 'fin_wait_1' + - id: fin_wait_2 + value: 'fin_wait_2' + - id: last_ack + value: 'last_ack' + - id: listen + value: 'listen' + - id: syn_recv + value: 'syn_recv' + - id: syn_sent + value: 'syn_sent' + - id: time_wait + value: 'time_wait' + brief: "" + examples: ["close_wait"] + + - id: metric.system.network.dropped + type: metric + metric_name: system.network.dropped + brief: "Count of packets that are dropped or discarded even though there was no error" + instrument: counter + unit: "{packet}" + note: | + Measured as: + + - Linux: the `drop` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)) + - Windows: [`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2) + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.packets + type: metric + metric_name: system.network.packets + brief: "Count of packets that are dropped or discarded even though there was no error" + instrument: counter + unit: "{packet}" + attributes: + - ref: system.device + - ref: system.network.direction + + - 
id: metric.system.network.errors + type: metric + metric_name: system.network.errors + brief: "Count of network errors detected" + instrument: counter + unit: "{error}" + note: | + Measured as: + + - Linux: the `errs` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). + - Windows: [`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.io + type: metric + metric_name: system.network.io + brief: "" + instrument: counter + unit: "By" + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.connections + type: metric + metric_name: system.network.connections + brief: "" + instrument: updowncounter + unit: "{connection}" + attributes: + - ref: system.device + - ref: system.network.state + - ref: network.transport + + # system.processes.* metrics and attribute group + - id: metric.system.processes.count + type: metric + metric_name: system.processes.count + brief: "Total number of processes in each state" + instrument: updowncounter + unit: "{process}" + attributes: + - id: status + type: + allow_custom_values: true + members: + - id: running + value: 'running' + - id: sleeping + value: 'sleeping' + - id: stopped + value: 'stopped' + - id: defunct + value: 'defunct' + brief: > + [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) + examples: ["running, sleeping"] + + - id: metric.system.processes.created + type: metric + metric_name: system.processes.created + brief: "Total number of processes created over uptime of the host" + instrument: counter + unit: "{process}" From 988a16e436200c7db7944a584601276597554474 Mon Sep 17 
00:00:00 2001 From: Joao Grassi Date: Wed, 2 Aug 2023 10:11:18 +0200 Subject: [PATCH 02/11] Add new metrics --- docs/system/system-metrics.md | 64 ++++++++++++++++++++++--------- model/metrics/system-metrics.yaml | 56 +++++++++++++++++++-------- 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index 20f6748857..c2c3057ea2 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -18,6 +18,8 @@ instruments not explicitly defined in the specification. - [Processor Metrics](#processor-metrics) * [Metric: `system.cpu.time`](#metric-systemcputime) * [Metric: `system.cpu.utilization`](#metric-systemcpuutilization) + * [Metric: `system.cpu.physical.count`](#metric-systemcpuphysicalcount) + * [Metric: `system.cpu.logical.count`](#metric-systemcpulogicalcount) - [Memory Metrics](#memory-metrics) * [Metric: `system.memory.usage`](#metric-systemmemoryusage) * [Metric: `system.memory.utilization`](#metric-systemmemoryutilization) @@ -50,7 +52,7 @@ instruments not explicitly defined in the specification. ## Processor Metrics -**Description:** System level processor metrics captured under `system.cpu`. +**Description:** System level processor metrics captured under the namespace `system.cpu`. ### Metric: `system.cpu.time` @@ -59,13 +61,13 @@ This metric is [recommended][MetricRecommended]. 
| Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.cpu.time` | Counter | `s` | | +| `system.cpu.time` | Counter | `s` | Seconds each logical CPU spent on each mode | | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.cpu` | int | The CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.cpu` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. @@ -85,13 +87,13 @@ This metric is [recommended][MetricRecommended]. | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.cpu.utilization` | Gauge | `1` | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of CPUs | +| `system.cpu.utilization` | Gauge | `1` | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs | | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.cpu` | int | The CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.cpu` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. @@ -104,9 +106,35 @@ This metric is [recommended][MetricRecommended]. | `interrupt` | interrupt | +### Metric: `system.cpu.physical.count` + +This metric is [recommended][MetricRecommended]. 
+ + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.physical.count` | UpDownCounter | `{cpu}` | Reports the number of actual physical processor cores on the hardware | + + + + + +### Metric: `system.cpu.logical.count` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.logical.count` | UpDownCounter | `{cpu}` | Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking | + + + + + ## Memory Metrics -**Description:** System level memory metrics capture under `system.memory`. +**Description:** System level memory metrics capture under the namespace `system.memory`. This does not include [paging/swap memory](#pagingswap-metrics). ### Metric: `system.memory.usage` @@ -159,7 +187,7 @@ This metric is [recommended][MetricRecommended]. ## Paging/Swap Metrics -**Description:** System level paging/swap memory metrics captured under `system.paging`. +**Description:** System level paging/swap memory metrics captured under the namespace `system.paging`. ### Metric: `system.paging.usage` @@ -263,7 +291,7 @@ This metric is [recommended][MetricRecommended]. ## Disk Controller Metrics -**Description:** System level disk performance metrics captured under `system.disk`. +**Description:** System level disk performance metrics captured under the namespace `system.disk`. ### Metric: `system.disk.io` @@ -326,8 +354,8 @@ This metric is [recommended][MetricRecommended]. 
- Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) - Windows: The complement of - ["Disk\% IdleTime"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) - %20to%200%20(meaning%20always%20busy).) performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + ["Disk\% Idle Time"](https://learn.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` @@ -391,7 +419,7 @@ This metric is [recommended][MetricRecommended]. ## Filesystem Metrics -**Description:** System level filesystem metrics captured under `system.filesystem`. +**Description:** System level filesystem metrics captured under the namespace `system.filesystem`. ### Metric: `system.filesystem.usage` @@ -473,7 +501,7 @@ This metric is [recommended][MetricRecommended]. ## Network Metrics -**Description:** System level network metrics captured under `system.network`. +**Description:** System level network metrics captured under the namespace `system.network`. ### Metric: `system.network.dropped` @@ -512,7 +540,7 @@ This metric is [recommended][MetricRecommended]. | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.network.packets` | Counter | `{packet}` | Count of packets that are dropped or discarded even though there was no error | +| `system.network.packets` | Counter | `{packet}` | | @@ -596,9 +624,9 @@ This metric is [recommended][MetricRecommended]. 
| Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| [`network.transport`](../general/general-attributes.md) | string | [OSI Transport Layer](https://osi-model.com/transport-layer/) or [Inter-process Communication method](https://en.wikipedia.org/wiki/Inter-process_communication). The value SHOULD be normalized to lowercase. | `tcp`; `udp` | Recommended | +| [`network.transport`](../general/attributes.md) | string | [OSI Transport Layer](https://osi-model.com/transport-layer/) or [Inter-process Communication method](https://en.wikipedia.org/wiki/Inter-process_communication). The value SHOULD be normalized to lowercase. | `tcp`; `udp` | Recommended | | `system.device` | string | The device identifier | `(identifier)` | Recommended | -| `system.network.state` | string | | `close_wait` | Recommended | +| `system.network.state` | string | A stateless protocol MUST NOT set this attribute | `close_wait` | Recommended | `network.transport` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. @@ -628,7 +656,7 @@ This metric is [recommended][MetricRecommended]. ## Aggregate System Process Metrics -**Description:** System level aggregate process metrics captured under `system.processes`. +**Description:** System level aggregate process metrics captured under the namespace `system.processes`. For metrics at the individual process level, see [process metrics](process-metrics.md). ### Metric: `system.processes.count` @@ -644,9 +672,9 @@ This metric is [recommended][MetricRecommended]. 
| Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `status` | string | [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running, sleeping` | Recommended | +| `system.processes.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | -`status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. +`system.processes.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. | Value | Description | |---|---| diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 1004049f5f..5c62fe00fe 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -32,13 +32,13 @@ groups: examples: ["idle", "interrupt"] - id: cpu type: int - brief: "The CPU number [0..n-1]" + brief: "The logical CPU number [0..n-1]" examples: [1] - id: metric.system.cpu.time type: metric metric_name: system.cpu.time - brief: "" + brief: "Seconds each logical CPU spent on each mode" instrument: counter unit: "s" attributes: @@ -48,13 +48,29 @@ groups: - id: metric.system.cpu.utilization type: metric metric_name: system.cpu.utilization - brief: "Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of CPUs" + brief: "Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs" instrument: gauge unit: "1" attributes: - ref: system.cpu.state - ref: system.cpu.cpu + - id: metric.system.cpu.physical.count + type: metric + metric_name: system.cpu.physical.count + brief: "Reports the number of actual physical processor cores on the hardware" + instrument: updowncounter + 
unit: "{cpu}" + attributes: [] + + - id: metric.system.cpu.logical.count + type: metric + metric_name: system.cpu.logical.count + brief: "Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking" + instrument: updowncounter + unit: "{cpu}" + attributes: [] + # sytem.memory.* metrics and attribute group - id: attributes.system.memory prefix: system.memory @@ -213,8 +229,8 @@ groups: - Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) - Windows: The complement of - ["Disk\% IdleTime"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) - %20to%200%20(meaning%20always%20busy).) performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + ["Disk\% Idle Time"](https://learn.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` attributes: - ref: system.device @@ -358,7 +374,7 @@ groups: value: 'syn_sent' - id: time_wait value: 'time_wait' - brief: "" + brief: "A stateless protocol MUST NOT set this attribute" examples: ["close_wait"] - id: metric.system.network.dropped @@ -380,7 +396,7 @@ groups: - id: metric.system.network.packets type: metric metric_name: system.network.packets - brief: "Count of packets that are dropped or discarded even though there was no error" + brief: "" instrument: counter unit: "{packet}" attributes: @@ -424,13 +440,11 @@ groups: - ref: system.network.state - ref: network.transport - # system.processes.* metrics and attribute group - - id: metric.system.processes.count - type: metric - metric_name: system.processes.count - brief: "Total number of processes in each state" - instrument: updowncounter - unit: "{process}" + # system.processes.* metrics and attribute 
group + - id: attributes.system.processes + prefix: system.processes + type: attribute_group + brief: "Describes System Processes metric attributes" attributes: - id: status type: @@ -445,8 +459,18 @@ groups: - id: defunct value: 'defunct' brief: > - [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) - examples: ["running, sleeping"] + The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) + examples: ["running"] + + + - id: metric.system.processes.count + type: metric + metric_name: system.processes.count + brief: "Total number of processes in each state" + instrument: updowncounter + unit: "{process}" + attributes: + - ref: system.processes.status - id: metric.system.processes.created type: metric From a474ca38132382b851b4749befbb8f1919faa3cc Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 3 Aug 2023 09:33:13 +0200 Subject: [PATCH 03/11] Expand system.cpu.state enum values --- docs/system/system-metrics.md | 10 ++++++++-- model/metrics/system-metrics.yaml | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index c2c3057ea2..a117a63963 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -74,10 +74,13 @@ This metric is [recommended][MetricRecommended]. | Value | Description | |---|---| -| `idle` | idle | | `user` | user | | `system` | system | +| `nice` | nice | +| `idle` | idle | +| `iowait` | iowait | | `interrupt` | interrupt | +| `steal` | steal | ### Metric: `system.cpu.utilization` @@ -100,10 +103,13 @@ This metric is [recommended][MetricRecommended]. 
| Value | Description | |---|---| -| `idle` | idle | | `user` | user | | `system` | system | +| `nice` | nice | +| `idle` | idle | +| `iowait` | iowait | | `interrupt` | interrupt | +| `steal` | steal | ### Metric: `system.cpu.physical.count` diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 5c62fe00fe..eaae33427a 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -20,14 +20,20 @@ groups: type: allow_custom_values: true members: - - id: idle - value: 'idle' - id: user value: 'user' - id: system value: 'system' + - id: nice + value: 'nice' + - id: idle + value: 'idle' + - id: iowait + value: 'iowait' - id: interrupt value: 'interrupt' + - id: steal + value: 'steal' brief: "The state of the CPU" examples: ["idle", "interrupt"] - id: cpu From 4f882159f7967f9c5c33bcf08d84e4118a4c3c1f Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 3 Aug 2023 09:37:24 +0200 Subject: [PATCH 04/11] Expand system.memory.state enum values --- docs/system/system-metrics.md | 6 ++++++ model/metrics/system-metrics.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index a117a63963..d08900e8c8 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -162,8 +162,11 @@ This metric is [recommended][MetricRecommended]. | Value | Description | |---|---| +| `total` | total | | `used` | used | | `free` | free | +| `shared` | shared | +| `buffers` | buffers | | `cached` | cached | @@ -186,8 +189,11 @@ This metric is [recommended][MetricRecommended]. 
| Value | Description | |---|---| +| `total` | total | | `used` | used | | `free` | free | +| `shared` | shared | +| `buffers` | buffers | | `cached` | cached | diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index eaae33427a..2e8fb71ea0 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -87,10 +87,16 @@ groups: type: allow_custom_values: true members: + - id: total + value: 'total' - id: used value: 'used' - id: free value: 'free' + - id: shared + value: 'shared' + - id: buffers + value: 'buffers' - id: cached value: 'cached' brief: "The memory state" From 19cb403c98d10562214187631d2b572fa776b358 Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 3 Aug 2023 09:40:25 +0200 Subject: [PATCH 05/11] Change system.cpu.cpu attribute to system.cpu.logical --- docs/system/system-metrics.md | 4 ++-- model/metrics/system-metrics.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index d08900e8c8..d44639e40c 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -67,7 +67,7 @@ This metric is [recommended][MetricRecommended]. | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.cpu` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.logical` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. @@ -96,7 +96,7 @@ This metric is [recommended][MetricRecommended]. 
| Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.cpu` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.logical` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 2e8fb71ea0..90a1b8dc86 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -36,7 +36,7 @@ groups: value: 'steal' brief: "The state of the CPU" examples: ["idle", "interrupt"] - - id: cpu + - id: logical type: int brief: "The logical CPU number [0..n-1]" examples: [1] @@ -49,7 +49,7 @@ groups: unit: "s" attributes: - ref: system.cpu.state - - ref: system.cpu.cpu + - ref: system.cpu.logical - id: metric.system.cpu.utilization type: metric @@ -59,7 +59,7 @@ groups: unit: "1" attributes: - ref: system.cpu.state - - ref: system.cpu.cpu + - ref: system.cpu.logical - id: metric.system.cpu.physical.count type: metric From a2f9ec4a4ef40bbf00f98c3aad3dcffbc1a5ef0d Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 10 Aug 2023 13:25:33 +0200 Subject: [PATCH 06/11] Add changelog and schema file changes --- CHANGELOG.md | 27 ++++++++++++++++++++ schema-next.yaml | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2009f95066..b6ea5f9277 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,33 @@ release. ([#88](https://github.com/open-telemetry/semantic-conventions/pull/88)) The conventions cover metrics that are recorded by the FaaS itself and not by clients invoking them. +- BREAKING: Generate System metrics semconv from YAML. 
+ ([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) + - Rename attributes for `system.cpu.*` metrics: + - `state` to `system.cpu.state` + - `cpu` to `system.cpu.logical` + - Rename attributes for `system.memory.*` metrics: + - `state` to `system.memory.state` + - Rename attributes for `system.paging.*` metrics: + - `state` to `system.paging.state` + - `type` to `system.paging.type` + - `direction` to `system.paging.direction` + - Rename attributes for `system.disk.*` metrics: + - `device` to `system.device` + - `direction` to `system.disk.direction` + - Rename attributes for `system.filesystem.*` metrics: + - `device` to `system.device` + - `state` to `system.filesystem.state` + - `type` to `system.filesystem.type` + - `mode` to `system.filesystem.mode` + - `mountpoint` to `system.filesystem.mountpoint` + - Rename attributes for `system.network.*` metrics: + - `device` to `system.device` + - `direction` to `system.network.direction` + - `protocol` to `network.protocol` + - `state` to `system.network.state` + - Rename attributes for `system.processes.*` metrics: + - `status` to `system.processes.status` ## v1.21.0 (2023-07-13) diff --git a/schema-next.yaml b/schema-next.yaml index 14e82faac1..d2fc1ed1f4 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -8,6 +8,70 @@ versions: - rename_metrics: http.client.duration: http.client.request.duration http.server.duration: http.server.request.duration + # https://github.com/open-telemetry/semantic-conventions/pull/89 + - rename_attributes: + attribute_map: + state: system.cpu.state + cpu: system.cpu.logical + apply_to_metrics: + - system.cpu.time + - system.cpu.utilization + - rename_attributes: + attribute_map: + state: system.memory.state + apply_to_metrics: + - system.memory.usage + - system.memory.utilization + - rename_attributes: + attribute_map: + state: system.paging.state + apply_to_metrics: + - system.paging.usage + - system.paging.utilization + - rename_attributes: + attribute_map: + 
type: system.paging.type + direction: system.paging.direction + apply_to_metrics: + - system.paging.faults + - system.paging.operations + - rename_attributes: + attribute_map: + device: system.device + direction: system.disk.direction + apply_to_metrics: + - system.disk.io + - system.disk.operations + - system.disk.io_time + - system.disk.operation_time + - system.disk.merged + - rename_attributes: + attribute_map: + device: system.device + state: system.filesystem.state + type: system.filesystem.type + mode: system.filesystem.mode + mountpoint: system.filesystem.mountpoint + apply_to_metrics: + - system.filesystem.usage + - system.filesystem.utilization + - rename_attributes: + attribute_map: + device: system.device + direction: system.network.direction + protocol: network.protocol + state: system.network.state + apply_to_metrics: + - system.network.dropped + - system.network.packets + - system.network.errors + - system.network.io + - system.network.connections + - rename_attributes: + attribute_map: + status: system.processes.status + apply_to_metrics: + - system.processes.count 1.21.0: spans: changes: From a2d015e803d97272f8a89075afdea8153dd68aba Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 10 Aug 2023 13:33:09 +0200 Subject: [PATCH 07/11] Update model/metrics/system-metrics.yaml Co-authored-by: Pablo Baeyens --- model/metrics/system-metrics.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 90a1b8dc86..1830e1ba75 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -10,7 +10,7 @@ groups: brief: "The device identifier" examples: ["(identifier)"] - # sytem.cpu.* metrics and attribute group + # system.cpu.* metrics and attribute group - id: attributes.system.cpu prefix: system.cpu type: attribute_group From c8a4a8d482cf17741d7f1801577f89d5745058be Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Wed, 16 Aug 2023 10:13:46 
+0200 Subject: [PATCH 08/11] Drop pluralization on system.process metric names --- CHANGELOG.md | 3 ++- docs/system/system-metrics.md | 26 +++++++++++++------------- model/metrics/system-metrics.yaml | 18 +++++++++--------- schema-next.yaml | 3 +++ 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a04dc260df..7c60d618c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,7 +49,8 @@ release. in `OTEL_SEMCONV_STABILITY_OPT_IN` ([#249](https://github.com/open-telemetry/semantic-conventions/pull/249)) - BREAKING: Generate System metrics semconv from YAML. - ([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) + ([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) + - Remove pluralization from process metrics `system.processes.*` to `system.process.*` - Rename attributes for `system.cpu.*` metrics: - `state` to `system.cpu.state` - `cpu` to `system.cpu.logical` diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index d44639e40c..8795bc819a 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -44,8 +44,8 @@ instruments not explicitly defined in the specification. * [Metric: `system.network.io`](#metric-systemnetworkio) * [Metric: `system.network.connections`](#metric-systemnetworkconnections) - [Aggregate System Process Metrics](#aggregate-system-process-metrics) - * [Metric: `system.processes.count`](#metric-systemprocessescount) - * [Metric: `system.processes.created`](#metric-systemprocessescreated) + * [Metric: `system.process.count`](#metric-systemprocesscount) + * [Metric: `system.process.created`](#metric-systemprocesscreated) - [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) @@ -668,25 +668,25 @@ This metric is [recommended][MetricRecommended]. ## Aggregate System Process Metrics -**Description:** System level aggregate process metrics captured under the namespace `system.processes`. 
+**Description:** System level aggregate process metrics captured under the namespace `system.process`. For metrics at the individual process level, see [process metrics](process-metrics.md). -### Metric: `system.processes.count` +### Metric: `system.process.count` This metric is [recommended][MetricRecommended]. - + | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.processes.count` | UpDownCounter | `{process}` | Total number of processes in each state | +| `system.process.count` | UpDownCounter | `{process}` | Total number of processes in each state | - + | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.processes.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | +| `system.process.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | -`system.processes.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. +`system.process.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. | Value | Description | |---|---| @@ -696,17 +696,17 @@ This metric is [recommended][MetricRecommended]. | `defunct` | defunct | -### Metric: `system.processes.created` +### Metric: `system.process.created` This metric is [recommended][MetricRecommended]. 
- + | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.processes.created` | Counter | `{process}` | Total number of processes created over uptime of the host | +| `system.process.created` | Counter | `{process}` | Total number of processes created over uptime of the host | - + ## `system.{os}.` - OS Specific System Metrics diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 1830e1ba75..d6a5c99ee0 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -452,11 +452,11 @@ groups: - ref: system.network.state - ref: network.transport - # system.processes.* metrics and attribute group - - id: attributes.system.processes - prefix: system.processes + # system.process.* metrics and attribute group + - id: attributes.system.process + prefix: system.process type: attribute_group - brief: "Describes System Processes metric attributes" + brief: "Describes System Process metric attributes" attributes: - id: status type: @@ -475,18 +475,18 @@ groups: examples: ["running"] - - id: metric.system.processes.count + - id: metric.system.process.count type: metric - metric_name: system.processes.count + metric_name: system.process.count brief: "Total number of processes in each state" instrument: updowncounter unit: "{process}" attributes: - - ref: system.processes.status + - ref: system.process.status - - id: metric.system.processes.created + - id: metric.system.process.created type: metric - metric_name: system.processes.created + metric_name: system.process.created brief: "Total number of processes created over uptime of the host" instrument: counter unit: "{process}" diff --git a/schema-next.yaml b/schema-next.yaml index 6df492e124..4037c77a18 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -57,6 +57,9 @@ versions: - jvm.buffer.limit - jvm.buffer.count # https://github.com/open-telemetry/semantic-conventions/pull/89 + - 
rename_metrics: + system.processes.count: system.process.count + system.processes.created: system.process.created - rename_attributes: attribute_map: state: system.cpu.state From 9dab3ff0d85315f6a0245db76a7d1f9c00e56cc0 Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Mon, 21 Aug 2023 10:37:02 +0200 Subject: [PATCH 09/11] Fix 'processes' left-overs --- CHANGELOG.md | 2 +- schema-next.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c60d618c5..08507978ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,7 +75,7 @@ release. - `protocol` to `network.protocol` - `state` to `system.network.state` - Rename attributes for `system.processes.*` metrics: - - `status` to `system.processes.status` + - `status` to `system.process.status` ## v1.21.0 (2023-07-13) diff --git a/schema-next.yaml b/schema-next.yaml index 4037c77a18..3e800dc481 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -120,9 +120,9 @@ versions: - system.network.connections - rename_attributes: attribute_map: - status: system.processes.status + status: system.process.status apply_to_metrics: - - system.processes.count + - system.process.count 1.21.0: spans: changes: From c67ec3355437902751da0fd5248ec8c6ae1883d6 Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Thu, 7 Sep 2023 09:58:16 +0200 Subject: [PATCH 10/11] Revert system.processes pluralization change --- CHANGELOG.md | 3 +-- docs/system/system-metrics.md | 24 ++++++++++++------------ model/metrics/system-metrics.yaml | 16 ++++++++-------- schema-next.yaml | 7 ++----- 4 files changed, 23 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed8ffd12e0..d74fab3098 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,7 +61,6 @@ release. ([#276](https://github.com/open-telemetry/semantic-conventions/pull/276)) - BREAKING: Generate System metrics semconv from YAML. 
([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) - - Remove pluralization from process metrics `system.processes.*` to `system.process.*` - Rename attributes for `system.cpu.*` metrics: - `state` to `system.cpu.state` - `cpu` to `system.cpu.logical` @@ -86,7 +85,7 @@ release. - `protocol` to `network.protocol` - `state` to `system.network.state` - Rename attributes for `system.processes.*` metrics: - - `status` to `system.process.status` + - `status` to `system.processes.status` ## v1.21.0 (2023-07-13) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index 8795bc819a..bee2dcb0cb 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -44,8 +44,8 @@ instruments not explicitly defined in the specification. * [Metric: `system.network.io`](#metric-systemnetworkio) * [Metric: `system.network.connections`](#metric-systemnetworkconnections) - [Aggregate System Process Metrics](#aggregate-system-process-metrics) - * [Metric: `system.process.count`](#metric-systemprocesscount) - * [Metric: `system.process.created`](#metric-systemprocesscreated) + * [Metric: `system.processes.count`](#metric-systemprocessescount) + * [Metric: `system.processes.created`](#metric-systemprocessescreated) - [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) @@ -671,22 +671,22 @@ This metric is [recommended][MetricRecommended]. **Description:** System level aggregate process metrics captured under the namespace `system.process`. For metrics at the individual process level, see [process metrics](process-metrics.md). -### Metric: `system.process.count` +### Metric: `system.processes.count` This metric is [recommended][MetricRecommended]. 
- + | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.process.count` | UpDownCounter | `{process}` | Total number of processes in each state | +| `system.processes.count` | UpDownCounter | `{process}` | Total number of processes in each state | - + | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.process.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | +| `system.processes.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | -`system.process.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. +`system.processes.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. | Value | Description | |---|---| @@ -696,17 +696,17 @@ This metric is [recommended][MetricRecommended]. | `defunct` | defunct | -### Metric: `system.process.created` +### Metric: `system.processes.created` This metric is [recommended][MetricRecommended]. 
- + | Name | Instrument Type | Unit (UCUM) | Description | | -------- | --------------- | ----------- | -------------- | -| `system.process.created` | Counter | `{process}` | Total number of processes created over uptime of the host | +| `system.processes.created` | Counter | `{process}` | Total number of processes created over uptime of the host | - + ## `system.{os}.` - OS Specific System Metrics diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index d6a5c99ee0..809d0dff1d 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -452,9 +452,9 @@ groups: - ref: system.network.state - ref: network.transport - # system.process.* metrics and attribute group - - id: attributes.system.process - prefix: system.process + # system.processes.* metrics and attribute group + - id: attributes.system.processes + prefix: system.processes type: attribute_group brief: "Describes System Process metric attributes" attributes: @@ -475,18 +475,18 @@ groups: examples: ["running"] - - id: metric.system.process.count + - id: metric.system.processes.count type: metric - metric_name: system.process.count + metric_name: system.processes.count brief: "Total number of processes in each state" instrument: updowncounter unit: "{process}" attributes: - - ref: system.process.status + - ref: system.processes.status - - id: metric.system.process.created + - id: metric.system.processes.created type: metric - metric_name: system.process.created + metric_name: system.processes.created brief: "Total number of processes created over uptime of the host" instrument: counter unit: "{process}" diff --git a/schema-next.yaml b/schema-next.yaml index 6ed83a7132..bd77850264 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -64,9 +64,6 @@ versions: - jvm.buffer.limit - jvm.buffer.count # https://github.com/open-telemetry/semantic-conventions/pull/89 - - rename_metrics: - system.processes.count: system.process.count - 
system.processes.created: system.process.created - rename_attributes: attribute_map: state: system.cpu.state @@ -127,9 +124,9 @@ versions: - system.network.connections - rename_attributes: attribute_map: - status: system.process.status + status: system.processes.status apply_to_metrics: - - system.process.count + - system.processes.count 1.21.0: spans: changes: From e7bbf62a44cbac66de1d202ce3a9a1962da792db Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Tue, 12 Sep 2023 11:38:43 +0200 Subject: [PATCH 11/11] Rename cpu.logical attribute to cpu.logical_number --- CHANGELOG.md | 2 +- docs/system/system-metrics.md | 4 ++-- model/metrics/system-metrics.yaml | 6 +++--- schema-next.yaml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d74fab3098..65b613ea76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,7 +63,7 @@ release. ([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) - Rename attributes for `system.cpu.*` metrics: - `state` to `system.cpu.state` - - `cpu` to `system.cpu.logical` + - `cpu` to `system.cpu.logical_number` - Rename attributes for `system.memory.*` metrics: - `state` to `system.memory.state` - Rename attributes for `system.paging.*` metrics: diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index bee2dcb0cb..f17b799050 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -67,7 +67,7 @@ This metric is [recommended][MetricRecommended]. | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.logical` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.logical_number` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. 
If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. @@ -96,7 +96,7 @@ This metric is [recommended][MetricRecommended]. | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| `system.cpu.logical` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.logical_number` | int | The logical CPU number [0..n-1] | `1` | Recommended | | `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | `system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml index 809d0dff1d..41c508f3d2 100644 --- a/model/metrics/system-metrics.yaml +++ b/model/metrics/system-metrics.yaml @@ -36,7 +36,7 @@ groups: value: 'steal' brief: "The state of the CPU" examples: ["idle", "interrupt"] - - id: logical + - id: logical_number type: int brief: "The logical CPU number [0..n-1]" examples: [1] @@ -49,7 +49,7 @@ groups: unit: "s" attributes: - ref: system.cpu.state - - ref: system.cpu.logical + - ref: system.cpu.logical_number - id: metric.system.cpu.utilization type: metric @@ -59,7 +59,7 @@ groups: unit: "1" attributes: - ref: system.cpu.state - - ref: system.cpu.logical + - ref: system.cpu.logical_number - id: metric.system.cpu.physical.count type: metric diff --git a/schema-next.yaml b/schema-next.yaml index bd77850264..609605b787 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -67,7 +67,7 @@ versions: - rename_attributes: attribute_map: state: system.cpu.state - cpu: system.cpu.logical + cpu: system.cpu.logical_number apply_to_metrics: - system.cpu.time - system.cpu.utilization