Skip to content
This repository has been archived by the owner on Mar 27, 2024. It is now read-only.

systemdunits: dimension per unit state #795

Merged
merged 2 commits into from
Sep 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 25 additions & 28 deletions modules/systemdunits/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ sidebar_label: "Systemd units"

# Systemd units state monitoring with Netdata

[`Systemd`](https://www.freedesktop.org/wiki/Software/systemd/) is a suite of basic building blocks for a Linux system.
[Systemd](https://www.freedesktop.org/wiki/Software/systemd/) is a suite of basic building blocks for a Linux system.

This module monitors `Systemd` units state.
This module monitors Systemd units state.

- Works only on linux systems.
## Requirements

- Works only on Linux systems.
- Disabled by default. It must be explicitly enabled in `go.d.conf`:

```yaml
Expand All @@ -20,31 +22,26 @@ modules:
systemdunits: yes
```

## Charts

It produces the following charts:

- Service Unit State in `state`
- Socket Unit State in `state`
- Target Unit State in `state`
- Path Unit State in `state`
- Device Unit State in `state`
- Mount Unit State in `state`
- Automount Unit State in `state`
- Swap Unit State in `state`
- Timer Unit State in `state`
- Scope Unit State in `state`
- Slice Unit State in `state`

## Unit states

| Code | Name | Meaning |
|------|----------------|--------------------------------------------------------------------------------------------------------------------------------------|
| 1 | `active` | started, bound, plugged in, ..., depending on the unit type |
| 2 | `inactive` | stopped, unbound, unplugged, ..., depending on the unit type |
| 3 | `activating` | in the process of being activated |
| 4 | `deactivating` | in the process of being deactivated |
| 5 | `failed` | the service failed in some way (process returned error code on exit, or crashed, an operation timed out, or after too many restarts) |
## Metrics

Descriptions of the unit types and states can be found in
the [official documentation](https://www.freedesktop.org/software/systemd/man/systemd.html#Concepts).

All metrics have the "systemd." prefix.

| Metric | Scope | Dimensions | Units |
|----------------------|:-----:|:--------------------------------------------------:|:-----:|
| service_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| socket_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| target_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| path_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| device_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| mount_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| automount_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| swap_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| timer_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| scope_unit_state | unit | active, inactive, activating, deactivating, failed | state |
| slice_unit_state | unit | active, inactive, activating, deactivating, failed | state |

## Configuration

Expand Down
143 changes: 64 additions & 79 deletions modules/systemdunits/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,86 +6,71 @@
package systemdunits

import (
"fmt"

"github.com/netdata/go.d.plugin/agent/module"

"golang.org/x/text/cases"
"golang.org/x/text/language"
)

// systemd unit types: https://www.freedesktop.org/software/systemd/man/systemd.html
var charts = module.Charts{
{
ID: "service_unit_state",
Title: "Service Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "service",
Ctx: "systemd.service_units_state",
},
{
ID: "socket_unit_state",
Title: "Socket Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "socket",
Ctx: "systemd.socket_unit_state",
},
{
ID: "target_unit_state",
Title: "Target Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "target",
Ctx: "systemd.target_unit_state",
},
{
ID: "path_unit_state",
Title: "Path Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "path",
Ctx: "systemd.path_unit_state",
},
{
ID: "device_unit_state",
Title: "Device Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "device",
Ctx: "systemd.device_unit_state",
},
{
ID: "mount_unit_state",
Title: "Mount Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "mount",
Ctx: "systemd.mount_unit_state",
},
{
ID: "automount_unit_state",
Title: "Automount Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "automount",
Ctx: "systemd.automount_unit_state",
},
{
ID: "swap_unit_state",
Title: "Swap Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "swap",
Ctx: "systemd.swap_unit_state",
},
{
ID: "timer_unit_state",
Title: "Timer Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "timer",
Ctx: "systemd.timer_unit_state",
},
{
ID: "scope_unit_state",
Title: "Scope Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "scope",
Ctx: "systemd.scope_unit_state",
},
{
ID: "slice_unit_state",
Title: "Slice Unit State (1: active, 2: inactive, 3: activating, 4: deactivating, 5: failed)",
Units: "state",
Fam: "slice",
Ctx: "systemd.slice_unit_state",
},
// Chart priorities, one constant per systemd unit type.
// The values are consecutive: the first constant equals module.Priority and
// each following constant is one greater (iota), so the declaration order
// below determines the relative priority of the unit types.
const (
prioServiceUnitState = module.Priority + iota
prioSocketUnitState
prioTargetUnitState
prioPathUnitState
prioDeviceUnitState
prioMountUnitState
prioAutomountUnitState
prioSwapUnitState
prioTimerUnitState
prioScopeUnitState
prioSliceUnitState
)

// prioMap maps a systemd unit type (one of the unitType* constants) to the
// priority used for the state charts of units of that type.
// Looking up a type that is not listed here yields the int zero value.
var prioMap = map[string]int{
unitTypeService: prioServiceUnitState,
unitTypeSocket: prioSocketUnitState,
unitTypeTarget: prioTargetUnitState,
unitTypePath: prioPathUnitState,
unitTypeDevice: prioDeviceUnitState,
unitTypeMount: prioMountUnitState,
unitTypeAutomount: prioAutomountUnitState,
unitTypeSwap: prioSwapUnitState,
unitTypeTimer: prioTimerUnitState,
unitTypeScope: prioScopeUnitState,
unitTypeSlice: prioSliceUnitState,
}

// newTypedUnitStateChartTmpl builds the state chart for a single systemd unit.
//
// name is the unit name and typ is the unit type (e.g. "service"); both are
// embedded in the chart ID, and typ also selects the title, family, context
// and priority. The chart gets one dimension per unit state, and each
// dimension ID is the chart ID followed by "_<state>".
func newTypedUnitStateChartTmpl(name, typ string) *module.Chart {
	// Title-cases the unit type for the human-readable chart title.
	titleCaser := cases.Title(language.English, cases.Compact)

	chart := &module.Chart{
		ID:       fmt.Sprintf("unit_%s_%s_state", name, typ),
		Title:    fmt.Sprintf("%s Unit State", titleCaser.String(typ)),
		Units:    "state",
		Fam:      fmt.Sprintf("%s units", typ),
		Ctx:      fmt.Sprintf("systemd.%s_unit_state", typ),
		Priority: prioMap[typ],
		Labels: []module.Label{
			{Key: "unit_name", Value: name},
		},
		Dims: module.Dims{
			{Name: unitStateActive},
			{Name: unitStateInactive},
			{Name: unitStateActivating},
			{Name: unitStateDeactivating},
			{Name: unitStateFailed},
		},
	}

	// Dimension IDs are derived from the chart ID so they stay unique per unit.
	for _, dim := range chart.Dims {
		dim.ID = chart.ID + "_" + dim.Name
	}

	return chart
}

// addUnitToCharts creates the state chart for the given unit and registers it
// with the module's chart set. A registration failure is logged as a warning
// and is not returned to the caller.
func (s *SystemdUnits) addUnitToCharts(name, typ string) {
	err := s.Charts().Add(newTypedUnitStateChartTmpl(name, typ))
	if err != nil {
		s.Warning(err)
	}
}
121 changes: 57 additions & 64 deletions modules/systemdunits/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,41 @@ import (
"strconv"
"strings"

"github.com/netdata/go.d.plugin/agent/module"

"github.com/coreos/go-systemd/v22/dbus"
)

const (
// Unit active states. These strings are used as chart dimension names and
// as the trailing component of the per-unit metric keys.
// https://www.freedesktop.org/software/systemd/man/systemd.html
unitStateActive = "active"
unitStateInactive = "inactive"
unitStateActivating = "activating"
unitStateDeactivating = "deactivating"
unitStateFailed = "failed"

// Unit types. These strings are the keys of prioMap and are embedded in
// chart IDs, families and contexts.
// https://www.freedesktop.org/software/systemd/man/systemd.html
unitTypeService = "service"
unitTypeSocket = "socket"
unitTypeTarget = "target"
unitTypePath = "path"
unitTypeDevice = "device"
unitTypeMount = "mount"
unitTypeAutomount = "automount"
unitTypeSwap = "swap"
unitTypeTimer = "timer"
unitTypeScope = "scope"
unitTypeSlice = "slice"
)

var (
// unitStates lists every unit state tracked by the collector.
// It is passed as the states argument to ListUnitsByPatternsContext and
// iterated during collection to reset each per-state metric of a unit
// to 0 before the unit's current state is set to 1.
unitStates = []string{
unitStateActive,
unitStateActivating,
unitStateFailed,
unitStateInactive,
unitStateDeactivating,
}
)

func (s *SystemdUnits) collect() (map[string]int64, error) {
conn, err := s.getConnection()
if err != nil {
Expand Down Expand Up @@ -48,19 +78,28 @@ func (s *SystemdUnits) collect() (map[string]int64, error) {
return nil, nil
}

collected := make(map[string]int64)
s.collectUnitsStates(collected, units)
return collected, nil
mx := make(map[string]int64)
s.collectUnitsStates(mx, units)

return mx, nil
}

func (s *SystemdUnits) collectUnitsStates(collected map[string]int64, units []dbus.UnitStatus) {
func (s *SystemdUnits) collectUnitsStates(mx map[string]int64, units []dbus.UnitStatus) {
for _, unit := range units {
name := cleanUnitName(unit.Name)
if !s.collectedUnits[name] {
s.collectedUnits[name] = true
s.addUnitToCharts(name)
name, typ := extractUnitNameType(cleanUnitName(unit.Name))
if name == "" || typ == "" {
continue
}

if !s.units[unit.Name] {
s.units[unit.Name] = true
s.addUnitToCharts(name, typ)
}
collected[name] = convertUnitState(unit.ActiveState)

for _, s := range unitStates {
mx[fmt.Sprintf("unit_%s_%s_state_%s", name, typ, s)] = 0
}
mx[fmt.Sprintf("unit_%s_%s_state_%s", name, typ, unit.ActiveState)] = 1
}
}

Expand Down Expand Up @@ -124,8 +163,8 @@ func (s *SystemdUnits) getLoadedUnits(conn systemdConnection) ([]dbus.UnitStatus
loaded = append(loaded, unit)
}
}

s.Debugf("got total/loaded %d/%d units", len(units), len(loaded))

return loaded, nil
}

Expand All @@ -134,11 +173,8 @@ func (s *SystemdUnits) getLoadedUnitsByPatterns(conn systemdConnection) ([]dbus.
defer cancel()

s.Debugf("calling function 'ListUnitsByPatterns'")
units, err := conn.ListUnitsByPatternsContext(
ctx,
[]string{"active", "activating", "failed", "inactive", "deactivating"},
s.Include,
)

units, err := conn.ListUnitsByPatternsContext(ctx, unitStates, s.Include)
if err != nil {
return nil, fmt.Errorf("error on ListUnitsByPatterns: %v", err)
}
Expand All @@ -149,60 +185,17 @@ func (s *SystemdUnits) getLoadedUnitsByPatterns(conn systemdConnection) ([]dbus.
loaded = append(loaded, unit)
}
}

s.Debugf("got total/loaded %d/%d units", len(units), len(loaded))
return loaded, nil
}

func (s *SystemdUnits) addUnitToCharts(name string) {
typ := extractUnitType(name)
if typ == "" {
s.Warningf("add dimension (unit '%s'): can't extract unit type", name)
return
}

id := fmt.Sprintf("%s_unit_state", typ)
chart := s.Charts().Get(id)
if chart == nil {
s.Warningf("add dimension (unit '%s'): can't find '%s' chart", name, id)
return
}

dim := &module.Dim{
ID: name,
Name: name[:len(name)-len(typ)-1], // name.type => name
}
if err := chart.AddDim(dim); err != nil {
s.Warningf("add dimension (unit '%s'): %v", name, err)
}
chart.MarkNotCreated()
return loaded, nil
}

func extractUnitType(name string) string {
// name.type => type
func extractUnitNameType(name string) (string, string) {
idx := strings.LastIndexByte(name, '.')
if idx <= 0 {
return ""
}
return name[idx+1:]
}

func convertUnitState(state string) int64 {
// https://www.freedesktop.org/software/systemd/man/systemd.html
switch state {
case "active":
return 1
case "inactive":
return 2
case "activating":
return 3
case "deactivating":
return 4
case "failed":
return 5
default:
return -1
return "", ""
}
return name[:idx], name[idx+1:]
}

func cleanUnitName(name string) string {
Expand Down
Loading