Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metric/system/cgroup: introduce cache for cgroup v2 #117

Merged
merged 3 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions metric/system/cgroup/cgstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (stat StatsV1) CGVersion() CgroupsVersion {
return CgroupsV1
}

//Format converts the stats object to a MapStr that can be sent to Report()
// Format converts the stats object to a MapStr that can be sent to Report()
func (stat StatsV1) Format() (mapstr.M, error) {
to := mapstr.M{}
err := typeconv.Convert(&to, stat)
Expand Down Expand Up @@ -95,7 +95,7 @@ func (stat *StatsV1) FillPercentages(prev CGStats, curTime, prevTime time.Time)
stat.CPUAccounting.Stats.System.Norm.Pct = opt.FloatWith(metric.Round(normalizedSystem))
}

//Format converts the stats object to a MapStr that can be sent to Report()
// Format converts the stats object to a MapStr that can be sent to Report()
func (stat StatsV2) Format() (mapstr.M, error) {
to := mapstr.M{}
err := typeconv.Convert(&to, stat)
Expand Down
2 changes: 1 addition & 1 deletion metric/system/cgroup/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// control groups, a Linux kernel feature for grouping tasks to track and limit
// resource usage.
//
// Terminology
// # Terminology
//
// A cgroup is a collection of processes that are bound to a set of limits.
//
Expand Down
17 changes: 17 additions & 0 deletions metric/system/cgroup/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
"path/filepath"
"strconv"
"strings"
"sync"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup/cgv1"
Expand Down Expand Up @@ -68,7 +70,7 @@
memoryStat = "memory"
)

//nolint: deadcode,structcheck,unused // needed by other platforms

Check failure on line 73 in metric/system/cgroup/reader.go

View workflow job for this annotation

GitHub Actions / lint-darwin

File is not `goimports`-ed with -local github.com/elastic (goimports)

Check failure on line 73 in metric/system/cgroup/reader.go

View workflow job for this annotation

GitHub Actions / lint (linux)

File is not `goimports`-ed with -local github.com/elastic (goimports)
type mount struct {
subsystem string // Subsystem name (e.g. cpuacct).
mountpoint string // Mountpoint of the subsystem (e.g. /cgroup/cpuacct).
Expand All @@ -77,6 +79,17 @@
fullPath string // Absolute path to the cgroup. It's the mountpoint joined with the path.
}

// pathListWithTime combines PathList with a timestamp.
type pathListWithTime struct {
added time.Time
pathList PathList
}

type pathCache struct {
sync.RWMutex
cache map[string]pathListWithTime
}

// Reader reads cgroup metrics and limits.
type Reader struct {
// Mountpoint of the root filesystem. Defaults to / if not set. This can be
Expand All @@ -85,6 +98,9 @@
ignoreRootCgroups bool // Ignore a cgroup when its path is "/".
cgroupsHierarchyOverride string
cgroupMountpoints Mountpoints // Mountpoints for each subsystem (e.g. cpu, cpuacct, memory, blkio).

// Cache to map known v2 cgroup controllerPaths to pathListWithTime.
v2ControllerPathCache pathCache
}

// ReaderOptions holds options for NewReaderOptions.
Expand Down Expand Up @@ -135,6 +151,7 @@
ignoreRootCgroups: opts.IgnoreRootCgroups,
cgroupsHierarchyOverride: opts.CgroupsHierarchyOverride,
cgroupMountpoints: mountpoints,
v2ControllerPathCache: pathCache{cache: make(map[string]pathListWithTime)},
}, nil
}

Expand Down
29 changes: 27 additions & 2 deletions metric/system/cgroup/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
Expand Down Expand Up @@ -67,7 +68,7 @@ type PathList struct {

// Flatten combines the V1 and V2 cgroups in cases where we don't need a map with keys
func (pl PathList) Flatten() []ControllerPath {
list := []ControllerPath{}
list := make([]ControllerPath, 0, len(pl.V1)+len(pl.V2))
for _, v1 := range pl.V1 {
list = append(list, v1)
}
Expand Down Expand Up @@ -229,7 +230,7 @@ func SubsystemMountpoints(rootfs resolve.Resolver, subsystems map[string]struct{

// ProcessCgroupPaths returns the cgroups to which a process belongs and the
// pathname of the cgroup relative to the mountpoint of the subsystem.
func (r Reader) ProcessCgroupPaths(pid int) (PathList, error) {
func (r *Reader) ProcessCgroupPaths(pid int) (PathList, error) {
cgroupPath := filepath.Join("proc", strconv.Itoa(pid), "cgroup")
cgroup, err := os.Open(r.rootfsMountpoint.ResolveHostFS(cgroupPath))
if err != nil {
Expand Down Expand Up @@ -275,6 +276,24 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
controllerPath = r.rootfsMountpoint.ResolveHostFS(filepath.Join("/sys/fs/cgroup/unified", path))
}

// Check if there is an entry for controllerPath already cached.
r.v2ControllerPathCache.Lock()
cacheEntry, ok := r.v2ControllerPathCache.cache[controllerPath]
if ok {
// If the cached entry for controllerPath is not older than 5 minutes,
// return the cached entry.
if time.Since(cacheEntry.added) < 5*time.Minute {
cPaths.V2 = cacheEntry.pathList.V2
r.v2ControllerPathCache.Unlock()
continue
}

// Consider the existing entry for controllerPath invalid, as it is
// older than 5 minutes.
delete(r.v2ControllerPathCache.cache, controllerPath)
}
r.v2ControllerPathCache.Unlock()

cgpaths, err := os.ReadDir(controllerPath)
if err != nil {
return cPaths, fmt.Errorf("error fetching cgroupV2 controllers for cgroup location '%s' and path line '%s': %w", r.cgroupMountpoints.V2Loc, line, err)
Expand All @@ -287,6 +306,12 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
cPaths.V2[controllerName] = ControllerPath{ControllerPath: path, FullPath: controllerPath, IsV2: true}
}
}
r.v2ControllerPathCache.Lock()
r.v2ControllerPathCache.cache[controllerPath] = pathListWithTime{
added: time.Now(),
pathList: cPaths,
}
r.v2ControllerPathCache.Unlock()
// cgroup v1
} else {
subsystems := strings.Split(fields[1], ",")
Expand Down
2 changes: 1 addition & 1 deletion metric/system/filesystem/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

//FSStat carries the metadata for a given filesystem
// FSStat carries the metadata for a given filesystem
type FSStat struct {
Directory string `struct:"mount_point,omitempty"`
Device string `struct:"device_name,omitempty"`
Expand Down
8 changes: 5 additions & 3 deletions metric/system/host/host_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
package host

import (
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/go-sysinfo/types"
"github.com/stretchr/testify/require"
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/go-sysinfo/types"
)

func TestMapHostInfo(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions metric/system/hwmon/hwmon.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

//ReportSensors returns the metrics from all the known sensors.
// ReportSensors returns the metrics from all the known sensors.
func ReportSensors(dev Device) (MonData, error) {
metrics := MonData{}
for _, sensor := range dev.Sensors {
Expand All @@ -51,7 +51,7 @@ func ReportSensors(dev Device) (MonData, error) {
return metrics, nil
}

//DetectHwmon returns a list of hwmon sensors found on the system, if they exist
// DetectHwmon returns a list of hwmon sensors found on the system, if they exist
func DetectHwmon(hostfs resolve.Resolver) ([]Device, error) {
sensorTypeRegex := regexp.MustCompile("(^[a-z]*)([0-9]*)")
fullPath := hostfs.ResolveHostFS(baseDir)
Expand Down
3 changes: 2 additions & 1 deletion metric/system/network/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ package network
import (
"testing"

"github.com/elastic/go-sysinfo/types"
"github.com/stretchr/testify/require"

"github.com/elastic/go-sysinfo/types"
)

func TestFilter(t *testing.T) {
Expand Down
Loading