Skip to content

Commit

Permalink
metric/system/cgroup: introduce cache for cgroup v2 (#117)
Browse files Browse the repository at this point in the history
## What does this PR do?

Reintroduce the improvements from
#103. This
PR got reverted with
#113 because
of #109.
The issue, that got reported in
#109, was
fixed with
#116.

So its time to bring back the performance improvements by reintroducing
the cache for cgroup v2.

## Why is it important?

<!-- Mandatory
Explain here the WHY, or the rationale/motivation for the changes.
-->

## Checklist

<!-- Mandatory
Add a checklist of things that are required to be reviewed in order to
have the PR approved

List here all the items you have verified BEFORE sending this PR. Please
DO NOT remove any item, striking through those that do not apply. (Just
in case, strikethrough uses two tildes. ~~Scratch this.~~)
-->

- [x] My code follows the style guidelines of this project
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] ~~I have added tests that prove my fix is effective or that my
feature works~~
- [ ] ~~I have added an entry in `CHANGELOG.md`~~

---------

Signed-off-by: Florian Lehner <[email protected]>
  • Loading branch information
florianl authored Dec 21, 2023
1 parent 07d5c44 commit 4daddf3
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 12 deletions.
4 changes: 2 additions & 2 deletions metric/system/cgroup/cgstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (stat StatsV1) CGVersion() CgroupsVersion {
return CgroupsV1
}

//Format converts the stats object to a MapStr that can be sent to Report()
// Format converts the stats object to a MapStr that can be sent to Report()
func (stat StatsV1) Format() (mapstr.M, error) {
to := mapstr.M{}
err := typeconv.Convert(&to, stat)
Expand Down Expand Up @@ -95,7 +95,7 @@ func (stat *StatsV1) FillPercentages(prev CGStats, curTime, prevTime time.Time)
stat.CPUAccounting.Stats.System.Norm.Pct = opt.FloatWith(metric.Round(normalizedSystem))
}

//Format converts the stats object to a MapStr that can be sent to Report()
// Format converts the stats object to a MapStr that can be sent to Report()
func (stat StatsV2) Format() (mapstr.M, error) {
to := mapstr.M{}
err := typeconv.Convert(&to, stat)
Expand Down
2 changes: 1 addition & 1 deletion metric/system/cgroup/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// control groups, a Linux kernel feature for grouping tasks to track and limit
// resource usage.
//
// Terminology
// # Terminology
//
// A cgroup is a collection of processes that are bound to a set of limits.
//
Expand Down
17 changes: 17 additions & 0 deletions metric/system/cgroup/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup/cgv1"
Expand Down Expand Up @@ -77,6 +79,17 @@ type mount struct {
fullPath string // Absolute path to the cgroup. It's the mountpoint joined with the path.
}

// pathListWithTime combines PathList with a timestamp.
type pathListWithTime struct {
added time.Time
pathList PathList
}

type pathCache struct {
sync.RWMutex
cache map[string]pathListWithTime
}

// Reader reads cgroup metrics and limits.
type Reader struct {
// Mountpoint of the root filesystem. Defaults to / if not set. This can be
Expand All @@ -85,6 +98,9 @@ type Reader struct {
ignoreRootCgroups bool // Ignore a cgroup when its path is "/".
cgroupsHierarchyOverride string
cgroupMountpoints Mountpoints // Mountpoints for each subsystem (e.g. cpu, cpuacct, memory, blkio).

// Cache to map known v2 cgroup controllerPaths to pathListWithTime.
v2ControllerPathCache pathCache
}

// ReaderOptions holds options for NewReaderOptions.
Expand Down Expand Up @@ -135,6 +151,7 @@ func NewReaderOptions(opts ReaderOptions) (*Reader, error) {
ignoreRootCgroups: opts.IgnoreRootCgroups,
cgroupsHierarchyOverride: opts.CgroupsHierarchyOverride,
cgroupMountpoints: mountpoints,
v2ControllerPathCache: pathCache{cache: make(map[string]pathListWithTime)},
}, nil
}

Expand Down
29 changes: 27 additions & 2 deletions metric/system/cgroup/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
Expand Down Expand Up @@ -67,7 +68,7 @@ type PathList struct {

// Flatten combines the V1 and V2 cgroups in cases where we don't need a map with keys
func (pl PathList) Flatten() []ControllerPath {
list := []ControllerPath{}
list := make([]ControllerPath, 0, len(pl.V1)+len(pl.V2))
for _, v1 := range pl.V1 {
list = append(list, v1)
}
Expand Down Expand Up @@ -229,7 +230,7 @@ func SubsystemMountpoints(rootfs resolve.Resolver, subsystems map[string]struct{

// ProcessCgroupPaths returns the cgroups to which a process belongs and the
// pathname of the cgroup relative to the mountpoint of the subsystem.
func (r Reader) ProcessCgroupPaths(pid int) (PathList, error) {
func (r *Reader) ProcessCgroupPaths(pid int) (PathList, error) {
cgroupPath := filepath.Join("proc", strconv.Itoa(pid), "cgroup")
cgroup, err := os.Open(r.rootfsMountpoint.ResolveHostFS(cgroupPath))
if err != nil {
Expand Down Expand Up @@ -288,6 +289,24 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
controllerPath = r.rootfsMountpoint.ResolveHostFS(filepath.Join("/sys/fs/cgroup/unified", path))
}

// Check if there is an entry for controllerPath already cached.
r.v2ControllerPathCache.Lock()
cacheEntry, ok := r.v2ControllerPathCache.cache[controllerPath]
if ok {
// If the cached entry for controllerPath is not older than 5 minutes,
// return the cached entry.
if time.Since(cacheEntry.added) < 5*time.Minute {
cPaths.V2 = cacheEntry.pathList.V2
r.v2ControllerPathCache.Unlock()
continue
}

// Consider the existing entry for controllerPath invalid, as it is
// older than 5 minutes.
delete(r.v2ControllerPathCache.cache, controllerPath)
}
r.v2ControllerPathCache.Unlock()

cgpaths, err := os.ReadDir(controllerPath)
if err != nil {
return cPaths, fmt.Errorf("error fetching cgroupV2 controllers for cgroup location '%s' and path line '%s': %w", r.cgroupMountpoints.V2Loc, line, err)
Expand All @@ -300,6 +319,12 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
cPaths.V2[controllerName] = ControllerPath{ControllerPath: path, FullPath: controllerPath, IsV2: true}
}
}
r.v2ControllerPathCache.Lock()
r.v2ControllerPathCache.cache[controllerPath] = pathListWithTime{
added: time.Now(),
pathList: cPaths,
}
r.v2ControllerPathCache.Unlock()
// cgroup v1
} else {
subsystems := strings.Split(fields[1], ",")
Expand Down
2 changes: 1 addition & 1 deletion metric/system/filesystem/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

//FSStat carries the metadata for a given filesystem
// FSStat carries the metadata for a given filesystem
type FSStat struct {
Directory string `struct:"mount_point,omitempty"`
Device string `struct:"device_name,omitempty"`
Expand Down
8 changes: 5 additions & 3 deletions metric/system/host/host_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
package host

import (
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/go-sysinfo/types"
"github.com/stretchr/testify/require"
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/go-sysinfo/types"
)

func TestMapHostInfo(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions metric/system/hwmon/hwmon.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

//ReportSensors returns the metrics from all the known sensors.
// ReportSensors returns the metrics from all the known sensors.
func ReportSensors(dev Device) (MonData, error) {
metrics := MonData{}
for _, sensor := range dev.Sensors {
Expand All @@ -51,7 +51,7 @@ func ReportSensors(dev Device) (MonData, error) {
return metrics, nil
}

//DetectHwmon returns a list of hwmon sensors found on the system, if they exist
// DetectHwmon returns a list of hwmon sensors found on the system, if they exist
func DetectHwmon(hostfs resolve.Resolver) ([]Device, error) {
sensorTypeRegex := regexp.MustCompile("(^[a-z]*)([0-9]*)")
fullPath := hostfs.ResolveHostFS(baseDir)
Expand Down
3 changes: 2 additions & 1 deletion metric/system/network/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ package network
import (
"testing"

"github.com/elastic/go-sysinfo/types"
"github.com/stretchr/testify/require"

"github.com/elastic/go-sysinfo/types"
)

func TestFilter(t *testing.T) {
Expand Down

0 comments on commit 4daddf3

Please sign in to comment.