diff --git a/metric/system/cgroup/reader.go b/metric/system/cgroup/reader.go
index 1debc009b9..f9bb6bb86c 100644
--- a/metric/system/cgroup/reader.go
+++ b/metric/system/cgroup/reader.go
@@ -23,6 +23,8 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
+	"time"
 
 	"github.com/elastic/elastic-agent-libs/logp"
 	"github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup/cgv1"
@@ -68,7 +70,7 @@ const (
 	memoryStat = "memory"
 )
 
-//nolint: deadcode,structcheck,unused // needed by other platforms
+// nolint: deadcode,structcheck,unused // needed by other platforms
 type mount struct {
 	subsystem  string // Subsystem name (e.g. cpuacct).
 	mountpoint string // Mountpoint of the subsystem (e.g. /cgroup/cpuacct).
@@ -77,6 +79,17 @@ type mount struct {
 	fullPath string // Absolute path to the cgroup. It's the mountpoint joined with the path.
}
 
+// pathListWithTime combines PathList with a timestamp.
+type pathListWithTime struct {
+	added    time.Time
+	pathList PathList
+}
+
+type pathCache struct {
+	sync.RWMutex
+	cache map[string]pathListWithTime
+}
+
 // Reader reads cgroup metrics and limits.
 type Reader struct {
 	// Mountpoint of the root filesystem. Defaults to / if not set. This can be
@@ -85,6 +98,9 @@ type Reader struct {
 	ignoreRootCgroups        bool // Ignore a cgroup when its path is "/".
 	cgroupsHierarchyOverride string
 	cgroupMountpoints        Mountpoints // Mountpoints for each subsystem (e.g. cpu, cpuacct, memory, blkio).
+
+	// Cache to map known v2 cgroup controllerPaths to pathListWithTime.
+	v2ControllerPathCache pathCache
 }
 
 // ReaderOptions holds options for NewReaderOptions.
@@ -135,6 +151,7 @@ func NewReaderOptions(opts ReaderOptions) (*Reader, error) {
 		ignoreRootCgroups:        opts.IgnoreRootCgroups,
 		cgroupsHierarchyOverride: opts.CgroupsHierarchyOverride,
 		cgroupMountpoints:        mountpoints,
+		v2ControllerPathCache:    pathCache{cache: make(map[string]pathListWithTime)},
 	}, nil
 }
 
diff --git a/metric/system/cgroup/util.go b/metric/system/cgroup/util.go
index bf43828a2d..95942b7a83 100644
--- a/metric/system/cgroup/util.go
+++ b/metric/system/cgroup/util.go
@@ -25,6 +25,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"time"
 
 	"github.com/elastic/elastic-agent-libs/logp"
 	"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
@@ -67,7 +68,7 @@ type PathList struct {
 
 // Flatten combines the V1 and V2 cgroups in cases where we don't need a map with keys
 func (pl PathList) Flatten() []ControllerPath {
-	list := []ControllerPath{}
+	list := make([]ControllerPath, 0, len(pl.V1)+len(pl.V2))
 	for _, v1 := range pl.V1 {
 		list = append(list, v1)
 	}
@@ -255,6 +256,7 @@ func (r Reader) ProcessCgroupPaths(pid int) (PathList, error) {
 		if r.cgroupsHierarchyOverride != "" {
 			path = r.cgroupsHierarchyOverride
 		}
 
+		// cgroup V2
 		// cgroup v2 controllers will always start with this string
 		if strings.Contains(line, "0::/") {
@@ -275,6 +277,23 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
 			controllerPath = r.rootfsMountpoint.ResolveHostFS(filepath.Join("/sys/fs/cgroup/unified", path))
 			}
+
+			// Check if there is an entry for controllerPath already cached.
+			r.v2ControllerPathCache.Lock()
+			cacheEntry, ok := r.v2ControllerPathCache.cache[controllerPath]
+			if ok {
+				// If the cached entry for controllerPath is not older than 5 minutes,
+				// return the cached entry.
+				if time.Since(cacheEntry.added) < 5*time.Minute {
+					cPaths.V2 = cacheEntry.pathList.V2
+					r.v2ControllerPathCache.Unlock()
+					continue
+				}
+			}
+			// Consider the existing entry for controllerPath invalid, as it is
+			// older than 5 minutes.
+			delete(r.v2ControllerPathCache.cache, controllerPath)
+			r.v2ControllerPathCache.Unlock()
+
 			cgpaths, err := os.ReadDir(controllerPath)
 			if err != nil {
 				return cPaths, fmt.Errorf("error fetching cgroupV2 controllers for cgroup location '%s' and path line '%s': %w", r.cgroupMountpoints.V2Loc, line, err)
 			}
@@ -287,6 +306,12 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
 					cPaths.V2[controllerName] = ControllerPath{ControllerPath: path, FullPath: controllerPath, IsV2: true}
 				}
 			}
+			r.v2ControllerPathCache.Lock()
+			r.v2ControllerPathCache.cache[controllerPath] = pathListWithTime{
+				added:    time.Now(),
+				pathList: cPaths,
+			}
+			r.v2ControllerPathCache.Unlock()
 		// cgroup v1
 		} else {
 			subsystems := strings.Split(fields[1], ",")
diff --git a/metric/system/process/process_darwin.go b/metric/system/process/process_darwin.go
index 950d9734b6..a68b854b89 100644
--- a/metric/system/process/process_darwin.go
+++ b/metric/system/process/process_darwin.go
@@ -64,8 +64,8 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
 
 	bbuf := bytes.NewBuffer(buf)
 
-	procMap := make(ProcsMap, 0)
-	var plist []ProcState
+	procMap := make(ProcsMap, len(names))
+	plist := make([]ProcState, 0, len(names))
 
 	for i := 0; i < num; i++ {
 		if err := binary.Read(bbuf, binary.LittleEndian, &pid); err != nil {
diff --git a/metric/system/process/process_linux_common.go b/metric/system/process/process_linux_common.go
index 38427a85c8..026cf45699 100644
--- a/metric/system/process/process_linux_common.go
+++ b/metric/system/process/process_linux_common.go
@@ -58,8 +58,8 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
 		return nil, nil, fmt.Errorf("error reading directory names: %w", err)
 	}
 
-	procMap := make(ProcsMap)
-	var plist []ProcState
+	procMap := make(ProcsMap, len(names))
+	plist := make([]ProcState, 0, len(names))
 
 	// Iterate over the directory, fetch just enough info so we can filter based on user input.
 	logger := logp.L()
diff --git a/metric/system/process/process_windows.go b/metric/system/process/process_windows.go
index 61882d6ece..30a677fcb1 100644
--- a/metric/system/process/process_windows.go
+++ b/metric/system/process/process_windows.go
@@ -38,8 +38,9 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
 		return nil, nil, fmt.Errorf("EnumProcesses failed: %w", err)
 	}
 
-	procMap := make(ProcsMap, 0)
-	var plist []ProcState
+	procMap := make(ProcsMap, len(names))
+	plist := make([]ProcState, 0, len(names))
+
 	// This is probably the only implementation that doesn't benefit from our
 	// little fillPid callback system. We'll need to iterate over everything
 	// manually.