Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Process memory metrics: report deltas instead of absolute values in OTEL exporter #950

Merged
merged 2 commits
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/internal/export/otel/metrics_proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,10 @@ func (me *procMetricsExporter) observeMetric(reporter *procMetrics, s *process.S
me.cpuUtilisationObserver(me.ctx, reporter, s)

mem, attrs := reporter.memory.ForRecord(s)
mem.Add(me.ctx, s.MemoryRSSBytes, metric2.WithAttributeSet(attrs))
mem.Add(me.ctx, s.MemoryRSSBytesDelta, metric2.WithAttributeSet(attrs))

vmem, attrs := reporter.memoryVirtual.ForRecord(s)
vmem.Add(me.ctx, s.MemoryVMSBytes, metric2.WithAttributeSet(attrs))
vmem.Add(me.ctx, s.MemoryVMSBytesDelta, metric2.WithAttributeSet(attrs))

me.diskObserver(me.ctx, reporter, s)
me.netObserver(me.ctx, reporter, s)
Expand Down
25 changes: 17 additions & 8 deletions pkg/internal/infraolly/process/harvest.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ func (ps *Harvester) Harvest(svcID *svc.ID) (*Status, error) {

ps.populateNetworkInfo(status, cached)

// current stats will be used in the next iteration to calculate some delta values
cached.prevStats = cached.stats

return status, nil
}

Expand Down Expand Up @@ -139,14 +142,18 @@ func (ps *Harvester) populateGauges(status *Status, process *linuxProcess) error
var err error

// Calculate CPU metrics from current and previous user/system/wait time
status.CPUTimeSystemDelta = process.stats.cpu.SystemTime - process.previousCPUStats.SystemTime
status.CPUTimeUserDelta = process.stats.cpu.UserTime - process.previousCPUStats.UserTime
status.CPUTimeWaitDelta = process.stats.cpu.WaitTime - process.previousCPUStats.WaitTime

delta := process.measureTime.Sub(process.previousMeasureTime).Seconds() * float64(runtime.NumCPU())
status.CPUUtilisationSystem = (process.stats.cpu.SystemTime - process.previousCPUStats.SystemTime) / delta
status.CPUUtilisationUser = (process.stats.cpu.UserTime - process.previousCPUStats.UserTime) / delta
status.CPUUtilisationWait = (process.stats.cpu.WaitTime - process.previousCPUStats.WaitTime) / delta
var zero CPUInfo
// we only calculate CPU deltas and utilization time from the second sample onwards
if process.prevStats.cpu != zero {
status.CPUTimeSystemDelta = process.stats.cpu.SystemTime - process.prevStats.cpu.SystemTime
status.CPUTimeUserDelta = process.stats.cpu.UserTime - process.prevStats.cpu.UserTime
status.CPUTimeWaitDelta = process.stats.cpu.WaitTime - process.prevStats.cpu.WaitTime

delta := process.measureTime.Sub(process.previousMeasureTime).Seconds() * float64(runtime.NumCPU())
status.CPUUtilisationSystem = (process.stats.cpu.SystemTime - process.prevStats.cpu.SystemTime) / delta
status.CPUUtilisationUser = (process.stats.cpu.UserTime - process.prevStats.cpu.UserTime) / delta
status.CPUUtilisationWait = (process.stats.cpu.WaitTime - process.prevStats.cpu.WaitTime) / delta
}

if ps.privileged {
status.FdCount, err = process.NumFDs()
Expand All @@ -159,7 +166,9 @@ func (ps *Harvester) populateGauges(status *Status, process *linuxProcess) error
status.Status = process.stats.state
status.ThreadCount = process.stats.numThreads
status.MemoryVMSBytes = process.stats.vmSize
status.MemoryVMSBytesDelta = process.stats.vmSize - process.prevStats.vmSize
status.MemoryRSSBytes = process.stats.vmRSS
status.MemoryRSSBytesDelta = process.stats.vmRSS - process.prevStats.vmRSS

return nil
}
Expand Down
4 changes: 1 addition & 3 deletions pkg/internal/infraolly/process/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ type linuxProcess struct {

measureTime time.Time
stats procStats
prevStats procStats
process *process.Process

// used to calculate CPU utilization ratios
previousCPUStats CPUInfo
previousMeasureTime time.Time
previousIOCounters *process.IOCountersStat
previousNetRx int64
Expand Down Expand Up @@ -131,13 +131,11 @@ func getLinuxProcess(cachedCopy *linuxProcess, procFSRoot string, pid int32, pri
stats: currentStats,
measureTime: measureTime,
previousMeasureTime: measureTime,
previousCPUStats: currentStats.cpu,
procFSRoot: procFSRoot,
}, nil
}

// Otherwise, instead of creating a new process snapshot, we just reuse the cachedCopy one, with updated data
cachedCopy.previousCPUStats = cachedCopy.stats.cpu
cachedCopy.previousMeasureTime = cachedCopy.measureTime
cachedCopy.stats = currentStats
cachedCopy.measureTime = measureTime
Expand Down
8 changes: 6 additions & 2 deletions pkg/internal/infraolly/process/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@ type Status struct {
CPUUtilisationUser float64
CPUUtilisationWait float64

MemoryRSSBytes int64
MemoryVMSBytes int64
// delta values are used in OTEL UpDownCounters while absolute values are used in Prometheus gauges
MemoryRSSBytes int64
MemoryVMSBytes int64
MemoryRSSBytesDelta int64
MemoryVMSBytesDelta int64

Status string
ParentProcessID int32
ThreadCount int32
Expand Down
Loading