Skip to content

Commit

Permalink
Process memory metrics: report deltas instead of absolute values in OTEL exporter
Browse files Browse the repository at this point in the history
  • Loading branch information
mariomac committed Jun 20, 2024
1 parent 1ab104a commit dca415b
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 15 deletions.
4 changes: 2 additions & 2 deletions pkg/internal/export/otel/metrics_proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,10 @@ func (me *procMetricsExporter) observeMetric(reporter *procMetrics, s *process.S
me.cpuUtilisationObserver(me.ctx, reporter, s)

mem, attrs := reporter.memory.ForRecord(s)
mem.Add(me.ctx, s.MemoryRSSBytes, metric2.WithAttributeSet(attrs))
mem.Add(me.ctx, s.MemoryRSSBytesDelta, metric2.WithAttributeSet(attrs))

vmem, attrs := reporter.memoryVirtual.ForRecord(s)
vmem.Add(me.ctx, s.MemoryVMSBytes, metric2.WithAttributeSet(attrs))
vmem.Add(me.ctx, s.MemoryVMSBytesDelta, metric2.WithAttributeSet(attrs))

me.diskObserver(me.ctx, reporter, s)
me.netObserver(me.ctx, reporter, s)
Expand Down
25 changes: 17 additions & 8 deletions pkg/internal/infraolly/process/harvest.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ func (ps *Harvester) Harvest(svcID *svc.ID) (*Status, error) {

ps.populateNetworkInfo(status, cached)

// current stats will be used in the next iteration to calculate some delta values
cached.prevStats = cached.stats

return status, nil
}

Expand Down Expand Up @@ -139,14 +142,18 @@ func (ps *Harvester) populateGauges(status *Status, process *linuxProcess) error
var err error

// Calculate CPU metrics from current and previous user/system/wait time
status.CPUTimeSystemDelta = process.stats.cpu.SystemTime - process.previousCPUStats.SystemTime
status.CPUTimeUserDelta = process.stats.cpu.UserTime - process.previousCPUStats.UserTime
status.CPUTimeWaitDelta = process.stats.cpu.WaitTime - process.previousCPUStats.WaitTime

delta := process.measureTime.Sub(process.previousMeasureTime).Seconds() * float64(runtime.NumCPU())
status.CPUUtilisationSystem = (process.stats.cpu.SystemTime - process.previousCPUStats.SystemTime) / delta
status.CPUUtilisationUser = (process.stats.cpu.UserTime - process.previousCPUStats.UserTime) / delta
status.CPUUtilisationWait = (process.stats.cpu.WaitTime - process.previousCPUStats.WaitTime) / delta
var zero CPUInfo
// we only calculate CPU deltas and utilization time from the second sample onwards
if process.prevStats.cpu != zero {
status.CPUTimeSystemDelta = process.stats.cpu.SystemTime - process.prevStats.cpu.SystemTime
status.CPUTimeUserDelta = process.stats.cpu.UserTime - process.prevStats.cpu.UserTime
status.CPUTimeWaitDelta = process.stats.cpu.WaitTime - process.prevStats.cpu.WaitTime

delta := process.measureTime.Sub(process.previousMeasureTime).Seconds() * float64(runtime.NumCPU())
status.CPUUtilisationSystem = (process.stats.cpu.SystemTime - process.prevStats.cpu.SystemTime) / delta
status.CPUUtilisationUser = (process.stats.cpu.UserTime - process.prevStats.cpu.UserTime) / delta
status.CPUUtilisationWait = (process.stats.cpu.WaitTime - process.prevStats.cpu.WaitTime) / delta
}

if ps.privileged {
status.FdCount, err = process.NumFDs()
Expand All @@ -159,7 +166,9 @@ func (ps *Harvester) populateGauges(status *Status, process *linuxProcess) error
status.Status = process.stats.state
status.ThreadCount = process.stats.numThreads
status.MemoryVMSBytes = process.stats.vmSize
status.MemoryVMSBytesDelta = process.stats.vmSize - process.prevStats.vmSize
status.MemoryRSSBytes = process.stats.vmRSS
status.MemoryRSSBytesDelta = process.stats.vmRSS - process.prevStats.vmRSS

return nil
}
Expand Down
4 changes: 1 addition & 3 deletions pkg/internal/infraolly/process/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ type linuxProcess struct {

measureTime time.Time
stats procStats
prevStats procStats
process *process.Process

// used to calculate CPU utilization ratios
previousCPUStats CPUInfo
previousMeasureTime time.Time
previousIOCounters *process.IOCountersStat
previousNetRx int64
Expand Down Expand Up @@ -131,13 +131,11 @@ func getLinuxProcess(cachedCopy *linuxProcess, procFSRoot string, pid int32, pri
stats: currentStats,
measureTime: measureTime,
previousMeasureTime: measureTime,
previousCPUStats: currentStats.cpu,
procFSRoot: procFSRoot,
}, nil
}

// Otherwise, instead of creating a new process snapshot, we just reuse the cachedCopy one, with updated data
cachedCopy.previousCPUStats = cachedCopy.stats.cpu
cachedCopy.previousMeasureTime = cachedCopy.measureTime
cachedCopy.stats = currentStats
cachedCopy.measureTime = measureTime
Expand Down
8 changes: 6 additions & 2 deletions pkg/internal/infraolly/process/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@ type Status struct {
CPUUtilisationUser float64
CPUUtilisationWait float64

MemoryRSSBytes int64
MemoryVMSBytes int64
// delta values are used in OTEL UpDownCounters while absolute values are used in Prometheus gauges
MemoryRSSBytes int64
MemoryVMSBytes int64
MemoryRSSBytesDelta int64
MemoryVMSBytesDelta int64

Status string
ParentProcessID int32
ThreadCount int32
Expand Down

0 comments on commit dca415b

Please sign in to comment.