From 7ac0a0d2e5835019653dc2d43fbd620b7593ab37 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Fri, 13 Dec 2024 13:33:00 -0800 Subject: [PATCH] add wakeup count telemetry --- go.mod | 2 +- go.sum | 4 ++-- pkg/ebpf/telemetry/perf_metrics.go | 28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index a01741a7d419f..58f6dacabd2c6 100644 --- a/go.mod +++ b/go.mod @@ -159,7 +159,7 @@ require ( github.com/DataDog/datadog-agent/pkg/util/scrubber v0.59.1 github.com/DataDog/datadog-go/v5 v5.5.0 github.com/DataDog/datadog-operator v0.7.1-0.20241024104907-734366f3c0d1 - github.com/DataDog/ebpf-manager v0.7.4 + github.com/DataDog/ebpf-manager v0.7.5-0.20241213212949-6c446b204c82 github.com/DataDog/gopsutil v1.2.2 github.com/DataDog/nikos v1.12.8 github.com/DataDog/opentelemetry-mapping-go/pkg/otlp/attributes v0.22.0 diff --git a/go.sum b/go.sum index 99aabf21db82e..033f2c05c5675 100644 --- a/go.sum +++ b/go.sum @@ -136,8 +136,8 @@ github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42/go.mod h1:TX7CTOQ3LbQjfAi4SwqUoR5gY1zfUk7VRBDTuArjaDc= github.com/DataDog/dd-trace-go/v2 v2.0.0-beta.11 h1:6vwU//TjBIghQKMgIP9UyIRhN/LWS1y8tYzvRnu8JZw= github.com/DataDog/dd-trace-go/v2 v2.0.0-beta.11/go.mod h1:woPHoAOfAIM7kl4GauR+qrWui7teNg44Um0verg2rzQ= -github.com/DataDog/ebpf-manager v0.7.4 h1:fI2fJbDpykDiO/hq3IVIi+YLVVrJ97qG6O8LT7mdCnQ= -github.com/DataDog/ebpf-manager v0.7.4/go.mod h1:QlCkGTH3koCMDG7E8o9Si6O9UXjBwQspP6z2YtKlyGU= +github.com/DataDog/ebpf-manager v0.7.5-0.20241213212949-6c446b204c82 h1:uBg02TIv9hSf9xlbHLZSM6f+uAi3Jt+QSGYkL5M3MBQ= +github.com/DataDog/ebpf-manager v0.7.5-0.20241213212949-6c446b204c82/go.mod h1:fuQ8xexSbY/3BlxnNidUr0QORDwK8wEvjYyMb4QO23g= github.com/DataDog/go-grpc-bidirectional-streaming-example v0.0.0-20221024060302-b9cf785c02fe h1:RO40ywnX/vZLi4Pb4jRuFGgQQBYGIIoQ6u+P2MIgFOA= github.com/DataDog/go-grpc-bidirectional-streaming-example v0.0.0-20221024060302-b9cf785c02fe/go.mod h1:90sqV0j7E8wYCyqIp5d9HmYWLTFQttqPFFtNYDyAybQ= github.com/DataDog/go-libddwaf/v3 v3.5.1 h1:GWA4ln4DlLxiXm+X7HA/oj0ZLcdCwOS81KQitegRTyY= diff --git a/pkg/ebpf/telemetry/perf_metrics.go b/pkg/ebpf/telemetry/perf_metrics.go index a8f6ed2e6602e..6eefaddcaceea 100644 --- a/pkg/ebpf/telemetry/perf_metrics.go +++ b/pkg/ebpf/telemetry/perf_metrics.go @@ -22,12 +22,13 @@ var ( ) type perfUsageCollector struct { - mtx sync.Mutex - usage *prometheus.GaugeVec - usagePct *prometheus.GaugeVec - size *prometheus.GaugeVec - lost *prometheus.CounterVec - channelLen *prometheus.GaugeVec + mtx sync.Mutex + usage *prometheus.GaugeVec + usagePct *prometheus.GaugeVec + size *prometheus.GaugeVec + lost *prometheus.CounterVec + channelLen *prometheus.GaugeVec + wakeupCount *prometheus.CounterVec perfMaps []*manager.PerfMap perfChannelLenFuncs map[*manager.PerfMap]func() int @@ -81,6 +82,14 @@ func NewPerfUsageCollector() prometheus.Collector { }, []string{"map_name", "map_type"}, ), + wakeupCount: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: "ebpf__perf", + Name: "_wakeup_count", + Help: "counter tracking number of times reader was woken up", + }, + []string{"map_name", "map_type"}, + ), } return perfCollector } @@ -101,7 +110,7 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { for _, pm := range p.perfMaps { mapName, mapType := pm.Name, ebpf.PerfEventArray.String() size := float64(pm.BufferSize()) - usage, lost := pm.Telemetry() + usage, lost, wakeupCount := pm.Telemetry() if usage == nil || lost == nil { continue } @@ -115,6 +124,7 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { p.size.WithLabelValues(mapName, mapType, cpuString).Set(size) p.lost.WithLabelValues(mapName, mapType, cpuString).Add(float64(lost[cpu])) } + p.wakeupCount.WithLabelValues(mapName, mapType).Add(float64(wakeupCount)) } for pm, chFunc := range p.perfChannelLenFuncs { @@ -125,7 +135,7 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { for _, rb := range p.ringBuffers { mapName, mapType := rb.Name, ebpf.RingBuf.String() size := float64(rb.BufferSize()) - usage, ok := rb.Telemetry() + usage, wakeupCount, ok := rb.Telemetry() if !ok { continue } @@ -135,6 +145,7 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { p.usage.WithLabelValues(mapName, mapType, cpuString).Set(count) p.usagePct.WithLabelValues(mapName, mapType, cpuString).Set(100 * (count / size)) p.size.WithLabelValues(mapName, mapType, cpuString).Set(size) + p.wakeupCount.WithLabelValues(mapName, mapType).Add(float64(wakeupCount)) } for rb, chFunc := range p.ringChannelLenFuncs { @@ -147,6 +158,7 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { p.size.Collect(metrics) p.lost.Collect(metrics) p.channelLen.Collect(metrics) + p.wakeupCount.Collect(metrics) } // ReportPerfMapTelemetry starts reporting the telemetry for the provided PerfMap