From 55902b6828857ef98ac219f86274437ac2a66e53 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Tue, 15 Aug 2023 13:50:33 -0700 Subject: [PATCH] [service/proctelemetry] Offer to override the HOST_PROC environment variable with a programmatic value (#7998) Reprising https://github.com/open-telemetry/opentelemetry-collector/pull/7434 **Description:** While debugging the below error in k8s env ```` Error: failed to register process metrics: process does not exist 2023/03/23 03:44:47 main.go:115: application run finished with error: failed to register process metrics: process does not exist ```` I have noticed that the metric server is calling GOPSUTIL while the HOST_PROC variable is set , this causes gopsutil `PidExistsWithContext ` to retrieve the process from the host instead from the container ```` func PidExistsWithContext(ctx context.Context, pid int32) (bool, error) { if pid <= 0 { return false, fmt.Errorf("invalid pid %v", pid) } proc, err := os.FindProcess(int(pid)) if err != nil { return false, err } if isMount(common.HostProc()) { // if //proc exists and is mounted, check if //proc/ folder exists _, err := os.Stat(common.HostProc(strconv.Itoa(int(pid)))) if os.IsNotExist(err) { return false, nil } return err == nil, err } ```` This PR unsets and resets the host_proc variable and introduces an option to allow the use of host_proc if for whatever reason they need to **Link to tracking Issue:** Fixes #7435 **Testing:** unit tests --------- Signed-off-by: Dani Louca Co-authored-by: Dani Louca Co-authored-by: Alex Boten --- .../proctelemetry/process_telemetry.go | 40 ++++++++++++-- .../process_telemetry_linux_test.go | 53 +++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 service/internal/proctelemetry/process_telemetry_linux_test.go diff --git a/service/internal/proctelemetry/process_telemetry.go b/service/internal/proctelemetry/process_telemetry.go index c7068e2deca..35f7fd6f10b 100644 --- a/service/internal/proctelemetry/process_telemetry.go +++ b/service/internal/proctelemetry/process_telemetry.go @@ -10,6 +10,7 @@ import ( "sync" "time" + "github.com/shirou/gopsutil/v3/common" "github.com/shirou/gopsutil/v3/process" "go.opencensus.io/metric" "go.opencensus.io/stats" @@ -27,6 +28,7 @@ type processMetrics struct { startTimeUnixNano int64 ballastSizeBytes uint64 proc *process.Process + context context.Context processUptime *metric.Float64DerivedCumulative allocMem *metric.Int64DerivedGauge @@ -49,9 +51,34 @@ type processMetrics struct { ms *runtime.MemStats } +type RegisterOption interface { + apply(*registerOption) +} + +type registerOption struct { + hostProc string +} + +type registerOptionFunc func(*registerOption) + +func (fn registerOptionFunc) apply(set *registerOption) { + fn(set) +} + +// WithHostProc overrides the /proc folder on Linux used by process telemetry. +func WithHostProc(hostProc string) RegisterOption { + return registerOptionFunc(func(uo *registerOption) { + uo.hostProc = hostProc + }) +} + // RegisterProcessMetrics creates a new set of processMetrics (mem, cpu) that can be used to measure // basic information about this process. -func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64) error { +func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64, opts ...RegisterOption) error { + set := registerOption{} + for _, opt := range opts { + opt.apply(&set) + } var err error pm := &processMetrics{ startTimeUnixNano: time.Now().UnixNano(), @@ -59,7 +86,12 @@ func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProv ms: &runtime.MemStats{}, } - pm.proc, err = process.NewProcess(int32(os.Getpid())) + ctx := context.Background() + if set.hostProc != "" { + ctx = context.WithValue(ctx, common.EnvKey, common.EnvMap{common.HostProcEnvKey: set.hostProc}) + } + pm.context = ctx + pm.proc, err = process.NewProcessWithContext(pm.context, int32(os.Getpid())) if err != nil { return err } @@ -231,7 +263,7 @@ func (pm *processMetrics) updateSysMem() int64 { } func (pm *processMetrics) updateCPUSeconds() float64 { - times, err := pm.proc.Times() + times, err := pm.proc.TimesWithContext(pm.context) if err != nil { return 0 } @@ -241,7 +273,7 @@ func (pm *processMetrics) updateCPUSeconds() float64 { } func (pm *processMetrics) updateRSSMemory() int64 { - mem, err := pm.proc.MemoryInfo() + mem, err := pm.proc.MemoryInfoWithContext(pm.context) if err != nil { return 0 } diff --git a/service/internal/proctelemetry/process_telemetry_linux_test.go b/service/internal/proctelemetry/process_telemetry_linux_test.go new file mode 100644 index 00000000000..d42838b2782 --- /dev/null +++ b/service/internal/proctelemetry/process_telemetry_linux_test.go @@ -0,0 +1,53 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux +// +build linux + +package proctelemetry + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opencensus.io/metric" + "go.opentelemetry.io/otel/metric/noop" +) + +func TestOCProcessTelemetryWithHostProc(t *testing.T) { + ocRegistry := metric.NewRegistry() + // Make the sure the environment variable value is not used. + t.Setenv("HOST_PROC", "foo/bar") + + require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0, WithHostProc("/proc"))) + + // Check that the metrics are actually filled. + time.Sleep(200 * time.Millisecond) + + metrics := ocRegistry.Read() + + for _, metricName := range expectedMetrics { + m := findMetric(metrics, metricName) + require.NotNil(t, m) + require.Len(t, m.TimeSeries, 1) + ts := m.TimeSeries[0] + assert.Len(t, ts.LabelValues, 0) + require.Len(t, ts.Points, 1) + + var value float64 + if metricName == "process/uptime" || metricName == "process/cpu_seconds" { + value = ts.Points[0].Value.(float64) + } else { + value = float64(ts.Points[0].Value.(int64)) + } + + if metricName == "process/uptime" || metricName == "process/cpu_seconds" { + // This likely will still be zero when running the test. + assert.GreaterOrEqual(t, value, float64(0), metricName) + continue + } + assert.Greater(t, value, float64(0), metricName) + } +}