Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[service/proctelemetry] Offer to override the HOST_PROC environment variable with a programmatic value #7998

Merged
merged 14 commits into from
Aug 15, 2023
18 changes: 18 additions & 0 deletions .chloggen/metric-server-hostproc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: breaking
atoulme marked this conversation as resolved.
Show resolved Hide resolved

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: service/proctelemetry

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Allow to override the /proc folder on Linux used by process telemetry.

# One or more tracking issues or pull requests related to the change
issues: [7434]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
This is an API change only, allowing users to set a value to override the environment variable HOST_PROC.
This change affects the proctelemetry.RegisterProcessMetrics function, adding a vararg argument to pass options.
40 changes: 36 additions & 4 deletions service/internal/proctelemetry/process_telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"time"

"github.com/shirou/gopsutil/v3/common"
"github.com/shirou/gopsutil/v3/process"
"go.opencensus.io/metric"
"go.opencensus.io/stats"
Expand All @@ -27,6 +28,7 @@ type processMetrics struct {
startTimeUnixNano int64
ballastSizeBytes uint64
proc *process.Process
context context.Context

processUptime *metric.Float64DerivedCumulative
allocMem *metric.Int64DerivedGauge
Expand All @@ -49,17 +51,47 @@ type processMetrics struct {
ms *runtime.MemStats
}

type RegisterOption interface {
apply(*registerOption)
}

type registerOption struct {
hostProc string
}

type registerOptionFunc func(*registerOption)

func (fn registerOptionFunc) apply(set *registerOption) {
fn(set)
}

// WithHostProc overrides the /proc folder on Linux used by process telemetry.
func WithHostProc(hostProc string) RegisterOption {
return registerOptionFunc(func(uo *registerOption) {
uo.hostProc = hostProc
})
}

// RegisterProcessMetrics creates a new set of processMetrics (mem, cpu) that can be used to measure
// basic information about this process.
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64) error {
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64, opts ...RegisterOption) error {
set := registerOption{}
for _, opt := range opts {
opt.apply(&set)
}
var err error
pm := &processMetrics{
startTimeUnixNano: time.Now().UnixNano(),
ballastSizeBytes: ballastSizeBytes,
ms: &runtime.MemStats{},
}

pm.proc, err = process.NewProcess(int32(os.Getpid()))
ctx := context.Background()
if set.hostProc != "" {
ctx = context.WithValue(ctx, common.EnvKey, common.EnvMap{common.HostProcEnvKey: set.hostProc})
}
pm.context = ctx
pm.proc, err = process.NewProcessWithContext(pm.context, int32(os.Getpid()))
if err != nil {
return err
}
Expand Down Expand Up @@ -231,7 +263,7 @@ func (pm *processMetrics) updateSysMem() int64 {
}

func (pm *processMetrics) updateCPUSeconds() float64 {
times, err := pm.proc.Times()
times, err := pm.proc.TimesWithContext(pm.context)
if err != nil {
return 0
}
Expand All @@ -241,7 +273,7 @@ func (pm *processMetrics) updateCPUSeconds() float64 {
}

func (pm *processMetrics) updateRSSMemory() int64 {
mem, err := pm.proc.MemoryInfo()
mem, err := pm.proc.MemoryInfoWithContext(pm.context)
if err != nil {
return 0
}
Expand Down
53 changes: 53 additions & 0 deletions service/internal/proctelemetry/process_telemetry_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build linux
// +build linux

package proctelemetry

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opencensus.io/metric"
"go.opentelemetry.io/otel/metric/noop"
)

func TestOCProcessTelemetryWithHostProc(t *testing.T) {
ocRegistry := metric.NewRegistry()
// Make the sure the environment variable value is not used.
t.Setenv("HOST_PROC", "foo/bar")

require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0, WithHostProc("/proc")))

// Check that the metrics are actually filled.
time.Sleep(200 * time.Millisecond)

metrics := ocRegistry.Read()

for _, metricName := range expectedMetrics {
m := findMetric(metrics, metricName)
require.NotNil(t, m)
require.Len(t, m.TimeSeries, 1)
ts := m.TimeSeries[0]
assert.Len(t, ts.LabelValues, 0)
require.Len(t, ts.Points, 1)

var value float64
if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
value = ts.Points[0].Value.(float64)
} else {
value = float64(ts.Points[0].Value.(int64))
}

if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
// This likely will still be zero when running the test.
assert.GreaterOrEqual(t, value, float64(0), metricName)
continue
}
assert.Greater(t, value, float64(0), metricName)
}
}
3 changes: 1 addition & 2 deletions service/internal/proctelemetry/process_telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func TestOCProcessTelemetry(t *testing.T) {
require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0))

// Check that the metrics are actually filled.
<-time.After(200 * time.Millisecond)
time.Sleep(200 * time.Millisecond)
atoulme marked this conversation as resolved.
Show resolved Hide resolved

metrics := ocRegistry.Read()

Expand All @@ -164,7 +164,6 @@ func TestOCProcessTelemetry(t *testing.T) {
assert.GreaterOrEqual(t, value, float64(0), metricName)
continue
}

assert.Greater(t, value, float64(0), metricName)
}
}
Expand Down