Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[service/proctelemetry] Offer to override the HOST_PROC environment variable with a programmatic value #7998

Merged
merged 14 commits into from
Aug 15, 2023
16 changes: 16 additions & 0 deletions .chloggen/metric-server-hostproc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix
atoulme marked this conversation as resolved.
Show resolved Hide resolved

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: service/proctelemetry

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Allow to override the /proc folder on Linux used by process telemetry.

# One or more tracking issues or pull requests related to the change
issues: [7434]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: This is an API change only, allowing users to set a value to override the environment variable HOST_PROC.
15 changes: 11 additions & 4 deletions service/internal/proctelemetry/process_telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"time"

"github.com/shirou/gopsutil/v3/common"
"github.com/shirou/gopsutil/v3/process"
"go.opencensus.io/metric"
"go.opencensus.io/stats"
Expand All @@ -27,6 +28,7 @@ type processMetrics struct {
startTimeUnixNano int64
ballastSizeBytes uint64
proc *process.Process
context context.Context

processUptime *metric.Float64DerivedCumulative
allocMem *metric.Int64DerivedGauge
Expand All @@ -51,15 +53,20 @@ type processMetrics struct {

// RegisterProcessMetrics creates a new set of processMetrics (mem, cpu) that can be used to measure
// basic information about this process.
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64) error {
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64, hostProc string) error {
var err error
pm := &processMetrics{
startTimeUnixNano: time.Now().UnixNano(),
ballastSizeBytes: ballastSizeBytes,
ms: &runtime.MemStats{},
}

pm.proc, err = process.NewProcess(int32(os.Getpid()))
ctx := context.Background()
if hostProc != "" {
atoulme marked this conversation as resolved.
Show resolved Hide resolved
ctx = context.WithValue(ctx, common.EnvKey, common.EnvMap{common.HostProcEnvKey: hostProc})
}
pm.context = ctx
pm.proc, err = process.NewProcessWithContext(pm.context, int32(os.Getpid()))
if err != nil {
return err
}
Expand Down Expand Up @@ -231,7 +238,7 @@ func (pm *processMetrics) updateSysMem() int64 {
}

func (pm *processMetrics) updateCPUSeconds() float64 {
times, err := pm.proc.Times()
times, err := pm.proc.TimesWithContext(pm.context)
if err != nil {
return 0
}
Expand All @@ -241,7 +248,7 @@ func (pm *processMetrics) updateCPUSeconds() float64 {
}

func (pm *processMetrics) updateRSSMemory() int64 {
mem, err := pm.proc.MemoryInfo()
mem, err := pm.proc.MemoryInfoWithContext(pm.context)
if err != nil {
return 0
}
Expand Down
53 changes: 53 additions & 0 deletions service/internal/proctelemetry/process_telemetry_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build linux
// +build linux

package proctelemetry

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opencensus.io/metric"
"go.opentelemetry.io/otel/metric/noop"
)

func TestOCProcessTelemetryWithHostProc(t *testing.T) {
ocRegistry := metric.NewRegistry()
// Make the sure the environment variable value is not used.
t.Setenv("HOST_PROC", "foo/bar")

require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0, "/proc"))

// Check that the metrics are actually filled.
time.Sleep(200 * time.Millisecond)

metrics := ocRegistry.Read()

for _, metricName := range expectedMetrics {
m := findMetric(metrics, metricName)
require.NotNil(t, m)
require.Len(t, m.TimeSeries, 1)
ts := m.TimeSeries[0]
assert.Len(t, ts.LabelValues, 0)
require.Len(t, ts.Points, 1)

var value float64
if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
value = ts.Points[0].Value.(float64)
} else {
value = float64(ts.Points[0].Value.(int64))
}

if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
// This likely will still be zero when running the test.
assert.GreaterOrEqual(t, value, float64(0), metricName)
continue
}
assert.Greater(t, value, float64(0), metricName)
}
}
9 changes: 4 additions & 5 deletions service/internal/proctelemetry/process_telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func fetchPrometheusMetrics(handler http.Handler) (map[string]*io_prometheus_cli
func TestOtelProcessTelemetry(t *testing.T) {
tel := setupTelemetry(t)

require.NoError(t, RegisterProcessMetrics(nil, tel.MeterProvider, true, 0))
require.NoError(t, RegisterProcessMetrics(nil, tel.MeterProvider, true, 0, ""))

mp, err := fetchPrometheusMetrics(tel.promHandler)
require.NoError(t, err)
Expand Down Expand Up @@ -137,10 +137,10 @@ func TestOtelProcessTelemetry(t *testing.T) {
func TestOCProcessTelemetry(t *testing.T) {
ocRegistry := metric.NewRegistry()

require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0))
require.NoError(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0, ""))

// Check that the metrics are actually filled.
<-time.After(200 * time.Millisecond)
time.Sleep(200 * time.Millisecond)
atoulme marked this conversation as resolved.
Show resolved Hide resolved

metrics := ocRegistry.Read()

Expand All @@ -164,7 +164,6 @@ func TestOCProcessTelemetry(t *testing.T) {
assert.GreaterOrEqual(t, value, float64(0), metricName)
continue
}

assert.Greater(t, value, float64(0), metricName)
}
}
Expand All @@ -175,7 +174,7 @@ func TestProcessTelemetryFailToRegister(t *testing.T) {
ocRegistry := metric.NewRegistry()
_, err := ocRegistry.AddFloat64Gauge(metricName)
require.NoError(t, err)
assert.Error(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0))
assert.Error(t, RegisterProcessMetrics(ocRegistry, noop.NewMeterProvider(), false, 0, ""))
})
}
}
Expand Down
2 changes: 1 addition & 1 deletion service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (srv *Service) initExtensionsAndPipeline(ctx context.Context, set Settings,

if cfg.Telemetry.Metrics.Level != configtelemetry.LevelNone && cfg.Telemetry.Metrics.Address != "" {
// The process telemetry initialization requires the ballast size, which is available after the extensions are initialized.
if err = proctelemetry.RegisterProcessMetrics(srv.telemetryInitializer.ocRegistry, srv.telemetryInitializer.mp, obsreportconfig.UseOtelForInternalMetricsfeatureGate.IsEnabled(), getBallastSize(srv.host)); err != nil {
if err = proctelemetry.RegisterProcessMetrics(srv.telemetryInitializer.ocRegistry, srv.telemetryInitializer.mp, obsreportconfig.UseOtelForInternalMetricsfeatureGate.IsEnabled(), getBallastSize(srv.host), ""); err != nil {
return fmt.Errorf("failed to register process metrics: %w", err)
}
}
Expand Down