Skip to content

Commit

Permalink
Unset HOST_PROC to make sure we use and report the actual process and…
Browse files Browse the repository at this point in the history
… introduce an option to allow user to overwrite the unset logic and use the value set in the environment variable

Signed-off-by: Dani Louca <[email protected]>
  • Loading branch information
dloucasfx committed Mar 27, 2023
1 parent ff0dd4c commit a9c5b6f
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 8 deletions.
16 changes: 16 additions & 0 deletions .chloggen/metric-server-hostproc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: service/proctelemetry

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Make sure OTEL internal metrics is reading and reporting its own process data when HOST_PROC is set set

# One or more tracking issues or pull requests related to the change
issues: []

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: If user wants to use the path defined in the environment variable HOST_PROC, then they can set `useHostProcEnvVar` to true
11 changes: 10 additions & 1 deletion service/internal/proctelemetry/process_telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,27 @@ type processMetrics struct {

// RegisterProcessMetrics creates a new set of processMetrics (mem, cpu) that can be used to measure
// basic information about this process.
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64) error {
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64, useHostProcEnvVar bool) error {
var err error
pm := &processMetrics{
startTimeUnixNano: time.Now().UnixNano(),
ballastSizeBytes: ballastSizeBytes,
ms: &runtime.MemStats{},
}

hostproc, exists := os.LookupEnv("HOST_PROC")
// unset HOST_PROC env variable so the process search occurs locally
if !useHostProcEnvVar && exists {
os.Unsetenv("HOST_PROC")
}
pm.proc, err = process.NewProcess(int32(os.Getpid()))
if err != nil {
return err
}
// restore immediately if it was previously set, don't use defer
if !useHostProcEnvVar && exists {
os.Setenv("HOST_PROC", hostproc)
}

if useOtel {
return pm.recordWithOtel(mp.Meter(scopeName))
Expand Down
51 changes: 48 additions & 3 deletions service/internal/proctelemetry/process_telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -116,7 +117,7 @@ func fetchPrometheusMetrics(handler http.Handler) (map[string]*io_prometheus_cli
func TestOtelProcessTelemetry(t *testing.T) {
tel := setupTelemetry(t)

require.NoError(t, RegisterProcessMetrics(nil, tel.MeterProvider, true, 0))
require.NoError(t, RegisterProcessMetrics(nil, tel.MeterProvider, true, 0, false))

mp, err := fetchPrometheusMetrics(tel.promHandler)
require.NoError(t, err)
Expand Down Expand Up @@ -148,7 +149,51 @@ func TestOtelProcessTelemetry(t *testing.T) {
func TestOCProcessTelemetry(t *testing.T) {
ocRegistry := metric.NewRegistry()

require.NoError(t, RegisterProcessMetrics(ocRegistry, otelmetric.NewNoopMeterProvider(), false, 0))
require.NoError(t, RegisterProcessMetrics(ocRegistry, otelmetric.NewNoopMeterProvider(), false, 0, false))

// Check that the metrics are actually filled.
<-time.After(200 * time.Millisecond)

metrics := ocRegistry.Read()

for _, metricName := range expectedMetrics {
m := findMetric(metrics, metricName)
require.NotNil(t, m)
require.Len(t, m.TimeSeries, 1)
ts := m.TimeSeries[0]
assert.Len(t, ts.LabelValues, 0)
require.Len(t, ts.Points, 1)

var value float64
if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
value = ts.Points[0].Value.(float64)
} else {
value = float64(ts.Points[0].Value.(int64))
}

if metricName == "process/uptime" || metricName == "process/cpu_seconds" {
// This likely will still be zero when running the test.
assert.GreaterOrEqual(t, value, float64(0), metricName)
continue
}

assert.Greater(t, value, float64(0), metricName)
}
}

func TestOCProcessTelemetryWithHostProc(t *testing.T) {
ocRegistry := metric.NewRegistry()
hostProc := "/host/proc"
os.Setenv("HOST_PROC", hostProc)

require.NoError(t, RegisterProcessMetrics(ocRegistry, otelmetric.NewNoopMeterProvider(), false, 0, false))

// make sure HOST_PROC is restored
envValue, _ := os.LookupEnv("HOST_PROC")
require.Equal(t, envValue, hostProc)

// unset HOST_PROC so other tests are not impacted
defer os.Unsetenv("HOST_PROC")

// Check that the metrics are actually filled.
<-time.After(200 * time.Millisecond)
Expand Down Expand Up @@ -186,7 +231,7 @@ func TestProcessTelemetryFailToRegister(t *testing.T) {
ocRegistry := metric.NewRegistry()
_, err := ocRegistry.AddFloat64Gauge(metricName)
require.NoError(t, err)
assert.Error(t, RegisterProcessMetrics(ocRegistry, otelmetric.NewNoopMeterProvider(), false, 0))
assert.Error(t, RegisterProcessMetrics(ocRegistry, otelmetric.NewNoopMeterProvider(), false, 0, false))
})
}
}
Expand Down
2 changes: 1 addition & 1 deletion service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func (srv *Service) initExtensionsAndPipeline(ctx context.Context, set Settings,

if cfg.Telemetry.Metrics.Level != configtelemetry.LevelNone && cfg.Telemetry.Metrics.Address != "" {
// The process telemetry initialization requires the ballast size, which is available after the extensions are initialized.
if err = proctelemetry.RegisterProcessMetrics(srv.telemetryInitializer.ocRegistry, srv.telemetryInitializer.mp, obsreportconfig.UseOtelForInternalMetricsfeatureGate.IsEnabled(), getBallastSize(srv.host)); err != nil {
if err = proctelemetry.RegisterProcessMetrics(srv.telemetryInitializer.ocRegistry, srv.telemetryInitializer.mp, obsreportconfig.UseOtelForInternalMetricsfeatureGate.IsEnabled(), getBallastSize(srv.host), cfg.Telemetry.Metrics.UseHostProcEnvVar); err != nil {
return fmt.Errorf("failed to register process metrics: %w", err)
}
}
Expand Down
12 changes: 12 additions & 0 deletions service/telemetry/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package telemetry // import "go.opentelemetry.io/collector/service/telemetry"

import (
"fmt"
"os"

"go.uber.org/zap/zapcore"

Expand Down Expand Up @@ -116,6 +117,10 @@ type MetricsConfig struct {

// Address is the [address]:port that metrics exposition should be bound to.
Address string `mapstructure:"address"`

// UseHostProcEnvVar when set to true, the metric server, through gopsutil,
// lookup for the otel process in the proc path defined in the HOST_PROC environment variable.
UseHostProcEnvVar bool `mapstructure:"useHostProcEnvVar"`
}

// TracesConfig exposes the common Telemetry configuration for collector's internal spans.
Expand All @@ -134,6 +139,13 @@ func (c *Config) Validate() error {
if c.Metrics.Level != configtelemetry.LevelNone && c.Metrics.Address == "" {
return fmt.Errorf("collector telemetry metric address should exist when metric level is not none")
}
// Validate that the hostproc env variable is set when metric server is enabled
if c.Metrics.Level != configtelemetry.LevelNone && c.Metrics.Address != "" && c.Metrics.UseHostProcEnvVar {
if _, exists := os.LookupEnv("HOST_PROC"); !exists {
return fmt.Errorf("collector telemetry metric UueHostProcEnvVar " +
"is set to true, but HOST_PROC env variavle is not set")
}
}

return nil
}
36 changes: 33 additions & 3 deletions service/telemetry/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package telemetry

import (
"os"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -24,9 +25,10 @@ import (

func TestLoadConfig(t *testing.T) {
tests := []struct {
name string
cfg *Config
success bool
name string
cfg *Config
success bool
hostproc string
}{
{
name: "basic metric telemetry",
Expand All @@ -48,10 +50,38 @@ func TestLoadConfig(t *testing.T) {
},
success: false,
},
{
name: "metric telemetry with valid useHostProcEnvVar",
cfg: &Config{
Metrics: MetricsConfig{
Level: configtelemetry.LevelBasic,
Address: "127.0.0.1:3333",
UseHostProcEnvVar: true,
},
},
success: true,
hostproc: "/host/proc",
},
{
name: "metric telemetry with useHostProcEnvVar but actual env variable not set",
cfg: &Config{
Metrics: MetricsConfig{
Level: configtelemetry.LevelBasic,
Address: "127.0.0.1:3333",
UseHostProcEnvVar: true,
},
},
success: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.hostproc != "" {
os.Setenv("HOST_PROC", tt.hostproc)
} else {
os.Unsetenv("HOST_PROC")
}
err := tt.cfg.Validate()
if tt.success {
assert.NoError(t, err)
Expand Down

0 comments on commit a9c5b6f

Please sign in to comment.