From f126bcf3e0218fe3fc6714cc9f4f50a7996c1b68 Mon Sep 17 00:00:00 2001 From: Jongwon Youn Date: Fri, 25 Oct 2024 21:39:45 +0900 Subject: [PATCH] profiler: add enable flag to control profiler activation (#2840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces a new environment variable DD_PROFILING_ENABLED. If DD_PROFILING_ENABLED is set to false, profiling will be disabled even if profiler.Start() is called. This allows the application code to always call profiler.Start() while dynamically adjusting profiling through the environment variable. Note that DD_PROFILING_ENABLED=true is not sufficient by itself to enable profiling; you must still call profiler.Start(). Co-authored-by: Nick Ripley Co-authored-by: Felix Geisendörfer Co-authored-by: Felix Geisendörfer --- profiler/options.go | 9 +++++++++ profiler/options_test.go | 15 +++++++++++++++ profiler/profiler.go | 6 ++++++ profiler/profiler_test.go | 27 +++++++++++++++++++++++++++ profiler/telemetry.go | 1 + 5 files changed, 58 insertions(+) diff --git a/profiler/options.go b/profiler/options.go index 36e715c156..994ab21ecd 100644 --- a/profiler/options.go +++ b/profiler/options.go @@ -111,6 +111,7 @@ type config struct { logStartup bool traceConfig executionTraceConfig endpointCountEnabled bool + enabled bool } // logStartup records the configuration to the configured logger in JSON format @@ -146,6 +147,7 @@ func logStartup(c *config) { "execution_trace_size_limit": c.traceConfig.Limit, "endpoint_count_enabled": c.endpointCountEnabled, "custom_profiler_label_keys": c.customProfilerLabels, + "enabled": c.enabled, } b, err := json.Marshal(info) if err != nil { @@ -208,6 +210,13 @@ func defaultConfig() (*config, error) { } else { c.agentURL = url.String() + "/profiling/v1/input" } + // If DD_PROFILING_ENABLED is set to "auto", the profiler's activation will be determined by + // the Datadog admission controller, so we set it to true. + if os.Getenv("DD_PROFILING_ENABLED") == "auto" { + c.enabled = true + } else { + c.enabled = internal.BoolEnv("DD_PROFILING_ENABLED", true) + } if v := os.Getenv("DD_PROFILING_UPLOAD_TIMEOUT"); v != "" { d, err := time.ParseDuration(v) if err != nil { diff --git a/profiler/options_test.go b/profiler/options_test.go index 935a8ba18d..7861c55172 100644 --- a/profiler/options_test.go +++ b/profiler/options_test.go @@ -264,6 +264,21 @@ func TestEnvVars(t *testing.T) { assert.Equal(t, "http://localhost:6218/profiling/v1/input", cfg.agentURL) }) + t.Run("DD_PROFILING_ENABLED", func(t *testing.T) { + t.Run("default", func(t *testing.T) { + cfg, err := defaultConfig() + require.NoError(t, err) + assert.Equal(t, true, cfg.enabled) + }) + + t.Run("override", func(t *testing.T) { + t.Setenv("DD_PROFILING_ENABLED", "false") + cfg, err := defaultConfig() + require.NoError(t, err) + assert.Equal(t, false, cfg.enabled) + }) + }) + t.Run("DD_PROFILING_UPLOAD_TIMEOUT", func(t *testing.T) { t.Setenv("DD_PROFILING_UPLOAD_TIMEOUT", "3s") cfg, err := defaultConfig() diff --git a/profiler/profiler.go b/profiler/profiler.go index 0dba2062b8..9c7ae8cf17 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -47,6 +47,9 @@ var ( // // It may return an error if an API key is not provided by means of the // WithAPIKey option, or if a hostname is not found. +// +// If DD_PROFILING_ENABLED=false is set in the process environment, it will +// prevent the profiler from starting. func Start(opts ...Option) error { mu.Lock() defer mu.Unlock() @@ -58,6 +61,9 @@ func Start(opts ...Option) error { if err != nil { return err } + if !p.cfg.enabled { + return nil + } activeProfiler = p activeProfiler.run() traceprof.SetProfilerEnabled(true) diff --git a/profiler/profiler_test.go b/profiler/profiler_test.go index 3d65fd8d58..d8b3b1e182 100644 --- a/profiler/profiler_test.go +++ b/profiler/profiler_test.go @@ -83,6 +83,20 @@ func TestStart(t *testing.T) { mu.Unlock() }) + t.Run("dd_profiling_not_enabled", func(t *testing.T) { + t.Setenv("DD_PROFILING_ENABLED", "false") + if err := Start(); err != nil { + t.Fatal(err) + } + defer Stop() + + mu.Lock() + // if DD_PROFILING_ENABLED is false, the profiler should not be started even if Start() is called + // So we should not have an activeProfiler + assert.Nil(t, activeProfiler) + mu.Unlock() + }) + t.Run("options", func(t *testing.T) { if err := Start(); err != nil { t.Fatal(err) @@ -449,6 +463,19 @@ func TestImmediateProfile(t *testing.T) { } } +func TestEnabledFalse(t *testing.T) { + t.Setenv("DD_PROFILING_ENABLED", "false") + ch := startTestProfiler(t, 1, WithPeriod(10*time.Millisecond), WithProfileTypes()) + select { + case <-ch: + t.Fatal("received profile when profiler should have been disabled") + case <-time.After(time.Second): + // This test might succeed incorrectly on an overloaded + // CI server, but is very likely to fail locally given a + // buggy implementation + } +} + func TestExecutionTraceCPUProfileRate(t *testing.T) { // cpuProfileRate is picked randomly so we can check for it in the trace // data to reduce the chance that it occurs in the trace data for some other diff --git a/profiler/telemetry.go b/profiler/telemetry.go index 2e6a5fa88c..0dc2cf9be8 100644 --- a/profiler/telemetry.go +++ b/profiler/telemetry.go @@ -54,6 +54,7 @@ func startTelemetry(c *config) { {Name: "execution_trace_size_limit", Value: c.traceConfig.Limit}, {Name: "endpoint_count_enabled", Value: c.endpointCountEnabled}, {Name: "num_custom_profiler_label_keys", Value: len(c.customProfilerLabels)}, + {Name: "enabled", Value: c.enabled}, }, ) }