diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 78f4a514ebec..452174afcc17 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -269,6 +269,7 @@ ALL_TESTS = [ "//pkg/security/username:username_disallowed_imports_test", "//pkg/security/username:username_test", "//pkg/security:security_test", + "//pkg/server/backgroundprofiler/profiler:profiler_test", "//pkg/server/debug/goroutineui:goroutineui_test", "//pkg/server/debug/pprofui:pprofui_test", "//pkg/server/debug:debug_test", @@ -1413,6 +1414,10 @@ GO_TARGETS = [ "//pkg/security/username:username_test", "//pkg/security:security", "//pkg/security:security_test", + "//pkg/server/backgroundprofiler/profiler:profiler", + "//pkg/server/backgroundprofiler/profiler:profiler_test", + "//pkg/server/backgroundprofiler:backgroundprofiler", + "//pkg/server/backgroundprofiler:executiontracer", "//pkg/server/debug/goroutineui:goroutineui", "//pkg/server/debug/goroutineui:goroutineui_test", "//pkg/server/debug/pprofui:pprofui", @@ -2783,6 +2788,8 @@ GET_X_DATA_TARGETS = [ "//pkg/security/sessionrevival:get_x_data", "//pkg/security/username:get_x_data", "//pkg/server:get_x_data", + "//pkg/server/backgroundprofiler:get_x_data", + "//pkg/server/backgroundprofiler/profiler:get_x_data", "//pkg/server/debug:get_x_data", "//pkg/server/debug/goroutineui:get_x_data", "//pkg/server/debug/pprofui:get_x_data", diff --git a/pkg/base/constants.go b/pkg/base/constants.go index e25f6f9d01f1..924ac08d88fa 100644 --- a/pkg/base/constants.go +++ b/pkg/base/constants.go @@ -46,6 +46,10 @@ const ( // stores profiles when the periodic CPU profile dump is enabled. CPUProfileDir = "pprof_dump" + // RuntimeProfileDir is the directory name where the + // backgroundprofiler.Profiler stores profiles. + RuntimeProfileDir = "runtime_profiler" + // InflightTraceDir is the directory name where the job trace dumper stores traces // when a job opts in to dumping its execution traces. InflightTraceDir = "inflight_trace_dump" diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index 2b2edebadfab..6876dec88c26 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -142,6 +142,11 @@ type TestServerArgs struct { // If set, a TraceDir is initialized at the provided path. TraceDir string + // If set, a RuntimeProfileDir is initialized at the provided path. Runtime + // profiles that are collected by backgroundprofiler.Profiler during the + // execution of the test will be written to this directory. + RuntimeProfileDir string + // DisableSpanConfigs disables the use of the span configs infrastructure // (in favor of the gossiped system config span). It's equivalent to setting // COCKROACH_DISABLE_SPAN_CONFIGS, and is only intended for tests written @@ -353,6 +358,13 @@ type TestTenantArgs struct { // If set, this directory should be cleaned up after the test completes. HeapProfileDirName string + // RuntimeProfileDirName is used to initialize the same named field on the + // SQLServer.BaseConfig field. It is the directory name for runtime profiles + // using backgroundprofiler.Profiler. If empty, no runtime profiles will be + // collected during the test. If set, this directory should be cleaned up + // after the test completes. + RuntimeProfileDirName string + // StartDiagnosticsReporting checks cluster.TelemetryOptOut(), and // if not disabled starts the asynchronous goroutine that checks for // CockroachDB upgrades and periodically reports diagnostics to diff --git a/pkg/cli/log_flags.go b/pkg/cli/log_flags.go index 21e918cfa44d..59f538325f4e 100644 --- a/pkg/cli/log_flags.go +++ b/pkg/cli/log_flags.go @@ -227,6 +227,7 @@ func setupLogging(ctx context.Context, cmd *cobra.Command, isServerCmd, applyCon serverCfg.HeapProfileDirName = filepath.Join(outputDirectory, base.HeapProfileDir) serverCfg.CPUProfileDirName = filepath.Join(outputDirectory, base.CPUProfileDir) serverCfg.InflightTraceDirName = filepath.Join(outputDirectory, base.InflightTraceDir) + serverCfg.RuntimeProfileDirName = filepath.Join(outputDirectory, base.RuntimeProfileDir) return nil } diff --git a/pkg/gen/protobuf.bzl b/pkg/gen/protobuf.bzl index 4aac989b5e90..56f9f5ac75b4 100644 --- a/pkg/gen/protobuf.bzl +++ b/pkg/gen/protobuf.bzl @@ -49,6 +49,7 @@ PROTOBUF_SRCS = [ "//pkg/repstream/streampb:streampb_go_proto", "//pkg/roachpb:roachpb_go_proto", "//pkg/rpc:rpc_go_proto", + "//pkg/server/backgroundprofiler/profiler:profiler_go_proto", "//pkg/server/diagnostics/diagnosticspb:diagnosticspb_go_proto", "//pkg/server/serverpb:serverpb_go_proto", "//pkg/server/status/statuspb:statuspb_go_proto", diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index e901b1ed8eb8..de50130a23ec 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -158,6 +158,7 @@ go_library( "//pkg/security/password", "//pkg/security/securityassets", "//pkg/security/username", + "//pkg/server/backgroundprofiler/profiler", "//pkg/server/debug", "//pkg/server/diagnostics", "//pkg/server/diagnostics/diagnosticspb", diff --git a/pkg/server/backgroundprofiler/BUILD.bazel b/pkg/server/backgroundprofiler/BUILD.bazel new file mode 100644 index 000000000000..36094dce2c3e --- /dev/null +++ b/pkg/server/backgroundprofiler/BUILD.bazel @@ -0,0 +1,20 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "executiontracer", + srcs = ["executiontracer.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/executiontracer", + visibility = ["//visibility:public"], + deps = ["//pkg/util/protoutil"], +) + +go_library( + name = "backgroundprofiler", + srcs = ["background_profiler.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler", + visibility = ["//visibility:public"], + deps = ["//pkg/util/protoutil"], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/server/backgroundprofiler/background_profiler.go b/pkg/server/backgroundprofiler/background_profiler.go new file mode 100644 index 000000000000..9302c0c232b9 --- /dev/null +++ b/pkg/server/backgroundprofiler/background_profiler.go @@ -0,0 +1,59 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package backgroundprofiler + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/util/protoutil" +) + +// ProfileID is a unique identifier of the operation being profiled by the +// Profiler. +type ProfileID int + +// SubscriberID is a unique identifier of the Subscriber subscribing to the +// background profile collection. +type SubscriberID int + +// IsSet returns true if the BackgroundProfiler is currently associated with a +// profileID. +func (r ProfileID) IsSet() bool { + return r != 0 +} + +// Subscriber is the interface that describes an object that can subscribe to +// the background profiler. +type Subscriber interface { + // LabelValue returns the value that will be used when setting the pprof + // labels of the Subscriber. The key of the label will always be the ProfileID + // thereby allowing us to identify all samples that describe the operation + // being profiled. + LabelValue() string + // Identifier returns the unique identifier of the Subscriber. + Identifier() SubscriberID + // ProfileID returns the unique identifier of the operation that the + // Subscriber is executing on behalf of. + ProfileID() ProfileID +} + +// Profiler is the interface that exposes methods to subscribe and unsubscribe +// from a background profiler. +type Profiler interface { + // Subscribe registers the subscriber with the background profiler. This + // method returns a context wrapped with pprof labels along with a closure to + // restore the original labels of the context. + Subscribe(ctx context.Context, subscriber Subscriber) (context.Context, func()) + // Unsubscribe unregisters the subscriber from the background profiler. If the + // subscriber is responsible for finishing the profile the method will also + // return metadata describing the collected profile. + Unsubscribe(subscriber Subscriber) (finishedProfile bool, msg protoutil.Message) +} diff --git a/pkg/server/backgroundprofiler/profiler/BUILD.bazel b/pkg/server/backgroundprofiler/profiler/BUILD.bazel new file mode 100644 index 000000000000..e1aa714330c4 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/BUILD.bazel @@ -0,0 +1,64 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") + +proto_library( + name = "profiler_proto", + srcs = ["profiler.proto"], + strip_import_prefix = "/pkg", + visibility = ["//visibility:public"], + deps = ["@com_github_gogo_protobuf//gogoproto:gogo_proto"], +) + +go_proto_library( + name = "profiler_go_proto", + compilers = ["//pkg/cmd/protoc-gen-gogoroach:protoc-gen-gogoroach_compiler"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler", + proto = ":profiler_proto", + visibility = ["//visibility:public"], + deps = ["@com_github_gogo_protobuf//gogoproto"], +) + +go_library( + name = "profiler", + srcs = ["profiler.go"], + embed = [":profiler_go_proto"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler", + visibility = ["//visibility:public"], + deps = [ + "//pkg/server/backgroundprofiler", + "//pkg/server/dumpstore", + "//pkg/settings", + "//pkg/settings/cluster", + "//pkg/util/log", + "//pkg/util/pprofutil", + "//pkg/util/protoutil", + "//pkg/util/stop", + "//pkg/util/syncutil", + "//pkg/util/timeutil", + "@com_github_cockroachdb_errors//:errors", + "@com_github_google_pprof//profile", + ], +) + +go_test( + name = "profiler_test", + srcs = ["profiler_test.go"], + args = ["-test.timeout=295s"], + deps = [ + ":profiler", + "//pkg/settings/cluster", + "//pkg/testutils", + "//pkg/util/ctxgroup", + "//pkg/util/log", + "//pkg/util/stop", + "//pkg/util/tracing", + "//pkg/util/tracing/tracingpb", + "@com_github_gogo_protobuf//types", + "@com_github_google_pprof//profile", + "@com_github_stretchr_testify//require", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/server/backgroundprofiler/profiler/profiler.go b/pkg/server/backgroundprofiler/profiler/profiler.go new file mode 100644 index 000000000000..978a334081f6 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler.go @@ -0,0 +1,342 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package profiler + +import ( + "bytes" + "context" + "fmt" + "os" + "regexp" + "runtime" + "runtime/pprof" + "runtime/trace" + "strings" + + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" + "github.com/cockroachdb/cockroach/pkg/server/dumpstore" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/pprofutil" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" + "github.com/google/pprof/profile" +) + +var maxCombinedFileSize = settings.RegisterByteSizeSetting( + settings.TenantWritable, + "server.cpu_profiler.total_dump_size_limit", + "maximum combined disk size of preserved runtime traces", + 128<<20, // 128MiB +) + +const timeFormat = "2006-01-02T15_04_05.000" +const CPUProfilerFileNamePrefix = "cpuprofiler." +const RuntimeTraceFileNamePrefix = "runtimetrace." + +// BackgroundProfiler is a background service that runs on every node and is +// capable of collecting on-demand CPU and execution profiles. +// +// The first Subscriber initializes the collection of the CPU and execution +// trace profiles. While the profiles are being collected, only Subscribers with +// the same `profileID` are allowed to join the running profiles. The profiles +// are stopped and persisted to local storage when the last Subscriber +// unsubscribes. +type BackgroundProfiler struct { + nodeID int32 + + st *cluster.Settings + stopper *stop.Stopper + dumpStore *dumpstore.DumpStore + + startProfile chan struct{} + startedProfile chan struct{} + stopProfile chan struct{} + res chan protoutil.Message + mu struct { + syncutil.Mutex + + // profileID is a unique ID that can be associated with the operation that + // we are profiling. Only subscribers that are executing as part of this + // operation are allowed to subscribe to the BackgroundProfiler. + profileID backgroundprofiler.ProfileID + subscribers map[backgroundprofiler.SubscriberID]struct{} + } +} + +// Subscribe implements the BackgroundProfiler interface. +func (r *BackgroundProfiler) Subscribe( + ctx context.Context, subscriber backgroundprofiler.Subscriber, +) (context.Context, func()) { + r.mu.Lock() + defer r.mu.Unlock() + + if r.mu.profileID.IsSet() { + if subscriber.ProfileID() != r.mu.profileID { + // TODO(adityamaru): This error needs to make its way back to the Subscriber. + log.Warningf(ctx, "subscriber with trace ID %d, cannot subscribe to the ongoing trace with ID %d", + subscriber.ProfileID(), r.mu.profileID) + return nil, nil + } + } else { + r.mu.profileID = subscriber.ProfileID() + } + + // Set the profiler labels for the new subscriber. + labelledCtx, undo := pprofutil.SetProfilerLabels(ctx, []string{fmt.Sprintf("%d", subscriber.ProfileID()), + subscriber.LabelValue()}...) + r.mu.subscribers[subscriber.Identifier()] = struct{}{} + + // If this is the first subscriber we should start profiling. + if len(r.mu.subscribers) == 1 { + r.startProfile <- struct{}{} + <-r.startedProfile + } + return labelledCtx, undo +} + +func (r *BackgroundProfiler) reset() { + r.mu.Lock() + defer r.mu.Unlock() + r.mu.subscribers = make(map[backgroundprofiler.SubscriberID]struct{}) + r.mu.profileID = 0 +} + +// Unsubscribe implements the BackgroundProfiler interface. +// +// This method unregisters the Subscriber from the running profile. If this is +// the last Subscriber then this method will stop the running profiles and +// return metadata describing the collected profiles. +func (r *BackgroundProfiler) Unsubscribe( + subscriber backgroundprofiler.Subscriber, +) (finishedProfiling bool, msg protoutil.Message) { + r.mu.Lock() + if _, ok := r.mu.subscribers[subscriber.Identifier()]; !ok { + r.mu.Unlock() + return false, nil + } + + delete(r.mu.subscribers, subscriber.Identifier()) + + // If there are no more subscribers, we should stop collecting runtime traces + // and return the traces we have aggregated so far. + if len(r.mu.subscribers) != 0 { + r.mu.Unlock() + return false, nil + } + + // Don't hold the lock while stopping the profiles. + r.mu.Unlock() + r.stopProfile <- struct{}{} + profileMessage := <-r.res + r.reset() + return true, profileMessage +} + +var _ backgroundprofiler.Profiler = &BackgroundProfiler{} + +// NewBackgroundProfiler returns an instance of a BackgroundProfiler. +func NewBackgroundProfiler( + ctx context.Context, + st *cluster.Settings, + stopper *stop.Stopper, + nodeID int32, + runtimeProfilerDir string, +) *BackgroundProfiler { + if runtimeProfilerDir == "" { + return nil + } + + if err := os.MkdirAll(runtimeProfilerDir, 0755); err != nil { + // This is possible when running with only in-memory stores; in that case + // the start-up code sets the output directory to the current directory (.). + // If running the process from a directory which is not writable, we won't + // be able to create a subdirectory here. + log.Warningf(ctx, "cannot create runtime trace dump dir -- runtime traces will be disabled: %v", err) + return nil + } + + et := &BackgroundProfiler{ + st: st, + stopper: stopper, + dumpStore: dumpstore.NewStore(runtimeProfilerDir, maxCombinedFileSize, st), + nodeID: nodeID, + startProfile: make(chan struct{}), + startedProfile: make(chan struct{}), + stopProfile: make(chan struct{}), + res: make(chan protoutil.Message), + } + et.mu.subscribers = make(map[backgroundprofiler.SubscriberID]struct{}) + et.mu.profileID = 0 + return et +} + +// CompileTagFilter compiles a filter from the passed in tag. This method is a +// stripped down version of `compileTagFilter` in +// https://github.com/google/pprof/blob/main/internal/driver/driver_focus.go#L80. +func CompileTagFilter(tag string) (func(*profile.Sample) bool, error) { + if tag == "" { + return nil, nil + } + + tagValuePair := strings.SplitN(tag, "=", 2) + var wantKey string + if len(tagValuePair) == 2 { + wantKey = tagValuePair[0] + tag = tagValuePair[1] + } + + var rfx []*regexp.Regexp + for _, tagf := range strings.Split(tag, ",") { + fx, err := regexp.Compile(tagf) + if err != nil { + return nil, errors.Wrap(err, "parsing regexp") + } + rfx = append(rfx, fx) + } + if wantKey == "" { + return func(s *profile.Sample) bool { + matchedrx: + for _, rx := range rfx { + for key, vals := range s.Label { + for _, val := range vals { + if rx.MatchString(key + ":" + val) { + continue matchedrx + } + } + } + return false + } + return true + }, nil + } + return func(s *profile.Sample) bool { + if vals, ok := s.Label[wantKey]; ok { + for _, rx := range rfx { + for _, val := range vals { + if rx.MatchString(val) { + return true + } + } + } + } + return false + }, nil +} + +// filterCPUProfileByLabel removes all samples from the profile, except those +// that match the label regular expression. The filtered profile is written to +// the passed in profileBytesBuf. +func filterCPUProfileByLabel(label string, profileBytesBuf *bytes.Buffer) error { + p, err := profile.Parse(profileBytesBuf) + if err != nil { + return err + } + + tm, err := CompileTagFilter(label) + if err != nil { + return err + } + fm, _ := p.FilterSamplesByTag(tm, nil) + if !fm { + return nil + } + + profileBytesBuf.Reset() + return p.Write(profileBytesBuf) +} + +// Start starts the BackgroundProfiler that listens for new Subscribers. +func (r *BackgroundProfiler) Start(ctx context.Context) error { + return r.stopper.RunAsyncTaskEx(ctx, stop.TaskOpts{ + TaskName: "backgroundprofiler", + SpanOpt: stop.ChildSpan, + }, func(ctx context.Context) { + ctxDone := ctx.Done() + for { + select { + case <-r.stopper.ShouldQuiesce(): + return + case <-ctxDone: + return + case <-r.startProfile: + + // Start collecting a CPU profile. + var cpuProfileBuf bytes.Buffer + + // TODO(during review): This is an arbitrarily chosen frequency, the + // runtime default is a 100Hz. This value will probably require tuning + // depending on our use case. Egs: profiling a single query execution vs + // profiling a long running job for a few minutes vs profiling a + // statement fingerprint. + runtime.SetCPUProfileRate(500) + if err := pprof.StartCPUProfile(&cpuProfileBuf); err != nil { + log.Warningf(ctx, "failed to start CPU profile %+v", err) + } + + // Start collecting an execution trace. + // + // Doing this after we start collecting CPU profiles means that our + // execution traces will also have CPU samples as events - + // https://go-review.googlesource.com/c/go/+/400795 + var executionTraceBuf bytes.Buffer + if err := trace.Start(&executionTraceBuf); err != nil { + log.Warningf(ctx, "failed to start execution trace %+v", err) + } + + r.startedProfile <- struct{}{} + + // TODO(during review): We should prevent the trace from growing too big + // and stop the trace every few seconds. Though the visualizer seems to + // chunk large traces into 1 second chunks so maybe we don't need to? + <-r.stopProfile + trace.Stop() + pprof.StopCPUProfile() + + // Filter entries from the CPU profile that are not labelled with our + // profile ID. + r.mu.Lock() + filterKey := fmt.Sprintf("%d", r.mu.profileID) + r.mu.Unlock() + if err := filterCPUProfileByLabel(filterKey, &cpuProfileBuf); err != nil { + log.Warningf(ctx, "failed to filer profile labels") + } + + // Persist the CPU profile + execution trace to local storage. + now := timeutil.Now() + cpuProfileName := CPUProfilerFileNamePrefix + now.Format(timeFormat) + cpuProfilePath := r.dumpStore.GetFullPath(cpuProfileName) + if err := os.WriteFile(cpuProfilePath, cpuProfileBuf.Bytes(), 0644); err != nil { + log.Warningf(ctx, "failed to write profiler to %s", cpuProfilePath) + } + + executionTraceName := RuntimeTraceFileNamePrefix + now.Format(timeFormat) + executionTracePath := r.dumpStore.GetFullPath(executionTraceName) + if err := os.WriteFile(executionTracePath, executionTraceBuf.Bytes(), 0644); err != nil { + log.Warningf(ctx, "failed to write execution trace to %s", executionTracePath) + } + + // TODO(adityamaru): Define GC semantics of the dumper. + + // Return a summary of the profiles collected. + r.res <- &Profile{ + NodeID: r.nodeID, + RuntimeTrace: executionTracePath, + CPUProfile: cpuProfilePath, + } + } + } + }) +} diff --git a/pkg/server/backgroundprofiler/profiler/profiler.proto b/pkg/server/backgroundprofiler/profiler/profiler.proto new file mode 100644 index 000000000000..7d6b21ce3155 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler.proto @@ -0,0 +1,20 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +syntax = "proto3"; +package cockroach.server.backgroundprofiler.profiler; +option go_package = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler"; + +import "gogoproto/gogo.proto"; + + +message Profile { + int32 node_id = 1 [(gogoproto.customname) = "NodeID"]; + string runtime_trace = 2; + string cpu_profile = 3 [(gogoproto.customname) = "CPUProfile"]; +} diff --git a/pkg/server/backgroundprofiler/profiler/profiler_test.go b/pkg/server/backgroundprofiler/profiler/profiler_test.go new file mode 100644 index 000000000000..58efae0cfec3 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler_test.go @@ -0,0 +1,126 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package profiler_test + +import ( + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/tracing" + "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" + "github.com/gogo/protobuf/types" + "github.com/google/pprof/profile" + "github.com/stretchr/testify/require" +) + +func TestBackgroundProfiler(t *testing.T) { + ctx := context.Background() + st := cluster.MakeTestingClusterSettings() + stopper := stop.NewStopper() + defer stopper.Stop(ctx) + dir, _ := testutils.TempDir(t) + //defer cleanup() + + bp := profiler.NewBackgroundProfiler(ctx, st, stopper, 1, dir) + wg := ctxgroup.WithContext(ctx) + wg.GoCtx(func(ctx context.Context) error { + if err := bp.Start(ctx); err != nil { + t.Fatal(err) + } + return nil + }) + defer func() { + if err := wg.Wait(); err != nil { + t.Fatal(err) + } + }() + + tr := tracing.NewTracerWithOpt(context.Background(), + tracing.WithTracingMode(tracing.TracingModeActiveSpansRegistry)) + tr.SetBackgroundProfiler(bp) + + _, sp := tr.StartSpanCtx(ctx, "root", + tracing.WithRecording(tracingpb.RecordingStructured), tracing.WithBackgroundProfiling()) + + done := make(chan int) + wg2 := ctxgroup.WithContext(ctx) + for i := 0; i < runtime.NumCPU(); i++ { + i := i + wg2.GoCtx(func(ctx context.Context) error { + // Start a new "root" span for each goroutine that is part of the same + // trace as sp. + operationName := fmt.Sprintf("op%d", i) + innerSp := tr.StartSpan(operationName, tracing.WithRemoteParentFromSpanMeta(sp.Meta())) + defer innerSp.Finish() + for { + select { + case <-done: + return nil + default: + log.Infof(ctx, "spinning in %d", i) + } + } + }) + } + + <-time.After(time.Second * 10) + close(done) + require.NoError(t, wg2.Wait()) + traceID := sp.TraceID() + rec := sp.FinishAndGetConfiguredRecording() + + // At this point we expect the background profiler to have written the + // profiles (CPU and execution traces) to dir. + dirEntry, err := os.ReadDir(dir) + require.NoError(t, err) + require.Len(t, dirEntry, 2) + + expectedProfileEvent := profiler.Profile{ + NodeID: 1, + } + for _, entry := range dirEntry { + // For CPU profiles we can verify that only samples labelled with our + // traceID are outputted in the profile. + profilePath := filepath.Join(dir, entry.Name()) + if strings.Contains(entry.Name(), profiler.CPUProfilerFileNamePrefix) { + expectedProfileEvent.CPUProfile = profilePath + f, err := os.Open(profilePath) + require.NoError(t, err) + prof, err := profile.Parse(f) + require.NoError(t, err) + ignoreTagFilter, err := profiler.CompileTagFilter(fmt.Sprintf("%d", traceID)) + require.NoError(t, err) + _, ignoreMatch := prof.FilterSamplesByTag(nil, ignoreTagFilter) + require.True(t, ignoreMatch) + require.Empty(t, prof.Sample) + } else { + expectedProfileEvent.RuntimeTrace = profilePath + } + } + + require.Len(t, rec[0].StructuredRecords, 1) + var p profiler.Profile + require.NoError(t, types.UnmarshalAny(rec[0].StructuredRecords[0].Payload, &p)) + require.Equal(t, expectedProfileEvent, p) + log.Infof(ctx, "dir is %s", dir) +} diff --git a/pkg/server/config.go b/pkg/server/config.go index 7d9f7d7a2217..8ace72defd68 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -181,6 +181,9 @@ type BaseConfig struct { // Only used if DisableRuntimeStatsMonitor is false. CPUProfileDirName string + // RuntimeProfileDirName is the directory name for runtime traces. + RuntimeProfileDirName string + // InflightTraceDirName is the directory name for job traces. InflightTraceDirName string diff --git a/pkg/server/server.go b/pkg/server/server.go index 52164de6b440..9cea13dc90d8 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -61,6 +61,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" "github.com/cockroachdb/cockroach/pkg/security/clientsecopts" "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler" "github.com/cockroachdb/cockroach/pkg/server/debug" "github.com/cockroachdb/cockroach/pkg/server/diagnostics" "github.com/cockroachdb/cockroach/pkg/server/serverpb" @@ -1747,6 +1748,15 @@ func (s *Server) PreStart(ctx context.Context) error { return err } + backgroundProfiler := profiler.NewBackgroundProfiler(workersCtx, s.ClusterSettings(), + s.stopper, int32(state.nodeID), s.cfg.RuntimeProfileDirName) + if backgroundProfiler != nil { + if err := backgroundProfiler.Start(workersCtx); err != nil { + return err + } + s.cfg.Tracer.SetBackgroundProfiler(backgroundProfiler) + } + // Export statistics to graphite, if enabled by configuration. var graphiteOnce sync.Once graphiteEndpoint.SetOnChange(&s.st.SV, func(context.Context) { diff --git a/pkg/server/server_controller_new_server.go b/pkg/server/server_controller_new_server.go index b57a4e37e85b..0bebd2e469aa 100644 --- a/pkg/server/server_controller_new_server.go +++ b/pkg/server/server_controller_new_server.go @@ -301,6 +301,7 @@ func makeSharedProcessTenantServerConfig( baseCfg.GoroutineDumpDirName = "" baseCfg.HeapProfileDirName = "" baseCfg.CPUProfileDirName = "" + baseCfg.RuntimeProfileDirName = "" // Expose the process-wide runtime metrics to the tenant's metric // collector. Since they are process-wide, all tenants can see them. diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 6fcbdba9b1ec..32d5556c9c4b 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -164,6 +164,7 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { cfg.InflightTraceDirName = params.TraceDir } } + cfg.RuntimeProfileDirName = params.RuntimeProfileDir if knobs := params.Knobs.Store; knobs != nil { if mo := knobs.(*kvserver.StoreTestingKnobs).MaxOffset; mo != 0 { cfg.MaxOffset = MaxOffsetType(mo) @@ -280,6 +281,9 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { if cfg.CPUProfileDirName == "" { cfg.CPUProfileDirName = filepath.Join(storeSpec.Path, "logs", base.CPUProfileDir) } + if cfg.RuntimeProfileDirName == "" { + cfg.RuntimeProfileDirName = filepath.Join(storeSpec.Path, "logs", base.RuntimeProfileDir) + } } } cfg.Stores = base.StoreSpecList{Specs: params.StoreSpecs} @@ -1066,6 +1070,7 @@ func (ts *TestServer) StartTenant( baseCfg.Locality = params.Locality baseCfg.HeapProfileDirName = params.HeapProfileDirName baseCfg.GoroutineDumpDirName = params.GoroutineDumpDirName + baseCfg.RuntimeProfileDirName = params.RuntimeProfileDirName baseCfg.ClusterName = ts.Cfg.ClusterName baseCfg.StartDiagnosticsReporting = params.StartDiagnosticsReporting baseCfg.DisableTLSForHTTP = params.DisableTLSForHTTP diff --git a/pkg/util/tracing/BUILD.bazel b/pkg/util/tracing/BUILD.bazel index 4c9c6f06eaba..14ffffbd7f00 100644 --- a/pkg/util/tracing/BUILD.bazel +++ b/pkg/util/tracing/BUILD.bazel @@ -21,6 +21,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/util/tracing", visibility = ["//visibility:public"], deps = [ + "//pkg/server/backgroundprofiler", "//pkg/settings", "//pkg/util", "//pkg/util/buildutil", diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index 20c27903d1b8..f4330c73d5ba 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -61,6 +61,12 @@ type crdbSpan struct { // whenever a Structured event is recorded by the span and its children. eventListeners []EventListener + // enableBackgroundProfiling is true if the span is part of an operation that + // is being profiled on-demand by the backgroundprofiler.Profiler. If true and + // if the span is a local, root span, then it will be Subscribed to the + // background profiler on creation and unsubscribed on Finish(). + enableBackgroundProfiling bool + // Locking rules: // - If locking both a parent and a child, the parent must be locked first. In // practice, children don't take the parent's lock. @@ -1446,6 +1452,12 @@ func (s *crdbSpan) setGoroutineID(gid int64) { s.mu.goroutineID = uint64(gid) } +func (s *crdbSpan) getGoroutineID() uint64 { + s.mu.Lock() + defer s.mu.Unlock() + return s.mu.goroutineID +} + type atomicRecordingType tracingpb.RecordingType // load returns the recording type. diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index 5a71f2e15ed8..69bccdc86fd7 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -17,6 +17,7 @@ import ( "sync/atomic" "time" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" "github.com/cockroachdb/logtags" @@ -140,6 +141,32 @@ type Span struct { finishStack string } +var _ backgroundprofiler.Subscriber = &Span{} + +func (sp *Span) LabelValue() string { + if sp.detectUseAfterFinish() { + return "" + } + + return sp.OperationName() +} + +func (sp *Span) Identifier() backgroundprofiler.SubscriberID { + if sp.detectUseAfterFinish() { + return 0 + } + + return backgroundprofiler.SubscriberID(sp.SpanID()) +} + +func (sp *Span) ProfileID() backgroundprofiler.ProfileID { + if sp.detectUseAfterFinish() { + return 0 + } + + return backgroundprofiler.ProfileID(sp.i.TraceID()) +} + // IsNoop returns true if this span is a black hole - it doesn't correspond to a // CRDB span and it doesn't output either to an OpenTelemetry tracer, or to // net.Trace. @@ -243,9 +270,29 @@ func (sp *Span) Redactable() bool { // // Finishing a nil *Span is a noop. func (sp *Span) Finish() { + sp.maybeUnsubscribeFromBackgroundProfiler() sp.finishInternal() } +// maybeUnsubscribeFromBackgroundProfiler unsubscribes the span from the +// backgroundProfiler if the span was subscribed to it on creation. +// If the span is responsible for finishing the profile it also emits a +// StructuredEvent that captures metadata about the collected profiles. +func (sp *Span) maybeUnsubscribeFromBackgroundProfiler() { + if sp == nil || sp.IsNoop() || sp.detectUseAfterFinish() { + return + } + + if sp.i.crdb.enableBackgroundProfiling { + if sp.Tracer().backgroundProfiler != nil { + finishedProfiling, profile := sp.Tracer().backgroundProfiler.Unsubscribe(sp) + if finishedProfiling { + sp.RecordStructured(profile) + } + } + } +} + // finishInternal finishes the span. func (sp *Span) finishInternal() { if sp == nil || sp.IsNoop() || sp.detectUseAfterFinish() { @@ -273,6 +320,7 @@ func (sp *Span) finishInternal() { func (sp *Span) FinishAndGetRecording(recType tracingpb.RecordingType) tracingpb.Recording { rec := tracingpb.Recording(nil) if sp.RecordingType() != tracingpb.RecordingOff { + sp.maybeUnsubscribeFromBackgroundProfiler() rec = sp.i.GetRecording(recType, true /* finishing */) } // Reach directly into sp.i to pass the finishing argument. @@ -290,6 +338,7 @@ func (sp *Span) FinishAndGetConfiguredRecording() tracingpb.Recording { rec := tracingpb.Recording(nil) recType := sp.RecordingType() if recType != tracingpb.RecordingOff { + sp.maybeUnsubscribeFromBackgroundProfiler() // Reach directly into sp.i to pass the finishing argument. rec = sp.i.GetRecording(recType, true /* finishing */) } @@ -876,6 +925,16 @@ type SpanMeta struct { // any info about the span in order to not have a child be created on the // other side. Similarly, ExtractMetaFrom does not deserialize this field. sterile bool + + // enableBackgroundProfiling is set if this span is part of an operation that + // should be profiled by the backgroundprofiler.Profiler. If true and if the + // span is a local, root span, then it will be Subscribed to the background + // profiler on creation and unsubscribed on Finish(). + // + // Note this field is propagated across the wire through a carrier so that + // remote child spans of an operation that is being profiled on the source + // node, start or join profiles being collected on the remote nodes. + enableBackgroundProfiling bool } // Empty returns whether or not the SpanMeta is a zero value. @@ -926,10 +985,11 @@ func SpanMetaFromProto(info tracingpb.TraceInfo) SpanMeta { } sm := SpanMeta{ - traceID: info.TraceID, - spanID: info.ParentSpanID, - otelCtx: otelCtx, - sterile: false, + traceID: info.TraceID, + spanID: info.ParentSpanID, + otelCtx: otelCtx, + sterile: false, + enableBackgroundProfiling: info.EnableBackgroundProfiling, } switch info.RecordingMode { case tracingpb.RecordingMode_OFF: diff --git a/pkg/util/tracing/span_inner.go b/pkg/util/tracing/span_inner.go index d5714d27dc29..3be819f04ee2 100644 --- a/pkg/util/tracing/span_inner.go +++ b/pkg/util/tracing/span_inner.go @@ -174,12 +174,13 @@ func (s *spanInner) Meta() SpanMeta { var traceID tracingpb.TraceID var spanID tracingpb.SpanID var recordingType tracingpb.RecordingType - var sterile bool + var sterile, enableBackgroundProfiling bool if s.crdb != nil { traceID, spanID = s.crdb.traceID, s.crdb.spanID recordingType = s.crdb.mu.recording.recordingType.load() sterile = s.isSterile() + enableBackgroundProfiling = s.crdb.enableBackgroundProfiling } var otelCtx oteltrace.SpanContext @@ -191,15 +192,17 @@ func (s *spanInner) Meta() SpanMeta { spanID == 0 && !otelCtx.TraceID().IsValid() && recordingType == 0 && - !sterile { + !sterile && + !enableBackgroundProfiling { return SpanMeta{} } return SpanMeta{ - traceID: traceID, - spanID: spanID, - otelCtx: otelCtx, - recordingType: recordingType, - sterile: sterile, + traceID: traceID, + spanID: spanID, + otelCtx: otelCtx, + recordingType: recordingType, + sterile: sterile, + enableBackgroundProfiling: enableBackgroundProfiling, } } diff --git a/pkg/util/tracing/span_options.go b/pkg/util/tracing/span_options.go index 0b7f970cadcd..692705d0a7fb 100644 --- a/pkg/util/tracing/span_options.go +++ b/pkg/util/tracing/span_options.go @@ -76,6 +76,7 @@ type spanOptions struct { SpanKind oteltrace.SpanKind // see WithSpanKind Sterile bool // see WithSterile EventListeners []EventListener // see WithEventListeners + EnableBackgroundProfiling bool // see WithBackgroundProfiling // recordingTypeExplicit is set if the WithRecording() option was used. In // that case, spanOptions.recordingType() returns recordingTypeOpt below. If @@ -93,6 +94,22 @@ func (opts *spanOptions) parentTraceID() tracingpb.TraceID { return 0 } +// enableBackgroundProfiling returns true if the span was created +// WithBackgroundProfiling() or the span has a remote/local parent that has +// background profiling enabled. +func (opts *spanOptions) enableBackgroundProfiling() bool { + enableBackgroundProfiling := opts.EnableBackgroundProfiling + if !opts.Parent.empty() && !opts.Parent.IsNoop() { + enableBackgroundProfiling = enableBackgroundProfiling || opts.Parent.i.crdb.enableBackgroundProfiling + } + + if !opts.RemoteParent.Empty() { + enableBackgroundProfiling = enableBackgroundProfiling || opts.RemoteParent.enableBackgroundProfiling + } + + return enableBackgroundProfiling +} + func (opts *spanOptions) parentSpanID() tracingpb.SpanID { if !opts.Parent.empty() && !opts.Parent.IsNoop() { return opts.Parent.i.crdb.spanID @@ -466,6 +483,27 @@ func (w withSterileOption) apply(opts spanOptions) spanOptions { return opts } +type backgroundProfilingOption struct{} + +var _ SpanOption = backgroundProfilingOption{} + +func (et backgroundProfilingOption) apply(opts spanOptions) spanOptions { + // Applying an EventListener span option implies the span has at least + // `RecordingStructured` recording type. If the span explicitly specifies a + // `RecordingVerbose` recording type via the `WithRecording(...)` option, that + // will be respected instead. + if !opts.recordingTypeExplicit { + opts.recordingTypeExplicit = true + opts.recordingTypeOpt = tracingpb.RecordingStructured + } + opts.EnableBackgroundProfiling = true + return opts +} + +func WithBackgroundProfiling() SpanOption { + return backgroundProfilingOption{} +} + type eventListenersOption []EventListener var _ SpanOption = eventListenersOption{} diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go index 330a6ec5575d..c2d586c573b8 100644 --- a/pkg/util/tracing/tracer.go +++ b/pkg/util/tracing/tracer.go @@ -22,6 +22,7 @@ import ( "time" "unsafe" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/buildutil" @@ -91,7 +92,8 @@ const ( fieldNameTraceID = prefixTracerState + "traceid" fieldNameSpanID = prefixTracerState + "spanid" // fieldNameRecordingType will contain the desired type of trace recording. - fieldNameRecordingType = "rec" + fieldNameRecordingType = "rec" + fieldNameEnableBackgroundProfiling = "backgroundprofile" // fieldNameOtel{TraceID,SpanID} will contain the OpenTelemetry span info, hex // encoded. @@ -217,7 +219,7 @@ type TracingMode int const ( // TracingModeFromEnv configures tracing according to enableTracingByDefault. TracingModeFromEnv TracingMode = iota - // TracingModeOnDemand means that Spans will no be created unless there's a + // TracingModeOnDemand means that Spans will not be created unless there's a // particular reason to create them (i.e. a span being created with // WithForceRealSpan(), a net.Trace or OpenTelemetry tracers attached). TracingModeOnDemand @@ -294,6 +296,11 @@ type Tracer struct { // them at the network boundary from KV. _redactable int32 // accessed atomically + // backgroundProfiler is a handle to the profiler service that can be + // subscribed to/unsubscribed from by spans that have been instructed to + // collect runtime profiles. + backgroundProfiler backgroundprofiler.Profiler + // Pointer to an OpenTelemetry tracer used as a "shadow tracer", if any. If // not nil, the respective *otel.Tracer will be used to create mirror spans // for all spans that the parent Tracer creates. @@ -384,7 +391,7 @@ type SpanRegistry struct { syncutil.Mutex // m stores all the currently open spans. Note that a span being present // here proves that the corresponding Span.Finish() call has not yet - // completed (but crdbSpan.Finish() might have finished), therefor the span + // completed (but crdbSpan.Finish() might have finished), therefore the span // cannot be reused while present in the registry. At the same time, note // that Span.Finish() can be called on these spans so, when using one of // these spans, we need to be prepared for that use to be concurrent with @@ -585,6 +592,12 @@ func (t *Tracer) ActiveSpansRegistryEnabled() bool { return atomic.LoadInt32(&t._activeSpansRegistryEnabled) != 0 } +// SetBackgroundProfiler sets a handle on the Tracer to the BackgroundProfiler +// service that can be used to trigger on-demand profiles. +func (t *Tracer) SetBackgroundProfiler(profiler backgroundprofiler.Profiler) { + t.backgroundProfiler = profiler +} + // NewTracer creates a Tracer with default options. // // See NewTracerWithOpt() for controlling various configuration options. @@ -1203,6 +1216,7 @@ child operation: %s, tracer created at: s.i.crdb.SetRecordingType(opts.recordingType()) s.i.crdb.parentSpanID = opts.parentSpanID() + s.i.crdb.enableBackgroundProfiling = opts.enableBackgroundProfiling() var localRoot bool { @@ -1251,6 +1265,28 @@ child operation: %s, tracer created at: // spans. Span.Finish will take care of removing it. if localRoot { t.activeSpansRegistry.addSpan(s.i.crdb) + + // TODO(during review): Do we want to tie this to Verbose recording or have + // its own flag. An advantage of making it an independent flag is that we do + // not have to enable the verbose collection of traces to trigger background + // profile collection. + // + // If the span is a local root and has background profiling enabled, we + // subscribe to the background profiler. Subscribing involves joining a + // running profile or starting a new profile if this is the first root span + // of the operation that is being profiled. + if s.i.crdb.enableBackgroundProfiling { + if t.backgroundProfiler != nil { + // We want the profiler labels to remain set for the lifetime of the + // tracing span. Given that we Unsubscribe only when the span is + // Finish()ing, i.e. when the goroutine is returning, we do not need to + // undo the profiler labels set by the backgroundProfiler. + labelledCtx, _ := t.backgroundProfiler.Subscribe(ctx, s) + if labelledCtx != nil { + ctx = labelledCtx + } + } + } } return maybeWrapCtx(ctx, s) @@ -1309,6 +1345,9 @@ func (t *Tracer) InjectMetaInto(sm SpanMeta, carrier Carrier) { carrier.Set(fieldNameTraceID, strconv.FormatUint(uint64(sm.traceID), 16)) carrier.Set(fieldNameSpanID, strconv.FormatUint(uint64(sm.spanID), 16)) carrier.Set(fieldNameRecordingType, sm.recordingType.ToCarrierValue()) + if sm.enableBackgroundProfiling { + carrier.Set(fieldNameEnableBackgroundProfiling, "") + } } var noopSpanMeta = SpanMeta{} @@ -1323,6 +1362,7 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { var otelSpanID oteltrace.SpanID var recordingTypeExplicit bool var recordingType tracingpb.RecordingType + var enableBackgroundProfiling bool iterFn := func(k, v string) error { switch k = strings.ToLower(k); k { @@ -1355,6 +1395,8 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { case fieldNameRecordingType: recordingTypeExplicit = true recordingType = tracingpb.RecordingTypeFromCarrierValue(v) + case fieldNameEnableBackgroundProfiling: + enableBackgroundProfiling = true } return nil } @@ -1398,7 +1440,8 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { // The sterile field doesn't make it across the wire. The simple fact that // there was any tracing info in the carrier means that the parent span was // not sterile. - sterile: false, + sterile: false, + enableBackgroundProfiling: enableBackgroundProfiling, }, nil } diff --git a/pkg/util/tracing/tracer_test.go b/pkg/util/tracing/tracer_test.go index 49234df1e5bc..048e71492457 100644 --- a/pkg/util/tracing/tracer_test.go +++ b/pkg/util/tracing/tracer_test.go @@ -300,7 +300,7 @@ func TestTracerInjectExtract(t *testing.T) { // Verify that verbose tracing is propagated and triggers verbosity on the // remote side. - s1 := tr.StartSpan("a", WithRecording(tracingpb.RecordingVerbose)) + s1 := tr.StartSpan("a", WithRecording(tracingpb.RecordingVerbose), WithBackgroundProfiling()) carrier := MetadataCarrier{metadata.MD{}} tr.InjectMetaInto(s1.Meta(), carrier) @@ -317,6 +317,7 @@ func TestTracerInjectExtract(t *testing.T) { if trace1 != trace2 { t.Errorf("traceID doesn't match: parent %d child %d", trace1, trace2) } + require.Equal(t, s1.Meta().enableBackgroundProfiling, s2.Meta().enableBackgroundProfiling) s2.Recordf("x=%d", 1) // Verify that recording was started automatically. diff --git a/pkg/util/tracing/tracingpb/tracing.proto b/pkg/util/tracing/tracingpb/tracing.proto index a5e4413fa5e3..c542f649aa91 100644 --- a/pkg/util/tracing/tracingpb/tracing.proto +++ b/pkg/util/tracing/tracingpb/tracing.proto @@ -43,5 +43,9 @@ message TraceInfo { } OtelInfo otel = 4; + + // EnableBackgroundProfiling is true when the operation is expected to collect + // profiles using the backgroundprofiler.Profiler. + bool enable_background_profiling = 5; }