From b12cbd1b41d395f1d3f31a6268ca4f3489aa336e Mon Sep 17 00:00:00 2001 From: adityamaru Date: Tue, 7 Mar 2023 17:35:34 -0500 Subject: [PATCH] server,tracing: integrate on-demand profiling with CRDB tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces a BackgroundProfiler service that is started during server startup on each node in the cluster. The BackgroundProfiler is responsible for collecting on-demand CPU profiles and runtime traces for a particular operation. The profiler can be subscribed to by an in-process listener. The first Subscriber initializes the collection of the CPU and execution trace profiles. While the profiles are being collected, only Subscribers carrying the same `profileID` are allowed to join the running profiles. The profiles are stopped and persisted to local storage when the last Subscriber unsubscribes. The `profileID` is a unique identifier of the operation that is being traced. Since only one profile can be running in a process at a time, any Subscribers with different `profileID`s than the current one will be rejected. The in-process listeners described above will be CRDBs internal tracing spans. This change introduces a `WithBackgroudProfiling` option that can be used to instruct a tracing span to subscribe to the BackgroundProfiler. This option is propogated to all local and remote child spans created as part of the trace. Only local, root spans that have background profiling enabled will Subscribe to the profiler on creation. As mentioned above only one operation can be profiled at a time. We use the first root span's `TraceID` as the BackgroundProfiler's `profileID`. All subsequent root span's that are part of the same trace will be able to join the running profile. Tracing span's unsubscribe from the profile on Finish(). Every Susbcriber is returned a wrapped ctx with pprof labels that tie its execution to the profile being collected by the BackgroundProfiler. These labels are used to post-process the collected CPU profile and filter out samples that only correspond to our subscribers. The end result is filtered CPU profile prefixed `cpuprofiler.` and a process wide execution trace `runtimetrace.` persisted to local storage. This change only introduces the infrastructure to enable on-demand profiling. The test in `profiler_test.go` results in a CPU profile with information about each labelled root operation collected on-demand: ❯ go tool pprof cpuprofiler.2023-03-08T14_51_52.402 Type: cpu Time: Mar 8, 2023 at 9:51am (EST) Duration: 10.11s, Total samples = 8.57s (84.77%) Entering interactive mode (type "help" for commands, "o" for options) (pprof) tags 9171346462634118014: Total 8.6s 906.0ms (10.57%): op2 902.0ms (10.53%): op1 890.0ms (10.39%): op0 886.0ms (10.34%): op7 866.0ms (10.11%): op4 866.0ms (10.11%): op5 854.0ms ( 9.96%): op3 806.0ms ( 9.40%): op8 804.0ms ( 9.38%): op6 790.0ms ( 9.22%): op9 Execution traces do not surface pprof labels in golang yet but a future patch could consider cherry-picking https://go-review.googlesource.com/c/go/+/446975. This allows the user to focus on goroutines run with the specified pprof labels: With this framework in place one could envision the following use cases: - stmt diagnostics requests get a new option to request profiling. When requested, any local root trace span (i.e. while any part of the trace is active on a given node) subscribes to profiles, and references to the profiles collected are stored as payloads in the span. They're then included in the stmt bundle. - even outside of diagnostics, could mark traces as wanting to capture debug info for "slow spans". Such spans on creation could set a timer that, once it fires, subscribes to (short) execution traces periodically as a way to snapshot the goroutine's actions. These could be referenced in the span for later retrieval. Informs: #97215 --- pkg/BUILD.bazel | 7 + pkg/base/constants.go | 4 + pkg/base/test_server_args.go | 12 + pkg/cli/log_flags.go | 1 + pkg/gen/protobuf.bzl | 1 + pkg/server/BUILD.bazel | 1 + pkg/server/backgroundprofiler/BUILD.bazel | 20 + .../backgroundprofiler/background_profiler.go | 59 +++ .../backgroundprofiler/profiler/BUILD.bazel | 64 ++++ .../backgroundprofiler/profiler/profiler.go | 342 ++++++++++++++++++ .../profiler/profiler.proto | 20 + .../profiler/profiler_test.go | 126 +++++++ pkg/server/config.go | 3 + pkg/server/server.go | 10 + pkg/server/server_controller_new_server.go | 1 + pkg/server/testserver.go | 5 + pkg/util/tracing/BUILD.bazel | 1 + pkg/util/tracing/crdbspan.go | 12 + pkg/util/tracing/span.go | 68 +++- pkg/util/tracing/span_inner.go | 17 +- pkg/util/tracing/span_options.go | 38 ++ pkg/util/tracing/tracer.go | 51 ++- pkg/util/tracing/tracer_test.go | 3 +- pkg/util/tracing/tracingpb/tracing.proto | 4 + 24 files changed, 854 insertions(+), 16 deletions(-) create mode 100644 pkg/server/backgroundprofiler/BUILD.bazel create mode 100644 pkg/server/backgroundprofiler/background_profiler.go create mode 100644 pkg/server/backgroundprofiler/profiler/BUILD.bazel create mode 100644 pkg/server/backgroundprofiler/profiler/profiler.go create mode 100644 pkg/server/backgroundprofiler/profiler/profiler.proto create mode 100644 pkg/server/backgroundprofiler/profiler/profiler_test.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 78f4a514ebec..452174afcc17 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -269,6 +269,7 @@ ALL_TESTS = [ "//pkg/security/username:username_disallowed_imports_test", "//pkg/security/username:username_test", "//pkg/security:security_test", + "//pkg/server/backgroundprofiler/profiler:profiler_test", "//pkg/server/debug/goroutineui:goroutineui_test", "//pkg/server/debug/pprofui:pprofui_test", "//pkg/server/debug:debug_test", @@ -1413,6 +1414,10 @@ GO_TARGETS = [ "//pkg/security/username:username_test", "//pkg/security:security", "//pkg/security:security_test", + "//pkg/server/backgroundprofiler/profiler:profiler", + "//pkg/server/backgroundprofiler/profiler:profiler_test", + "//pkg/server/backgroundprofiler:backgroundprofiler", + "//pkg/server/backgroundprofiler:executiontracer", "//pkg/server/debug/goroutineui:goroutineui", "//pkg/server/debug/goroutineui:goroutineui_test", "//pkg/server/debug/pprofui:pprofui", @@ -2783,6 +2788,8 @@ GET_X_DATA_TARGETS = [ "//pkg/security/sessionrevival:get_x_data", "//pkg/security/username:get_x_data", "//pkg/server:get_x_data", + "//pkg/server/backgroundprofiler:get_x_data", + "//pkg/server/backgroundprofiler/profiler:get_x_data", "//pkg/server/debug:get_x_data", "//pkg/server/debug/goroutineui:get_x_data", "//pkg/server/debug/pprofui:get_x_data", diff --git a/pkg/base/constants.go b/pkg/base/constants.go index e25f6f9d01f1..924ac08d88fa 100644 --- a/pkg/base/constants.go +++ b/pkg/base/constants.go @@ -46,6 +46,10 @@ const ( // stores profiles when the periodic CPU profile dump is enabled. CPUProfileDir = "pprof_dump" + // RuntimeProfileDir is the directory name where the + // backgroundprofiler.Profiler stores profiles. + RuntimeProfileDir = "runtime_profiler" + // InflightTraceDir is the directory name where the job trace dumper stores traces // when a job opts in to dumping its execution traces. InflightTraceDir = "inflight_trace_dump" diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index 2b2edebadfab..6876dec88c26 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -142,6 +142,11 @@ type TestServerArgs struct { // If set, a TraceDir is initialized at the provided path. TraceDir string + // If set, a RuntimeProfileDir is initialized at the provided path. Runtime + // profiles that are collected by backgroundprofiler.Profiler during the + // execution of the test will be written to this directory. + RuntimeProfileDir string + // DisableSpanConfigs disables the use of the span configs infrastructure // (in favor of the gossiped system config span). It's equivalent to setting // COCKROACH_DISABLE_SPAN_CONFIGS, and is only intended for tests written @@ -353,6 +358,13 @@ type TestTenantArgs struct { // If set, this directory should be cleaned up after the test completes. HeapProfileDirName string + // RuntimeProfileDirName is used to initialize the same named field on the + // SQLServer.BaseConfig field. It is the directory name for runtime profiles + // using backgroundprofiler.Profiler. If empty, no runtime profiles will be + // collected during the test. If set, this directory should be cleaned up + // after the test completes. + RuntimeProfileDirName string + // StartDiagnosticsReporting checks cluster.TelemetryOptOut(), and // if not disabled starts the asynchronous goroutine that checks for // CockroachDB upgrades and periodically reports diagnostics to diff --git a/pkg/cli/log_flags.go b/pkg/cli/log_flags.go index 21e918cfa44d..59f538325f4e 100644 --- a/pkg/cli/log_flags.go +++ b/pkg/cli/log_flags.go @@ -227,6 +227,7 @@ func setupLogging(ctx context.Context, cmd *cobra.Command, isServerCmd, applyCon serverCfg.HeapProfileDirName = filepath.Join(outputDirectory, base.HeapProfileDir) serverCfg.CPUProfileDirName = filepath.Join(outputDirectory, base.CPUProfileDir) serverCfg.InflightTraceDirName = filepath.Join(outputDirectory, base.InflightTraceDir) + serverCfg.RuntimeProfileDirName = filepath.Join(outputDirectory, base.RuntimeProfileDir) return nil } diff --git a/pkg/gen/protobuf.bzl b/pkg/gen/protobuf.bzl index 4aac989b5e90..56f9f5ac75b4 100644 --- a/pkg/gen/protobuf.bzl +++ b/pkg/gen/protobuf.bzl @@ -49,6 +49,7 @@ PROTOBUF_SRCS = [ "//pkg/repstream/streampb:streampb_go_proto", "//pkg/roachpb:roachpb_go_proto", "//pkg/rpc:rpc_go_proto", + "//pkg/server/backgroundprofiler/profiler:profiler_go_proto", "//pkg/server/diagnostics/diagnosticspb:diagnosticspb_go_proto", "//pkg/server/serverpb:serverpb_go_proto", "//pkg/server/status/statuspb:statuspb_go_proto", diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index e901b1ed8eb8..de50130a23ec 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -158,6 +158,7 @@ go_library( "//pkg/security/password", "//pkg/security/securityassets", "//pkg/security/username", + "//pkg/server/backgroundprofiler/profiler", "//pkg/server/debug", "//pkg/server/diagnostics", "//pkg/server/diagnostics/diagnosticspb", diff --git a/pkg/server/backgroundprofiler/BUILD.bazel b/pkg/server/backgroundprofiler/BUILD.bazel new file mode 100644 index 000000000000..36094dce2c3e --- /dev/null +++ b/pkg/server/backgroundprofiler/BUILD.bazel @@ -0,0 +1,20 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "executiontracer", + srcs = ["executiontracer.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/executiontracer", + visibility = ["//visibility:public"], + deps = ["//pkg/util/protoutil"], +) + +go_library( + name = "backgroundprofiler", + srcs = ["background_profiler.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler", + visibility = ["//visibility:public"], + deps = ["//pkg/util/protoutil"], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/server/backgroundprofiler/background_profiler.go b/pkg/server/backgroundprofiler/background_profiler.go new file mode 100644 index 000000000000..9302c0c232b9 --- /dev/null +++ b/pkg/server/backgroundprofiler/background_profiler.go @@ -0,0 +1,59 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package backgroundprofiler + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/util/protoutil" +) + +// ProfileID is a unique identifier of the operation being profiled by the +// Profiler. +type ProfileID int + +// SubscriberID is a unique identifier of the Subscriber subscribing to the +// background profile collection. +type SubscriberID int + +// IsSet returns true if the BackgroundProfiler is currently associated with a +// profileID. +func (r ProfileID) IsSet() bool { + return r != 0 +} + +// Subscriber is the interface that describes an object that can subscribe to +// the background profiler. +type Subscriber interface { + // LabelValue returns the value that will be used when setting the pprof + // labels of the Subscriber. The key of the label will always be the ProfileID + // thereby allowing us to identify all samples that describe the operation + // being profiled. + LabelValue() string + // Identifier returns the unique identifier of the Subscriber. + Identifier() SubscriberID + // ProfileID returns the unique identifier of the operation that the + // Subscriber is executing on behalf of. + ProfileID() ProfileID +} + +// Profiler is the interface that exposes methods to subscribe and unsubscribe +// from a background profiler. +type Profiler interface { + // Subscribe registers the subscriber with the background profiler. This + // method returns a context wrapped with pprof labels along with a closure to + // restore the original labels of the context. + Subscribe(ctx context.Context, subscriber Subscriber) (context.Context, func()) + // Unsubscribe unregisters the subscriber from the background profiler. If the + // subscriber is responsible for finishing the profile the method will also + // return metadata describing the collected profile. + Unsubscribe(subscriber Subscriber) (finishedProfile bool, msg protoutil.Message) +} diff --git a/pkg/server/backgroundprofiler/profiler/BUILD.bazel b/pkg/server/backgroundprofiler/profiler/BUILD.bazel new file mode 100644 index 000000000000..e1aa714330c4 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/BUILD.bazel @@ -0,0 +1,64 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") + +proto_library( + name = "profiler_proto", + srcs = ["profiler.proto"], + strip_import_prefix = "/pkg", + visibility = ["//visibility:public"], + deps = ["@com_github_gogo_protobuf//gogoproto:gogo_proto"], +) + +go_proto_library( + name = "profiler_go_proto", + compilers = ["//pkg/cmd/protoc-gen-gogoroach:protoc-gen-gogoroach_compiler"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler", + proto = ":profiler_proto", + visibility = ["//visibility:public"], + deps = ["@com_github_gogo_protobuf//gogoproto"], +) + +go_library( + name = "profiler", + srcs = ["profiler.go"], + embed = [":profiler_go_proto"], + importpath = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler", + visibility = ["//visibility:public"], + deps = [ + "//pkg/server/backgroundprofiler", + "//pkg/server/dumpstore", + "//pkg/settings", + "//pkg/settings/cluster", + "//pkg/util/log", + "//pkg/util/pprofutil", + "//pkg/util/protoutil", + "//pkg/util/stop", + "//pkg/util/syncutil", + "//pkg/util/timeutil", + "@com_github_cockroachdb_errors//:errors", + "@com_github_google_pprof//profile", + ], +) + +go_test( + name = "profiler_test", + srcs = ["profiler_test.go"], + args = ["-test.timeout=295s"], + deps = [ + ":profiler", + "//pkg/settings/cluster", + "//pkg/testutils", + "//pkg/util/ctxgroup", + "//pkg/util/log", + "//pkg/util/stop", + "//pkg/util/tracing", + "//pkg/util/tracing/tracingpb", + "@com_github_gogo_protobuf//types", + "@com_github_google_pprof//profile", + "@com_github_stretchr_testify//require", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/server/backgroundprofiler/profiler/profiler.go b/pkg/server/backgroundprofiler/profiler/profiler.go new file mode 100644 index 000000000000..978a334081f6 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler.go @@ -0,0 +1,342 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package profiler + +import ( + "bytes" + "context" + "fmt" + "os" + "regexp" + "runtime" + "runtime/pprof" + "runtime/trace" + "strings" + + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" + "github.com/cockroachdb/cockroach/pkg/server/dumpstore" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/pprofutil" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" + "github.com/google/pprof/profile" +) + +var maxCombinedFileSize = settings.RegisterByteSizeSetting( + settings.TenantWritable, + "server.cpu_profiler.total_dump_size_limit", + "maximum combined disk size of preserved runtime traces", + 128<<20, // 128MiB +) + +const timeFormat = "2006-01-02T15_04_05.000" +const CPUProfilerFileNamePrefix = "cpuprofiler." +const RuntimeTraceFileNamePrefix = "runtimetrace." + +// BackgroundProfiler is a background service that runs on every node and is +// capable of collecting on-demand CPU and execution profiles. +// +// The first Subscriber initializes the collection of the CPU and execution +// trace profiles. While the profiles are being collected, only Subscribers with +// the same `profileID` are allowed to join the running profiles. The profiles +// are stopped and persisted to local storage when the last Subscriber +// unsubscribes. +type BackgroundProfiler struct { + nodeID int32 + + st *cluster.Settings + stopper *stop.Stopper + dumpStore *dumpstore.DumpStore + + startProfile chan struct{} + startedProfile chan struct{} + stopProfile chan struct{} + res chan protoutil.Message + mu struct { + syncutil.Mutex + + // profileID is a unique ID that can be associated with the operation that + // we are profiling. Only subscribers that are executing as part of this + // operation are allowed to subscribe to the BackgroundProfiler. + profileID backgroundprofiler.ProfileID + subscribers map[backgroundprofiler.SubscriberID]struct{} + } +} + +// Subscribe implements the BackgroundProfiler interface. +func (r *BackgroundProfiler) Subscribe( + ctx context.Context, subscriber backgroundprofiler.Subscriber, +) (context.Context, func()) { + r.mu.Lock() + defer r.mu.Unlock() + + if r.mu.profileID.IsSet() { + if subscriber.ProfileID() != r.mu.profileID { + // TODO(adityamaru): This error needs to make its way back to the Subscriber. + log.Warningf(ctx, "subscriber with trace ID %d, cannot subscribe to the ongoing trace with ID %d", + subscriber.ProfileID(), r.mu.profileID) + return nil, nil + } + } else { + r.mu.profileID = subscriber.ProfileID() + } + + // Set the profiler labels for the new subscriber. + labelledCtx, undo := pprofutil.SetProfilerLabels(ctx, []string{fmt.Sprintf("%d", subscriber.ProfileID()), + subscriber.LabelValue()}...) + r.mu.subscribers[subscriber.Identifier()] = struct{}{} + + // If this is the first subscriber we should start profiling. + if len(r.mu.subscribers) == 1 { + r.startProfile <- struct{}{} + <-r.startedProfile + } + return labelledCtx, undo +} + +func (r *BackgroundProfiler) reset() { + r.mu.Lock() + defer r.mu.Unlock() + r.mu.subscribers = make(map[backgroundprofiler.SubscriberID]struct{}) + r.mu.profileID = 0 +} + +// Unsubscribe implements the BackgroundProfiler interface. +// +// This method unregisters the Subscriber from the running profile. If this is +// the last Subscriber then this method will stop the running profiles and +// return metadata describing the collected profiles. +func (r *BackgroundProfiler) Unsubscribe( + subscriber backgroundprofiler.Subscriber, +) (finishedProfiling bool, msg protoutil.Message) { + r.mu.Lock() + if _, ok := r.mu.subscribers[subscriber.Identifier()]; !ok { + r.mu.Unlock() + return false, nil + } + + delete(r.mu.subscribers, subscriber.Identifier()) + + // If there are no more subscribers, we should stop collecting runtime traces + // and return the traces we have aggregated so far. + if len(r.mu.subscribers) != 0 { + r.mu.Unlock() + return false, nil + } + + // Don't hold the lock while stopping the profiles. + r.mu.Unlock() + r.stopProfile <- struct{}{} + profileMessage := <-r.res + r.reset() + return true, profileMessage +} + +var _ backgroundprofiler.Profiler = &BackgroundProfiler{} + +// NewBackgroundProfiler returns an instance of a BackgroundProfiler. +func NewBackgroundProfiler( + ctx context.Context, + st *cluster.Settings, + stopper *stop.Stopper, + nodeID int32, + runtimeProfilerDir string, +) *BackgroundProfiler { + if runtimeProfilerDir == "" { + return nil + } + + if err := os.MkdirAll(runtimeProfilerDir, 0755); err != nil { + // This is possible when running with only in-memory stores; in that case + // the start-up code sets the output directory to the current directory (.). + // If running the process from a directory which is not writable, we won't + // be able to create a subdirectory here. + log.Warningf(ctx, "cannot create runtime trace dump dir -- runtime traces will be disabled: %v", err) + return nil + } + + et := &BackgroundProfiler{ + st: st, + stopper: stopper, + dumpStore: dumpstore.NewStore(runtimeProfilerDir, maxCombinedFileSize, st), + nodeID: nodeID, + startProfile: make(chan struct{}), + startedProfile: make(chan struct{}), + stopProfile: make(chan struct{}), + res: make(chan protoutil.Message), + } + et.mu.subscribers = make(map[backgroundprofiler.SubscriberID]struct{}) + et.mu.profileID = 0 + return et +} + +// CompileTagFilter compiles a filter from the passed in tag. This method is a +// stripped down version of `compileTagFilter` in +// https://github.com/google/pprof/blob/main/internal/driver/driver_focus.go#L80. +func CompileTagFilter(tag string) (func(*profile.Sample) bool, error) { + if tag == "" { + return nil, nil + } + + tagValuePair := strings.SplitN(tag, "=", 2) + var wantKey string + if len(tagValuePair) == 2 { + wantKey = tagValuePair[0] + tag = tagValuePair[1] + } + + var rfx []*regexp.Regexp + for _, tagf := range strings.Split(tag, ",") { + fx, err := regexp.Compile(tagf) + if err != nil { + return nil, errors.Wrap(err, "parsing regexp") + } + rfx = append(rfx, fx) + } + if wantKey == "" { + return func(s *profile.Sample) bool { + matchedrx: + for _, rx := range rfx { + for key, vals := range s.Label { + for _, val := range vals { + if rx.MatchString(key + ":" + val) { + continue matchedrx + } + } + } + return false + } + return true + }, nil + } + return func(s *profile.Sample) bool { + if vals, ok := s.Label[wantKey]; ok { + for _, rx := range rfx { + for _, val := range vals { + if rx.MatchString(val) { + return true + } + } + } + } + return false + }, nil +} + +// filterCPUProfileByLabel removes all samples from the profile, except those +// that match the label regular expression. The filtered profile is written to +// the passed in profileBytesBuf. +func filterCPUProfileByLabel(label string, profileBytesBuf *bytes.Buffer) error { + p, err := profile.Parse(profileBytesBuf) + if err != nil { + return err + } + + tm, err := CompileTagFilter(label) + if err != nil { + return err + } + fm, _ := p.FilterSamplesByTag(tm, nil) + if !fm { + return nil + } + + profileBytesBuf.Reset() + return p.Write(profileBytesBuf) +} + +// Start starts the BackgroundProfiler that listens for new Subscribers. +func (r *BackgroundProfiler) Start(ctx context.Context) error { + return r.stopper.RunAsyncTaskEx(ctx, stop.TaskOpts{ + TaskName: "backgroundprofiler", + SpanOpt: stop.ChildSpan, + }, func(ctx context.Context) { + ctxDone := ctx.Done() + for { + select { + case <-r.stopper.ShouldQuiesce(): + return + case <-ctxDone: + return + case <-r.startProfile: + + // Start collecting a CPU profile. + var cpuProfileBuf bytes.Buffer + + // TODO(during review): This is an arbitrarily chosen frequency, the + // runtime default is a 100Hz. This value will probably require tuning + // depending on our use case. Egs: profiling a single query execution vs + // profiling a long running job for a few minutes vs profiling a + // statement fingerprint. + runtime.SetCPUProfileRate(500) + if err := pprof.StartCPUProfile(&cpuProfileBuf); err != nil { + log.Warningf(ctx, "failed to start CPU profile %+v", err) + } + + // Start collecting an execution trace. + // + // Doing this after we start collecting CPU profiles means that our + // execution traces will also have CPU samples as events - + // https://go-review.googlesource.com/c/go/+/400795 + var executionTraceBuf bytes.Buffer + if err := trace.Start(&executionTraceBuf); err != nil { + log.Warningf(ctx, "failed to start execution trace %+v", err) + } + + r.startedProfile <- struct{}{} + + // TODO(during review): We should prevent the trace from growing too big + // and stop the trace every few seconds. Though the visualizer seems to + // chunk large traces into 1 second chunks so maybe we don't need to? + <-r.stopProfile + trace.Stop() + pprof.StopCPUProfile() + + // Filter entries from the CPU profile that are not labelled with our + // profile ID. + r.mu.Lock() + filterKey := fmt.Sprintf("%d", r.mu.profileID) + r.mu.Unlock() + if err := filterCPUProfileByLabel(filterKey, &cpuProfileBuf); err != nil { + log.Warningf(ctx, "failed to filer profile labels") + } + + // Persist the CPU profile + execution trace to local storage. + now := timeutil.Now() + cpuProfileName := CPUProfilerFileNamePrefix + now.Format(timeFormat) + cpuProfilePath := r.dumpStore.GetFullPath(cpuProfileName) + if err := os.WriteFile(cpuProfilePath, cpuProfileBuf.Bytes(), 0644); err != nil { + log.Warningf(ctx, "failed to write profiler to %s", cpuProfilePath) + } + + executionTraceName := RuntimeTraceFileNamePrefix + now.Format(timeFormat) + executionTracePath := r.dumpStore.GetFullPath(executionTraceName) + if err := os.WriteFile(executionTracePath, executionTraceBuf.Bytes(), 0644); err != nil { + log.Warningf(ctx, "failed to write execution trace to %s", executionTracePath) + } + + // TODO(adityamaru): Define GC semantics of the dumper. + + // Return a summary of the profiles collected. + r.res <- &Profile{ + NodeID: r.nodeID, + RuntimeTrace: executionTracePath, + CPUProfile: cpuProfilePath, + } + } + } + }) +} diff --git a/pkg/server/backgroundprofiler/profiler/profiler.proto b/pkg/server/backgroundprofiler/profiler/profiler.proto new file mode 100644 index 000000000000..7d6b21ce3155 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler.proto @@ -0,0 +1,20 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +syntax = "proto3"; +package cockroach.server.backgroundprofiler.profiler; +option go_package = "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler"; + +import "gogoproto/gogo.proto"; + + +message Profile { + int32 node_id = 1 [(gogoproto.customname) = "NodeID"]; + string runtime_trace = 2; + string cpu_profile = 3 [(gogoproto.customname) = "CPUProfile"]; +} diff --git a/pkg/server/backgroundprofiler/profiler/profiler_test.go b/pkg/server/backgroundprofiler/profiler/profiler_test.go new file mode 100644 index 000000000000..58efae0cfec3 --- /dev/null +++ b/pkg/server/backgroundprofiler/profiler/profiler_test.go @@ -0,0 +1,126 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package profiler_test + +import ( + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/tracing" + "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" + "github.com/gogo/protobuf/types" + "github.com/google/pprof/profile" + "github.com/stretchr/testify/require" +) + +func TestBackgroundProfiler(t *testing.T) { + ctx := context.Background() + st := cluster.MakeTestingClusterSettings() + stopper := stop.NewStopper() + defer stopper.Stop(ctx) + dir, _ := testutils.TempDir(t) + //defer cleanup() + + bp := profiler.NewBackgroundProfiler(ctx, st, stopper, 1, dir) + wg := ctxgroup.WithContext(ctx) + wg.GoCtx(func(ctx context.Context) error { + if err := bp.Start(ctx); err != nil { + t.Fatal(err) + } + return nil + }) + defer func() { + if err := wg.Wait(); err != nil { + t.Fatal(err) + } + }() + + tr := tracing.NewTracerWithOpt(context.Background(), + tracing.WithTracingMode(tracing.TracingModeActiveSpansRegistry)) + tr.SetBackgroundProfiler(bp) + + _, sp := tr.StartSpanCtx(ctx, "root", + tracing.WithRecording(tracingpb.RecordingStructured), tracing.WithBackgroundProfiling()) + + done := make(chan int) + wg2 := ctxgroup.WithContext(ctx) + for i := 0; i < runtime.NumCPU(); i++ { + i := i + wg2.GoCtx(func(ctx context.Context) error { + // Start a new "root" span for each goroutine that is part of the same + // trace as sp. + operationName := fmt.Sprintf("op%d", i) + innerSp := tr.StartSpan(operationName, tracing.WithRemoteParentFromSpanMeta(sp.Meta())) + defer innerSp.Finish() + for { + select { + case <-done: + return nil + default: + log.Infof(ctx, "spinning in %d", i) + } + } + }) + } + + <-time.After(time.Second * 10) + close(done) + require.NoError(t, wg2.Wait()) + traceID := sp.TraceID() + rec := sp.FinishAndGetConfiguredRecording() + + // At this point we expect the background profiler to have written the + // profiles (CPU and execution traces) to dir. + dirEntry, err := os.ReadDir(dir) + require.NoError(t, err) + require.Len(t, dirEntry, 2) + + expectedProfileEvent := profiler.Profile{ + NodeID: 1, + } + for _, entry := range dirEntry { + // For CPU profiles we can verify that only samples labelled with our + // traceID are outputted in the profile. + profilePath := filepath.Join(dir, entry.Name()) + if strings.Contains(entry.Name(), profiler.CPUProfilerFileNamePrefix) { + expectedProfileEvent.CPUProfile = profilePath + f, err := os.Open(profilePath) + require.NoError(t, err) + prof, err := profile.Parse(f) + require.NoError(t, err) + ignoreTagFilter, err := profiler.CompileTagFilter(fmt.Sprintf("%d", traceID)) + require.NoError(t, err) + _, ignoreMatch := prof.FilterSamplesByTag(nil, ignoreTagFilter) + require.True(t, ignoreMatch) + require.Empty(t, prof.Sample) + } else { + expectedProfileEvent.RuntimeTrace = profilePath + } + } + + require.Len(t, rec[0].StructuredRecords, 1) + var p profiler.Profile + require.NoError(t, types.UnmarshalAny(rec[0].StructuredRecords[0].Payload, &p)) + require.Equal(t, expectedProfileEvent, p) + log.Infof(ctx, "dir is %s", dir) +} diff --git a/pkg/server/config.go b/pkg/server/config.go index 7d9f7d7a2217..8ace72defd68 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -181,6 +181,9 @@ type BaseConfig struct { // Only used if DisableRuntimeStatsMonitor is false. CPUProfileDirName string + // RuntimeProfileDirName is the directory name for runtime traces. + RuntimeProfileDirName string + // InflightTraceDirName is the directory name for job traces. InflightTraceDirName string diff --git a/pkg/server/server.go b/pkg/server/server.go index 52164de6b440..9cea13dc90d8 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -61,6 +61,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" "github.com/cockroachdb/cockroach/pkg/security/clientsecopts" "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler/profiler" "github.com/cockroachdb/cockroach/pkg/server/debug" "github.com/cockroachdb/cockroach/pkg/server/diagnostics" "github.com/cockroachdb/cockroach/pkg/server/serverpb" @@ -1747,6 +1748,15 @@ func (s *Server) PreStart(ctx context.Context) error { return err } + backgroundProfiler := profiler.NewBackgroundProfiler(workersCtx, s.ClusterSettings(), + s.stopper, int32(state.nodeID), s.cfg.RuntimeProfileDirName) + if backgroundProfiler != nil { + if err := backgroundProfiler.Start(workersCtx); err != nil { + return err + } + s.cfg.Tracer.SetBackgroundProfiler(backgroundProfiler) + } + // Export statistics to graphite, if enabled by configuration. var graphiteOnce sync.Once graphiteEndpoint.SetOnChange(&s.st.SV, func(context.Context) { diff --git a/pkg/server/server_controller_new_server.go b/pkg/server/server_controller_new_server.go index b57a4e37e85b..0bebd2e469aa 100644 --- a/pkg/server/server_controller_new_server.go +++ b/pkg/server/server_controller_new_server.go @@ -301,6 +301,7 @@ func makeSharedProcessTenantServerConfig( baseCfg.GoroutineDumpDirName = "" baseCfg.HeapProfileDirName = "" baseCfg.CPUProfileDirName = "" + baseCfg.RuntimeProfileDirName = "" // Expose the process-wide runtime metrics to the tenant's metric // collector. Since they are process-wide, all tenants can see them. diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 6fcbdba9b1ec..32d5556c9c4b 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -164,6 +164,7 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { cfg.InflightTraceDirName = params.TraceDir } } + cfg.RuntimeProfileDirName = params.RuntimeProfileDir if knobs := params.Knobs.Store; knobs != nil { if mo := knobs.(*kvserver.StoreTestingKnobs).MaxOffset; mo != 0 { cfg.MaxOffset = MaxOffsetType(mo) @@ -280,6 +281,9 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { if cfg.CPUProfileDirName == "" { cfg.CPUProfileDirName = filepath.Join(storeSpec.Path, "logs", base.CPUProfileDir) } + if cfg.RuntimeProfileDirName == "" { + cfg.RuntimeProfileDirName = filepath.Join(storeSpec.Path, "logs", base.RuntimeProfileDir) + } } } cfg.Stores = base.StoreSpecList{Specs: params.StoreSpecs} @@ -1066,6 +1070,7 @@ func (ts *TestServer) StartTenant( baseCfg.Locality = params.Locality baseCfg.HeapProfileDirName = params.HeapProfileDirName baseCfg.GoroutineDumpDirName = params.GoroutineDumpDirName + baseCfg.RuntimeProfileDirName = params.RuntimeProfileDirName baseCfg.ClusterName = ts.Cfg.ClusterName baseCfg.StartDiagnosticsReporting = params.StartDiagnosticsReporting baseCfg.DisableTLSForHTTP = params.DisableTLSForHTTP diff --git a/pkg/util/tracing/BUILD.bazel b/pkg/util/tracing/BUILD.bazel index 4c9c6f06eaba..14ffffbd7f00 100644 --- a/pkg/util/tracing/BUILD.bazel +++ b/pkg/util/tracing/BUILD.bazel @@ -21,6 +21,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/util/tracing", visibility = ["//visibility:public"], deps = [ + "//pkg/server/backgroundprofiler", "//pkg/settings", "//pkg/util", "//pkg/util/buildutil", diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index 20c27903d1b8..f4330c73d5ba 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -61,6 +61,12 @@ type crdbSpan struct { // whenever a Structured event is recorded by the span and its children. eventListeners []EventListener + // enableBackgroundProfiling is true if the span is part of an operation that + // is being profiled on-demand by the backgroundprofiler.Profiler. If true and + // if the span is a local, root span, then it will be Subscribed to the + // background profiler on creation and unsubscribed on Finish(). + enableBackgroundProfiling bool + // Locking rules: // - If locking both a parent and a child, the parent must be locked first. In // practice, children don't take the parent's lock. @@ -1446,6 +1452,12 @@ func (s *crdbSpan) setGoroutineID(gid int64) { s.mu.goroutineID = uint64(gid) } +func (s *crdbSpan) getGoroutineID() uint64 { + s.mu.Lock() + defer s.mu.Unlock() + return s.mu.goroutineID +} + type atomicRecordingType tracingpb.RecordingType // load returns the recording type. diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index 5a71f2e15ed8..69bccdc86fd7 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -17,6 +17,7 @@ import ( "sync/atomic" "time" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" "github.com/cockroachdb/logtags" @@ -140,6 +141,32 @@ type Span struct { finishStack string } +var _ backgroundprofiler.Subscriber = &Span{} + +func (sp *Span) LabelValue() string { + if sp.detectUseAfterFinish() { + return "" + } + + return sp.OperationName() +} + +func (sp *Span) Identifier() backgroundprofiler.SubscriberID { + if sp.detectUseAfterFinish() { + return 0 + } + + return backgroundprofiler.SubscriberID(sp.SpanID()) +} + +func (sp *Span) ProfileID() backgroundprofiler.ProfileID { + if sp.detectUseAfterFinish() { + return 0 + } + + return backgroundprofiler.ProfileID(sp.i.TraceID()) +} + // IsNoop returns true if this span is a black hole - it doesn't correspond to a // CRDB span and it doesn't output either to an OpenTelemetry tracer, or to // net.Trace. @@ -243,9 +270,29 @@ func (sp *Span) Redactable() bool { // // Finishing a nil *Span is a noop. func (sp *Span) Finish() { + sp.maybeUnsubscribeFromBackgroundProfiler() sp.finishInternal() } +// maybeUnsubscribeFromBackgroundProfiler unsubscribes the span from the +// backgroundProfiler if the span was subscribed to it on creation. +// If the span is responsible for finishing the profile it also emits a +// StructuredEvent that captures metadata about the collected profiles. +func (sp *Span) maybeUnsubscribeFromBackgroundProfiler() { + if sp == nil || sp.IsNoop() || sp.detectUseAfterFinish() { + return + } + + if sp.i.crdb.enableBackgroundProfiling { + if sp.Tracer().backgroundProfiler != nil { + finishedProfiling, profile := sp.Tracer().backgroundProfiler.Unsubscribe(sp) + if finishedProfiling { + sp.RecordStructured(profile) + } + } + } +} + // finishInternal finishes the span. func (sp *Span) finishInternal() { if sp == nil || sp.IsNoop() || sp.detectUseAfterFinish() { @@ -273,6 +320,7 @@ func (sp *Span) finishInternal() { func (sp *Span) FinishAndGetRecording(recType tracingpb.RecordingType) tracingpb.Recording { rec := tracingpb.Recording(nil) if sp.RecordingType() != tracingpb.RecordingOff { + sp.maybeUnsubscribeFromBackgroundProfiler() rec = sp.i.GetRecording(recType, true /* finishing */) } // Reach directly into sp.i to pass the finishing argument. @@ -290,6 +338,7 @@ func (sp *Span) FinishAndGetConfiguredRecording() tracingpb.Recording { rec := tracingpb.Recording(nil) recType := sp.RecordingType() if recType != tracingpb.RecordingOff { + sp.maybeUnsubscribeFromBackgroundProfiler() // Reach directly into sp.i to pass the finishing argument. rec = sp.i.GetRecording(recType, true /* finishing */) } @@ -876,6 +925,16 @@ type SpanMeta struct { // any info about the span in order to not have a child be created on the // other side. Similarly, ExtractMetaFrom does not deserialize this field. sterile bool + + // enableBackgroundProfiling is set if this span is part of an operation that + // should be profiled by the backgroundprofiler.Profiler. If true and if the + // span is a local, root span, then it will be Subscribed to the background + // profiler on creation and unsubscribed on Finish(). + // + // Note this field is propagated across the wire through a carrier so that + // remote child spans of an operation that is being profiled on the source + // node, start or join profiles being collected on the remote nodes. + enableBackgroundProfiling bool } // Empty returns whether or not the SpanMeta is a zero value. @@ -926,10 +985,11 @@ func SpanMetaFromProto(info tracingpb.TraceInfo) SpanMeta { } sm := SpanMeta{ - traceID: info.TraceID, - spanID: info.ParentSpanID, - otelCtx: otelCtx, - sterile: false, + traceID: info.TraceID, + spanID: info.ParentSpanID, + otelCtx: otelCtx, + sterile: false, + enableBackgroundProfiling: info.EnableBackgroundProfiling, } switch info.RecordingMode { case tracingpb.RecordingMode_OFF: diff --git a/pkg/util/tracing/span_inner.go b/pkg/util/tracing/span_inner.go index d5714d27dc29..3be819f04ee2 100644 --- a/pkg/util/tracing/span_inner.go +++ b/pkg/util/tracing/span_inner.go @@ -174,12 +174,13 @@ func (s *spanInner) Meta() SpanMeta { var traceID tracingpb.TraceID var spanID tracingpb.SpanID var recordingType tracingpb.RecordingType - var sterile bool + var sterile, enableBackgroundProfiling bool if s.crdb != nil { traceID, spanID = s.crdb.traceID, s.crdb.spanID recordingType = s.crdb.mu.recording.recordingType.load() sterile = s.isSterile() + enableBackgroundProfiling = s.crdb.enableBackgroundProfiling } var otelCtx oteltrace.SpanContext @@ -191,15 +192,17 @@ func (s *spanInner) Meta() SpanMeta { spanID == 0 && !otelCtx.TraceID().IsValid() && recordingType == 0 && - !sterile { + !sterile && + !enableBackgroundProfiling { return SpanMeta{} } return SpanMeta{ - traceID: traceID, - spanID: spanID, - otelCtx: otelCtx, - recordingType: recordingType, - sterile: sterile, + traceID: traceID, + spanID: spanID, + otelCtx: otelCtx, + recordingType: recordingType, + sterile: sterile, + enableBackgroundProfiling: enableBackgroundProfiling, } } diff --git a/pkg/util/tracing/span_options.go b/pkg/util/tracing/span_options.go index 0b7f970cadcd..692705d0a7fb 100644 --- a/pkg/util/tracing/span_options.go +++ b/pkg/util/tracing/span_options.go @@ -76,6 +76,7 @@ type spanOptions struct { SpanKind oteltrace.SpanKind // see WithSpanKind Sterile bool // see WithSterile EventListeners []EventListener // see WithEventListeners + EnableBackgroundProfiling bool // see WithBackgroundProfiling // recordingTypeExplicit is set if the WithRecording() option was used. In // that case, spanOptions.recordingType() returns recordingTypeOpt below. If @@ -93,6 +94,22 @@ func (opts *spanOptions) parentTraceID() tracingpb.TraceID { return 0 } +// enableBackgroundProfiling returns true if the span was created +// WithBackgroundProfiling() or the span has a remote/local parent that has +// background profiling enabled. +func (opts *spanOptions) enableBackgroundProfiling() bool { + enableBackgroundProfiling := opts.EnableBackgroundProfiling + if !opts.Parent.empty() && !opts.Parent.IsNoop() { + enableBackgroundProfiling = enableBackgroundProfiling || opts.Parent.i.crdb.enableBackgroundProfiling + } + + if !opts.RemoteParent.Empty() { + enableBackgroundProfiling = enableBackgroundProfiling || opts.RemoteParent.enableBackgroundProfiling + } + + return enableBackgroundProfiling +} + func (opts *spanOptions) parentSpanID() tracingpb.SpanID { if !opts.Parent.empty() && !opts.Parent.IsNoop() { return opts.Parent.i.crdb.spanID @@ -466,6 +483,27 @@ func (w withSterileOption) apply(opts spanOptions) spanOptions { return opts } +type backgroundProfilingOption struct{} + +var _ SpanOption = backgroundProfilingOption{} + +func (et backgroundProfilingOption) apply(opts spanOptions) spanOptions { + // Applying an EventListener span option implies the span has at least + // `RecordingStructured` recording type. If the span explicitly specifies a + // `RecordingVerbose` recording type via the `WithRecording(...)` option, that + // will be respected instead. + if !opts.recordingTypeExplicit { + opts.recordingTypeExplicit = true + opts.recordingTypeOpt = tracingpb.RecordingStructured + } + opts.EnableBackgroundProfiling = true + return opts +} + +func WithBackgroundProfiling() SpanOption { + return backgroundProfilingOption{} +} + type eventListenersOption []EventListener var _ SpanOption = eventListenersOption{} diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go index 330a6ec5575d..c2d586c573b8 100644 --- a/pkg/util/tracing/tracer.go +++ b/pkg/util/tracing/tracer.go @@ -22,6 +22,7 @@ import ( "time" "unsafe" + "github.com/cockroachdb/cockroach/pkg/server/backgroundprofiler" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/buildutil" @@ -91,7 +92,8 @@ const ( fieldNameTraceID = prefixTracerState + "traceid" fieldNameSpanID = prefixTracerState + "spanid" // fieldNameRecordingType will contain the desired type of trace recording. - fieldNameRecordingType = "rec" + fieldNameRecordingType = "rec" + fieldNameEnableBackgroundProfiling = "backgroundprofile" // fieldNameOtel{TraceID,SpanID} will contain the OpenTelemetry span info, hex // encoded. @@ -217,7 +219,7 @@ type TracingMode int const ( // TracingModeFromEnv configures tracing according to enableTracingByDefault. TracingModeFromEnv TracingMode = iota - // TracingModeOnDemand means that Spans will no be created unless there's a + // TracingModeOnDemand means that Spans will not be created unless there's a // particular reason to create them (i.e. a span being created with // WithForceRealSpan(), a net.Trace or OpenTelemetry tracers attached). TracingModeOnDemand @@ -294,6 +296,11 @@ type Tracer struct { // them at the network boundary from KV. _redactable int32 // accessed atomically + // backgroundProfiler is a handle to the profiler service that can be + // subscribed to/unsubscribed from by spans that have been instructed to + // collect runtime profiles. + backgroundProfiler backgroundprofiler.Profiler + // Pointer to an OpenTelemetry tracer used as a "shadow tracer", if any. If // not nil, the respective *otel.Tracer will be used to create mirror spans // for all spans that the parent Tracer creates. @@ -384,7 +391,7 @@ type SpanRegistry struct { syncutil.Mutex // m stores all the currently open spans. Note that a span being present // here proves that the corresponding Span.Finish() call has not yet - // completed (but crdbSpan.Finish() might have finished), therefor the span + // completed (but crdbSpan.Finish() might have finished), therefore the span // cannot be reused while present in the registry. At the same time, note // that Span.Finish() can be called on these spans so, when using one of // these spans, we need to be prepared for that use to be concurrent with @@ -585,6 +592,12 @@ func (t *Tracer) ActiveSpansRegistryEnabled() bool { return atomic.LoadInt32(&t._activeSpansRegistryEnabled) != 0 } +// SetBackgroundProfiler sets a handle on the Tracer to the BackgroundProfiler +// service that can be used to trigger on-demand profiles. +func (t *Tracer) SetBackgroundProfiler(profiler backgroundprofiler.Profiler) { + t.backgroundProfiler = profiler +} + // NewTracer creates a Tracer with default options. // // See NewTracerWithOpt() for controlling various configuration options. @@ -1203,6 +1216,7 @@ child operation: %s, tracer created at: s.i.crdb.SetRecordingType(opts.recordingType()) s.i.crdb.parentSpanID = opts.parentSpanID() + s.i.crdb.enableBackgroundProfiling = opts.enableBackgroundProfiling() var localRoot bool { @@ -1251,6 +1265,28 @@ child operation: %s, tracer created at: // spans. Span.Finish will take care of removing it. if localRoot { t.activeSpansRegistry.addSpan(s.i.crdb) + + // TODO(during review): Do we want to tie this to Verbose recording or have + // its own flag. An advantage of making it an independent flag is that we do + // not have to enable the verbose collection of traces to trigger background + // profile collection. + // + // If the span is a local root and has background profiling enabled, we + // subscribe to the background profiler. Subscribing involves joining a + // running profile or starting a new profile if this is the first root span + // of the operation that is being profiled. + if s.i.crdb.enableBackgroundProfiling { + if t.backgroundProfiler != nil { + // We want the profiler labels to remain set for the lifetime of the + // tracing span. Given that we Unsubscribe only when the span is + // Finish()ing, i.e. when the goroutine is returning, we do not need to + // undo the profiler labels set by the backgroundProfiler. + labelledCtx, _ := t.backgroundProfiler.Subscribe(ctx, s) + if labelledCtx != nil { + ctx = labelledCtx + } + } + } } return maybeWrapCtx(ctx, s) @@ -1309,6 +1345,9 @@ func (t *Tracer) InjectMetaInto(sm SpanMeta, carrier Carrier) { carrier.Set(fieldNameTraceID, strconv.FormatUint(uint64(sm.traceID), 16)) carrier.Set(fieldNameSpanID, strconv.FormatUint(uint64(sm.spanID), 16)) carrier.Set(fieldNameRecordingType, sm.recordingType.ToCarrierValue()) + if sm.enableBackgroundProfiling { + carrier.Set(fieldNameEnableBackgroundProfiling, "") + } } var noopSpanMeta = SpanMeta{} @@ -1323,6 +1362,7 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { var otelSpanID oteltrace.SpanID var recordingTypeExplicit bool var recordingType tracingpb.RecordingType + var enableBackgroundProfiling bool iterFn := func(k, v string) error { switch k = strings.ToLower(k); k { @@ -1355,6 +1395,8 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { case fieldNameRecordingType: recordingTypeExplicit = true recordingType = tracingpb.RecordingTypeFromCarrierValue(v) + case fieldNameEnableBackgroundProfiling: + enableBackgroundProfiling = true } return nil } @@ -1398,7 +1440,8 @@ func (t *Tracer) ExtractMetaFrom(carrier Carrier) (SpanMeta, error) { // The sterile field doesn't make it across the wire. The simple fact that // there was any tracing info in the carrier means that the parent span was // not sterile. - sterile: false, + sterile: false, + enableBackgroundProfiling: enableBackgroundProfiling, }, nil } diff --git a/pkg/util/tracing/tracer_test.go b/pkg/util/tracing/tracer_test.go index 49234df1e5bc..048e71492457 100644 --- a/pkg/util/tracing/tracer_test.go +++ b/pkg/util/tracing/tracer_test.go @@ -300,7 +300,7 @@ func TestTracerInjectExtract(t *testing.T) { // Verify that verbose tracing is propagated and triggers verbosity on the // remote side. - s1 := tr.StartSpan("a", WithRecording(tracingpb.RecordingVerbose)) + s1 := tr.StartSpan("a", WithRecording(tracingpb.RecordingVerbose), WithBackgroundProfiling()) carrier := MetadataCarrier{metadata.MD{}} tr.InjectMetaInto(s1.Meta(), carrier) @@ -317,6 +317,7 @@ func TestTracerInjectExtract(t *testing.T) { if trace1 != trace2 { t.Errorf("traceID doesn't match: parent %d child %d", trace1, trace2) } + require.Equal(t, s1.Meta().enableBackgroundProfiling, s2.Meta().enableBackgroundProfiling) s2.Recordf("x=%d", 1) // Verify that recording was started automatically. diff --git a/pkg/util/tracing/tracingpb/tracing.proto b/pkg/util/tracing/tracingpb/tracing.proto index a5e4413fa5e3..c542f649aa91 100644 --- a/pkg/util/tracing/tracingpb/tracing.proto +++ b/pkg/util/tracing/tracingpb/tracing.proto @@ -43,5 +43,9 @@ message TraceInfo { } OtelInfo otel = 4; + + // EnableBackgroundProfiling is true when the operation is expected to collect + // profiles using the backgroundprofiler.Profiler. + bool enable_background_profiling = 5; }