Skip to content

Commit

Permalink
profiler: Code Hotspots and Endpoint Filtering (#966)
Browse files Browse the repository at this point in the history
tracer: Profiler Endpoints and Code Hotspots

Implement profiler endpoints and code hotspots feature. When enabled,
the tracer automatically applies "span id", "local root span id" and
"trace endpoint" pprof labels to goroutines as spans are started and
finished. These labels end up in the CPU profiles uploaded by the
profilers and are leveraged by our backend to connect APM Spans to
profiling information and vice versa.

The feature is disabled by default for now, but this might change after
some further testing.

New API:

- tracer.WithProfilerCodeHotspots() option
- tracer.WithProfilerEndpoints() option
- StartSpanFromContext() added to tracer.Tracer interface

New env vars:

- DD_PROFILING_ENDPOINT_COLLECTION_ENABLED
- DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED
  • Loading branch information
felixge authored Dec 14, 2021
1 parent 4092574 commit beb5a53
Show file tree
Hide file tree
Showing 24 changed files with 1,432 additions and 18 deletions.
5 changes: 4 additions & 1 deletion checkcopyright.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016 Datadog, Inc.

//go:build ignore
// +build ignore

// This tool validates that all *.go files in the repository have the copyright text attached.
Expand All @@ -22,6 +23,8 @@ func main() {
// copyrightRegexp matches years or year ranges like "2016", "2016-2019",
// "2016,2018-2020" in the copyright header.
copyrightRegexp := regexp.MustCompile(`// Copyright 20[0-9]{2}[0-9,\-]* Datadog, Inc.`)
generatedRegexp := regexp.MustCompile(`Code generated by.+DO NOT EDIT`)

if err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
Expand All @@ -40,7 +43,7 @@ func main() {
if err != nil && err != io.EOF {
return err
}
if !copyrightRegexp.Match(snip) {
if !copyrightRegexp.Match(snip) && !generatedRegexp.Match(snip) {
// report missing header
missing = true
log.Printf("Copyright header missing in %q.\n", path)
Expand Down
11 changes: 10 additions & 1 deletion ddtrace/ddtrace.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
// with by accessing the subdirectories of this package: https://godoc.org/gopkg.in/DataDog/dd-trace-go.v1/ddtrace#pkg-subdirectories.
package ddtrace // import "gopkg.in/DataDog/dd-trace-go.v1/ddtrace"

import "time"
import (
"context"
"time"
)

// Tracer specifies an implementation of the Datadog tracer which allows starting
// and propagating spans. The official implementation is exposed as functions
Expand All @@ -22,6 +25,12 @@ type Tracer interface {
// StartSpan starts a span with the given operation name and options.
StartSpan(operationName string, opts ...StartSpanOption) Span

// StartSpanFromContext starts a span with the given operation name and
// options. If a span is found in the context, it will be used as the parent
// of the resulting span. If the ChildOf option is passed, the span from
// context will take precedence over it as the parent span.
StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (Span, context.Context)

// Extract extracts a span context from a given carrier. Note that baggage item
// keys will always be lower-cased to maintain consistency. It is impossible to
// maintain the original casing due to MIME header canonicalization standards.
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/internal/globaltracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package internal // import "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"

import (
"context"
"sync"

"gopkg.in/DataDog/dd-trace-go.v1/ddtrace"
Expand Down Expand Up @@ -51,6 +52,11 @@ func (NoopTracer) StartSpan(operationName string, opts ...ddtrace.StartSpanOptio
return NoopSpan{}
}

// StartSpanFromContext implements ddtrace.Tracer. It returns a NoopSpan and
// hands the caller's ctx back unchanged, so that values, deadlines and
// cancellation carried by ctx are not lost when no real tracer is running.
// A nil ctx is replaced by context.Background().
func (NoopTracer) StartSpanFromContext(ctx context.Context, operationName string, options ...ddtrace.StartSpanOption) (ddtrace.Span, context.Context) {
	if ctx == nil {
		ctx = context.Background()
	}
	return NoopSpan{}, ctx
}

// SetServiceInfo implements ddtrace.Tracer. The no-op tracer ignores all
// three arguments.
func (NoopTracer) SetServiceInfo(name, app, appType string) {
	// Intentionally empty.
}

Expand Down
14 changes: 13 additions & 1 deletion ddtrace/mocktracer/mocktracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
package mocktracer

import (
"context"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -73,17 +74,28 @@ func (*mocktracer) Stop() {
}

// StartSpan starts a span with the given operation name and options by
// delegating to StartSpanFromContext with a background context. The context
// returned by that call is discarded.
func (t *mocktracer) StartSpan(operationName string, opts ...ddtrace.StartSpanOption) ddtrace.Span {
	s, _ := t.StartSpanFromContext(context.Background(), operationName, opts...)
	return s
}

// StartSpanFromContext starts a mock span with the given operation name and
// options and registers it as an open span. If ctx carries a span, that span
// becomes the parent and takes precedence over any ChildOf() option. A nil ctx
// is treated as context.Background(). The returned context is ctx wrapped
// with the new span.
func (t *mocktracer) StartSpanFromContext(ctx context.Context, operationName string, opts ...ddtrace.StartSpanOption) (ddtrace.Span, context.Context) {
	var cfg ddtrace.StartSpanConfig
	for _, fn := range opts {
		fn(&cfg)
	}
	if ctx == nil {
		ctx = context.Background()
	} else if s, ok := tracer.SpanFromContext(ctx); ok {
		// A span found in ctx overrides the ChildOf() parent, if any.
		cfg.Parent = s.Context()
	}
	span := newSpan(t, operationName, &cfg)

	// Track the span as open; the map is guarded by the tracer's lock.
	t.Lock()
	t.openSpans[span.SpanID()] = span
	t.Unlock()

	return span, tracer.ContextWithSpan(ctx, span)
}

func (t *mocktracer) OpenSpans() []Span {
Expand Down
10 changes: 1 addition & 9 deletions ddtrace/tracer/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,5 @@ func SpanFromContext(ctx context.Context) (Span, bool) {
// is found in the context, it will be used as the parent of the resulting span. If the ChildOf
// option is passed, the span from context will take precedence over it as the parent span.
func StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (Span, context.Context) {
	// Delegate to the active global tracer. Parent lookup from ctx and the
	// wrapping of the returned context are the tracer implementation's
	// responsibility, so nothing else may run here: any statement left in
	// front of this return would shadow the delegation.
	return internal.GetGlobalTracer().StartSpanFromContext(ctx, operationName, opts...)
}
34 changes: 34 additions & 0 deletions ddtrace/tracer/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
"gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof"
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"

"github.com/DataDog/datadog-go/v5/statsd"
Expand Down Expand Up @@ -122,6 +123,12 @@ type config struct {
// errors will record a stack trace when this option is set.
noDebugStack bool

// profilerHotspots specifies whether profiler Code Hotspots is enabled.
profilerHotspots bool

// profilerEndpoints specifies whether profiler endpoint filtering is enabled.
profilerEndpoints bool

// enabled reports whether tracing is enabled.
enabled bool
}
Expand Down Expand Up @@ -213,6 +220,9 @@ func newConfig(opts ...StartOption) *config {
c.runtimeMetrics = internal.BoolEnv("DD_RUNTIME_METRICS_ENABLED", false)
c.debug = internal.BoolEnv("DD_TRACE_DEBUG", false)
c.enabled = internal.BoolEnv("DD_TRACE_ENABLED", true)
// TODO(fg): set these to true before going GA with this.
c.profilerEndpoints = internal.BoolEnv(traceprof.EndpointEnvVar, false)
c.profilerHotspots = internal.BoolEnv(traceprof.CodeHotspotsEnvVar, false)

for _, fn := range opts {
fn(c)
Expand Down Expand Up @@ -665,6 +675,30 @@ func WithLogStartup(enabled bool) StartOption {
}
}

// WithProfilerCodeHotspots toggles the code hotspots integration between the
// tracer and the profiler. When enabled, pprof labels named "span id" and
// "local root span id" are attached automatically as new spans are created,
// so these label names should not be used in your own code. If this option is
// not passed, the setting comes from the
// DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED env variable, or false.
func WithProfilerCodeHotspots(enabled bool) StartOption {
	apply := func(cfg *config) {
		cfg.profilerHotspots = enabled
	}
	return apply
}

// WithProfilerEndpoints toggles the endpoints integration between the tracer
// and the profiler. When enabled, a pprof label named "trace endpoint" is
// attached automatically; it holds the resource name of the top-level service
// span if its type is http or rpc. This label name should not be used in your
// own code while the option is enabled. If this option is not passed, the
// setting comes from the DD_PROFILING_ENDPOINT_COLLECTION_ENABLED env
// variable, or false.
func WithProfilerEndpoints(enabled bool) StartOption {
	apply := func(cfg *config) {
		cfg.profilerEndpoints = enabled
	}
	return apply
}

// StartSpanOption is a configuration option for StartSpan. It is aliased in order
// to help godoc group all the functions returning it together. It is considered
// more correct to refer to it as the type as the origin, ddtrace.StartSpanOption.
Expand Down
29 changes: 29 additions & 0 deletions ddtrace/tracer/option_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"time"

"gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig"
"gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -363,6 +364,34 @@ func TestTracerOptionsDefaults(t *testing.T) {
assert.Equal(nil, dVal)
})

t.Run("profiler-endpoints", func(t *testing.T) {
t.Run("default", func(t *testing.T) {
c := newConfig()
assert.False(t, c.profilerEndpoints)
})

t.Run("override", func(t *testing.T) {
os.Setenv(traceprof.EndpointEnvVar, "true")
defer os.Unsetenv(traceprof.EndpointEnvVar)
c := newConfig()
assert.True(t, c.profilerEndpoints)
})
})

t.Run("profiler-hotspots", func(t *testing.T) {
t.Run("default", func(t *testing.T) {
c := newConfig()
assert.False(t, c.profilerHotspots)
})

t.Run("override", func(t *testing.T) {
os.Setenv(traceprof.CodeHotspotsEnvVar, "true")
defer os.Unsetenv(traceprof.CodeHotspotsEnvVar)
c := newConfig()
assert.True(t, c.profilerHotspots)
})
})

t.Run("env-mapping", func(t *testing.T) {
os.Setenv("DD_SERVICE_MAPPING", "tracer.test:test2, svc:Newsvc,http.router:myRouter, noval:")
defer os.Unsetenv("DD_SERVICE_MAPPING")
Expand Down
14 changes: 13 additions & 1 deletion ddtrace/tracer/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
package tracer

import (
"context"
"fmt"
"os"
"reflect"
"runtime"
"runtime/pprof"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -72,7 +74,11 @@ type span struct {
noDebugStack bool `msg:"-"` // disables debug stack traces
finished bool `msg:"-"` // true if the span has been submitted to a tracer.
context *spanContext `msg:"-"` // span propagation context
taskEnd func() // ends execution tracer (runtime/trace) task, if started

pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span
pprofCtxRestore context.Context `msg:"-"` // contains pprof.WithLabel labels of the parent span (if any) that need to be restored when this span finishes

taskEnd func() // ends execution tracer (runtime/trace) task, if started
}

// Context yields the SpanContext for this Span. Note that the return
Expand Down Expand Up @@ -320,6 +326,12 @@ func (s *span) Finish(opts ...ddtrace.FinishOption) {
s.taskEnd()
}
s.finish(t)

if s.pprofCtxRestore != nil {
// Restore the labels of the parent span so any CPU samples after this
// point are attributed correctly.
pprof.SetGoroutineLabels(s.pprofCtxRestore)
}
}

// SetOperationName sets or changes the operation name.
Expand Down
69 changes: 65 additions & 4 deletions ddtrace/tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
package tracer

import (
"context"
gocontext "context"
"fmt"
"os"
"runtime/pprof"
"strconv"
"sync"
"time"
Expand All @@ -17,6 +20,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/appsec"
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
"gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof"
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"

"github.com/DataDog/datadog-agent/pkg/obfuscate"
Expand Down Expand Up @@ -311,22 +315,42 @@ func (t *tracer) pushTrace(trace []*span) {
}
}

// StartSpan implements ddtrace.Tracer. It creates, starts, and returns a new
// Span with the given operationName by delegating to StartSpanFromContext
// with a background context; the context returned by that call is discarded.
func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOption) ddtrace.Span {
	s, _ := t.StartSpanFromContext(gocontext.Background(), operationName, options...)
	return s
}

// StartSpanFromContext implements ddtrace.Tracer.
func (t *tracer) StartSpanFromContext(ctx gocontext.Context, operationName string, options ...ddtrace.StartSpanOption) (ddtrace.Span, gocontext.Context) {
var opts ddtrace.StartSpanConfig
for _, fn := range options {
fn(&opts)
}
if ctx == nil {
ctx = gocontext.Background()
} else if s, ok := SpanFromContext(ctx); ok {
// a span found in ctx overrides the ChildOf() parent, if any
opts.Parent = s.Context()
}
var startTime int64
if opts.StartTime.IsZero() {
startTime = now()
} else {
startTime = opts.StartTime.UnixNano()
}
var context *spanContext
pprofCtx := ctx
if opts.Parent != nil {
if ctx, ok := opts.Parent.(*spanContext); ok {
context = ctx
if parentContext, ok := opts.Parent.(*spanContext); ok {
context = parentContext
if pprofCtx == gocontext.Background() && parentContext.span != nil && parentContext.span.pprofCtxActive != nil {
// Inherit the pprof labels from parent span if it was propagated using
// ChildOf() rather than StartSpanFromContext(). Having a separate ctx
// and pprofCtx is done to avoid subtle problems with callers relying
// on the details of the ContextWithSpan() wrapping below.
pprofCtx = parentContext.span.pprofCtxActive
}
}
}
id := opts.SpanID
Expand Down Expand Up @@ -400,13 +424,50 @@ func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOpt
// if not already sampled or a brand new trace, sample it
t.sample(span)
}
if t.config.profilerHotspots || t.config.profilerEndpoints {
ctx = t.applyPPROFLabels(pprofCtx, span)
}
if t.config.serviceMappings != nil {
if newSvc, ok := t.config.serviceMappings[span.Service]; ok {
span.Service = newSvc
}
}
log.Debug("Started Span: %v, Operation: %s, Resource: %s, Tags: %v, %v", span, span.Name, span.Resource, span.Meta, span.Metrics)
return span
return span, ContextWithSpan(ctx, span)
}

// applyPPROFLabels applies pprof labels for the profiler's code hotspots and
// endpoint filtering feature to span. When span finishes, any pprof labels
// found in ctx are restored.
//
// Depending on the tracer config, up to three labels are set: the span id and
// local root span id (code hotspots), and the local root span's resource name
// (endpoints, only when spanResourcePIISafe reports it as safe). If at least
// one label applies, the labels are installed on the calling goroutine, ctx is
// recorded on the span for later restoration, and the labelled context is
// returned. Otherwise ctx is returned unchanged and the span is not touched.
func (t *tracer) applyPPROFLabels(ctx gocontext.Context, span *span) gocontext.Context {
	// At most three key/value pairs are appended below; sizing the slice up
	// front avoids regrowing it on every span start.
	labels := make([]string, 0, 6)
	if t.config.profilerHotspots {
		labels = append(labels, traceprof.SpanID, strconv.FormatUint(span.SpanID, 10))
	}
	// nil checks might not be needed, but better be safe than sorry
	if span.context.trace != nil && span.context.trace.root != nil {
		localRootSpan := span.context.trace.root
		if t.config.profilerHotspots {
			labels = append(labels, traceprof.LocalRootSpanID, strconv.FormatUint(localRootSpan.SpanID, 10))
		}
		if t.config.profilerEndpoints && spanResourcePIISafe(localRootSpan) {
			labels = append(labels, traceprof.TraceEndpoint, localRootSpan.Resource)
		}
	}
	if len(labels) == 0 {
		return ctx
	}
	// Remember the incoming context so its labels can be put back once the
	// span finishes, then activate the new label set on this goroutine.
	span.pprofCtxRestore = ctx
	span.pprofCtxActive = pprof.WithLabels(ctx, pprof.Labels(labels...))
	pprof.SetGoroutineLabels(span.pprofCtxActive)
	return span.pprofCtxActive
}

// spanResourcePIISafe reports whether s.Resource can be assumed, with
// reasonable confidence, to be free of PII. Only spans whose Type equals
// ext.SpanTypeWeb or ext.AppTypeRPC qualify: endpoint names of that kind
// generally carry no PII, whereas other resources such as SQL queries may.
func spanResourcePIISafe(s *span) bool {
	if s.Type == ext.SpanTypeWeb {
		return true
	}
	return s.Type == ext.AppTypeRPC
}

// Stop stops the tracer.
Expand Down
8 changes: 8 additions & 0 deletions internal/traceprof/testapp/test_app.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2021 Datadog, Inc.

// Package testapp has the protobuf/gRPC definitions for the test application
// implemented in traceproftest.
package testapp
Loading

0 comments on commit beb5a53

Please sign in to comment.