diff --git a/checkcopyright.go b/checkcopyright.go index 678b5d246b..f0f9849eb0 100644 --- a/checkcopyright.go +++ b/checkcopyright.go @@ -3,6 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016 Datadog, Inc. +//go:build ignore // +build ignore // This tool validates that all *.go files in the repository have the copyright text attached. @@ -22,6 +23,8 @@ func main() { // copyrightRegexp matches years or year ranges like "2016", "2016-2019", // "2016,2018-2020" in the copyright header. copyrightRegexp := regexp.MustCompile(`// Copyright 20[0-9]{2}[0-9,\-]* Datadog, Inc.`) + generatedRegexp := regexp.MustCompile(`Code generated by.+DO NOT EDIT`) + if err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -40,7 +43,7 @@ func main() { if err != nil && err != io.EOF { return err } - if !copyrightRegexp.Match(snip) { + if !copyrightRegexp.Match(snip) && !generatedRegexp.Match(snip) { // report missing header missing = true log.Printf("Copyright header missing in %q.\n", path) diff --git a/ddtrace/ddtrace.go b/ddtrace/ddtrace.go index 9fe41b1eb1..9aeafebe8d 100644 --- a/ddtrace/ddtrace.go +++ b/ddtrace/ddtrace.go @@ -13,7 +13,10 @@ // with by accessing the subdirectories of this package: https://godoc.org/gopkg.in/DataDog/dd-trace-go.v1/ddtrace#pkg-subdirectories. package ddtrace // import "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" -import "time" +import ( + "context" + "time" +) // Tracer specifies an implementation of the Datadog tracer which allows starting // and propagating spans. The official implementation if exposed as functions @@ -22,6 +25,12 @@ type Tracer interface { // StartSpan starts a span with the given operation name and options. StartSpan(operationName string, opts ...StartSpanOption) Span + // StartSpanFromContext starts a span with the given operation name and + // options. If a span is found in the context, it will be used as the parent + // of the resulting span. If the ChildOf option is passed, the span from + // context will take precedence over it as the parent span. + StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (Span, context.Context) + // Extract extracts a span context from a given carrier. Note that baggage item // keys will always be lower-cased to maintain consistency. It is impossible to // maintain the original casing due to MIME header canonicalization standards. diff --git a/ddtrace/internal/globaltracer.go b/ddtrace/internal/globaltracer.go index 239e976267..c086d0ee01 100644 --- a/ddtrace/internal/globaltracer.go +++ b/ddtrace/internal/globaltracer.go @@ -6,6 +6,7 @@ package internal // import "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal" import ( + "context" "sync" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" @@ -51,6 +52,11 @@ func (NoopTracer) StartSpan(operationName string, opts ...ddtrace.StartSpanOptio return NoopSpan{} } +// StartSpanFromContext implements ddtrace.Tracer. +func (NoopTracer) StartSpanFromContext(ctx context.Context, operationName string, options ...ddtrace.StartSpanOption) (ddtrace.Span, context.Context) { + return NoopSpan{}, context.Background() +} + // SetServiceInfo implements ddtrace.Tracer. func (NoopTracer) SetServiceInfo(name, app, appType string) {} diff --git a/ddtrace/mocktracer/mocktracer.go b/ddtrace/mocktracer/mocktracer.go index bcdb6953e9..8edda50181 100644 --- a/ddtrace/mocktracer/mocktracer.go +++ b/ddtrace/mocktracer/mocktracer.go @@ -13,6 +13,7 @@ package mocktracer import ( + "context" "strconv" "strings" "sync" @@ -73,17 +74,28 @@ func (*mocktracer) Stop() { } func (t *mocktracer) StartSpan(operationName string, opts ...ddtrace.StartSpanOption) ddtrace.Span { + span, _ := t.StartSpanFromContext(context.Background(), operationName, opts...) + return span +} + +func (t *mocktracer) StartSpanFromContext(ctx context.Context, operationName string, opts ...ddtrace.StartSpanOption) (ddtrace.Span, context.Context) { var cfg ddtrace.StartSpanConfig for _, fn := range opts { fn(&cfg) } + if ctx == nil { + ctx = context.Background() + } else if s, ok := tracer.SpanFromContext(ctx); ok { + // span in ctx overwrite ChildOf() parent if any + cfg.Parent = s.Context() + } span := newSpan(t, operationName, &cfg) t.Lock() t.openSpans[span.SpanID()] = span t.Unlock() - return span + return span, tracer.ContextWithSpan(ctx, span) } func (t *mocktracer) OpenSpans() []Span { diff --git a/ddtrace/tracer/context.go b/ddtrace/tracer/context.go index 0add06e99a..a8c0edfba5 100644 --- a/ddtrace/tracer/context.go +++ b/ddtrace/tracer/context.go @@ -39,13 +39,5 @@ func SpanFromContext(ctx context.Context) (Span, bool) { // is found in the context, it will be used as the parent of the resulting span. If the ChildOf // option is passed, the span from context will take precedence over it as the parent span. func StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (Span, context.Context) { - if ctx == nil { - // default to context.Background() to avoid panics on Go >= 1.15 - ctx = context.Background() - } - if s, ok := SpanFromContext(ctx); ok { - opts = append(opts, ChildOf(s.Context())) - } - s := StartSpan(operationName, opts...) - return s, ContextWithSpan(ctx, s) + return internal.GetGlobalTracer().StartSpanFromContext(ctx, operationName, opts...) } diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index 93c490405b..f055679c9b 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -24,6 +24,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof" "gopkg.in/DataDog/dd-trace-go.v1/internal/version" "github.com/DataDog/datadog-go/v5/statsd" @@ -122,6 +123,12 @@ type config struct { // errors will record a stack trace when this option is set. noDebugStack bool + // profilerHotspots specifies whether profiler Code Hotspots is enabled. + profilerHotspots bool + + // profilerEndpoints specifies whether profiler endpoint filtering is enabled. + profilerEndpoints bool + // enabled reports whether tracing is enabled. enabled bool } @@ -213,6 +220,9 @@ func newConfig(opts ...StartOption) *config { c.runtimeMetrics = internal.BoolEnv("DD_RUNTIME_METRICS_ENABLED", false) c.debug = internal.BoolEnv("DD_TRACE_DEBUG", false) c.enabled = internal.BoolEnv("DD_TRACE_ENABLED", true) + // TODO(fg): set these to true before going GA with this. + c.profilerEndpoints = internal.BoolEnv(traceprof.EndpointEnvVar, false) + c.profilerHotspots = internal.BoolEnv(traceprof.CodeHotspotsEnvVar, false) for _, fn := range opts { fn(c) @@ -665,6 +675,30 @@ func WithLogStartup(enabled bool) StartOption { } } +// WithProfilerCodeHotspots enables the code hotspots integration between the +// tracer and profiler. This is done by automatically attaching pprof labels +// called "span id" and "local root span id" when new spans are created. You +// should not use these label names in your own code when this is enabled. The +// enabled value defaults to the value of the +// DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED env variable or false. +func WithProfilerCodeHotspots(enabled bool) StartOption { + return func(c *config) { + c.profilerHotspots = enabled + } +} + +// WithProfilerEndpoints enables the endpoints integration between the tracer +// and profiler. This is done by automatically attaching a pprof label called +// "trace endpoint" holding the resource name of the top-level service span if +// its type is http or rpc. You should not use this label name in your own code +// when this is enabled. The enabled value defaults to the value of the +// DD_PROFILING_ENDPOINT_COLLECTION_ENABLED env variable or false. +func WithProfilerEndpoints(enabled bool) StartOption { + return func(c *config) { + c.profilerEndpoints = enabled + } +} + // StartSpanOption is a configuration option for StartSpan. It is aliased in order // to help godoc group all the functions returning it together. It is considered // more correct to refer to it as the type as the origin, ddtrace.StartSpanOption. diff --git a/ddtrace/tracer/option_test.go b/ddtrace/tracer/option_test.go index 592b683559..410b4e43ee 100644 --- a/ddtrace/tracer/option_test.go +++ b/ddtrace/tracer/option_test.go @@ -20,6 +20,7 @@ import ( "time" "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -363,6 +364,34 @@ func TestTracerOptionsDefaults(t *testing.T) { assert.Equal(nil, dVal) }) + t.Run("profiler-endpoints", func(t *testing.T) { + t.Run("default", func(t *testing.T) { + c := newConfig() + assert.False(t, c.profilerEndpoints) + }) + + t.Run("override", func(t *testing.T) { + os.Setenv(traceprof.EndpointEnvVar, "true") + defer os.Unsetenv(traceprof.EndpointEnvVar) + c := newConfig() + assert.True(t, c.profilerEndpoints) + }) + }) + + t.Run("profiler-hotspots", func(t *testing.T) { + t.Run("default", func(t *testing.T) { + c := newConfig() + assert.False(t, c.profilerHotspots) + }) + + t.Run("override", func(t *testing.T) { + os.Setenv(traceprof.CodeHotspotsEnvVar, "true") + defer os.Unsetenv(traceprof.CodeHotspotsEnvVar) + c := newConfig() + assert.True(t, c.profilerHotspots) + }) + }) + t.Run("env-mapping", func(t *testing.T) { os.Setenv("DD_SERVICE_MAPPING", "tracer.test:test2, svc:Newsvc,http.router:myRouter, noval:") defer os.Unsetenv("DD_SERVICE_MAPPING") diff --git a/ddtrace/tracer/span.go b/ddtrace/tracer/span.go index c6d0162c53..630271989f 100644 --- a/ddtrace/tracer/span.go +++ b/ddtrace/tracer/span.go @@ -8,10 +8,12 @@ package tracer import ( + "context" "fmt" "os" "reflect" "runtime" + "runtime/pprof" "strconv" "strings" "sync" @@ -72,7 +74,11 @@ type span struct { noDebugStack bool `msg:"-"` // disables debug stack traces finished bool `msg:"-"` // true if the span has been submitted to a tracer. context *spanContext `msg:"-"` // span propagation context - taskEnd func() // ends execution tracer (runtime/trace) task, if started + + pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span + pprofCtxRestore context.Context `msg:"-"` // contains pprof.WithLabel labels of the parent span (if any) that need to be restored when this span finishes + + taskEnd func() // ends execution tracer (runtime/trace) task, if started } // Context yields the SpanContext for this Span. Note that the return @@ -320,6 +326,12 @@ func (s *span) Finish(opts ...ddtrace.FinishOption) { s.taskEnd() } s.finish(t) + + if s.pprofCtxRestore != nil { + // Restore the labels of the parent span so any CPU samples after this + // point are attributed correctly. + pprof.SetGoroutineLabels(s.pprofCtxRestore) + } } // SetOperationName sets or changes the operation name. diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index e64502d098..1f7e4cc737 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -6,8 +6,11 @@ package tracer import ( + "context" + gocontext "context" "fmt" "os" + "runtime/pprof" "strconv" "sync" "time" @@ -17,6 +20,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/appsec" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof" "gopkg.in/DataDog/dd-trace-go.v1/internal/version" "github.com/DataDog/datadog-agent/pkg/obfuscate" @@ -311,12 +315,24 @@ func (t *tracer) pushTrace(trace []*span) { } } -// StartSpan creates, starts, and returns a new Span with the given `operationName`. +// StartSpan implements ddtrace.Tracer. func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOption) ddtrace.Span { + span, _ := t.StartSpanFromContext(gocontext.Background(), operationName, options...) + return span +} + +// StartSpanFromContext implements ddtrace.Tracer. +func (t *tracer) StartSpanFromContext(ctx gocontext.Context, operationName string, options ...ddtrace.StartSpanOption) (ddtrace.Span, gocontext.Context) { var opts ddtrace.StartSpanConfig for _, fn := range options { fn(&opts) } + if ctx == nil { + ctx = gocontext.Background() + } else if s, ok := SpanFromContext(ctx); ok { + // span in ctx overwrite ChildOf() parent if any + opts.Parent = s.Context() + } var startTime int64 if opts.StartTime.IsZero() { startTime = now() @@ -324,9 +340,17 @@ func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOpt startTime = opts.StartTime.UnixNano() } var context *spanContext + pprofCtx := ctx if opts.Parent != nil { - if ctx, ok := opts.Parent.(*spanContext); ok { - context = ctx + if parentContext, ok := opts.Parent.(*spanContext); ok { + context = parentContext + if pprofCtx == gocontext.Background() && parentContext.span != nil && parentContext.span.pprofCtxActive != nil { + // Inherit the pprof labels from parent span if it was propagated using + // ChildOf() rather than StartSpanFromContext(). Having a separate ctx + // and pprofCtx is done to avoid subtle problems with callers relying + // on the details of the ContextWithSpan() wrapping below. + pprofCtx = parentContext.span.pprofCtxActive + } } } id := opts.SpanID @@ -400,13 +424,50 @@ func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOpt // if not already sampled or a brand new trace, sample it t.sample(span) } + if t.config.profilerHotspots || t.config.profilerEndpoints { + ctx = t.applyPPROFLabels(pprofCtx, span) + } if t.config.serviceMappings != nil { if newSvc, ok := t.config.serviceMappings[span.Service]; ok { span.Service = newSvc } } log.Debug("Started Span: %v, Operation: %s, Resource: %s, Tags: %v, %v", span, span.Name, span.Resource, span.Meta, span.Metrics) - return span + return span, ContextWithSpan(ctx, span) +} + +// applyPPROFLabels applies pprof labels for the profiler's code hotspots and +// endpoint filtering feature to span. When span finishes, any pprof labels +// found in ctx are restored. +func (t *tracer) applyPPROFLabels(ctx gocontext.Context, span *span) context.Context { + var labels []string + if t.config.profilerHotspots { + labels = append(labels, traceprof.SpanID, strconv.FormatUint(span.SpanID, 10)) + } + // nil checks might not be needed, but better be safe than sorry + if span.context.trace != nil && span.context.trace.root != nil { + localRootSpan := span.context.trace.root + if t.config.profilerHotspots { + labels = append(labels, traceprof.LocalRootSpanID, strconv.FormatUint(localRootSpan.SpanID, 10)) + } + if t.config.profilerEndpoints && spanResourcePIISafe(localRootSpan) { + labels = append(labels, traceprof.TraceEndpoint, localRootSpan.Resource) + } + } + if len(labels) > 0 { + span.pprofCtxRestore = ctx + span.pprofCtxActive = pprof.WithLabels(ctx, pprof.Labels(labels...)) + pprof.SetGoroutineLabels(span.pprofCtxActive) + return span.pprofCtxActive + } + return ctx +} + +// spanResourcePIISafe returns true if s.Resource can be considered to not +// include PII with reasonable confidence. E.g. SQL queries may contain PII, +// but http or rpc endpoint names generally do not. +func spanResourcePIISafe(s *span) bool { + return s.Type == ext.SpanTypeWeb || s.Type == ext.AppTypeRPC } // Stop stops the tracer. diff --git a/internal/traceprof/testapp/test_app.go b/internal/traceprof/testapp/test_app.go new file mode 100644 index 0000000000..bc66d4f4e5 --- /dev/null +++ b/internal/traceprof/testapp/test_app.go @@ -0,0 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +// Package testapp has the protbuf/grpc definitions for the test application +// implemented in traceproftest. +package testapp diff --git a/internal/traceprof/testapp/test_app.pb.go b/internal/traceprof/testapp/test_app.pb.go new file mode 100644 index 0000000000..06ba2cc07e --- /dev/null +++ b/internal/traceprof/testapp/test_app.pb.go @@ -0,0 +1,242 @@ +// +//Run this generate Go code after changing things below: +// +//protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative internal/traceprof/testapp/test_app.proto + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.27.1 +// protoc v3.6.1 +// source: internal/traceprof/testapp/test_app.proto + +package testapp + +import ( + reflect "reflect" + sync "sync" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type WorkReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CpuDuration int64 `protobuf:"varint,1,opt,name=cpu_duration,json=cpuDuration,proto3" json:"cpu_duration,omitempty"` + SqlDuration int64 `protobuf:"varint,2,opt,name=sql_duration,json=sqlDuration,proto3" json:"sql_duration,omitempty"` +} + +func (x *WorkReq) Reset() { + *x = WorkReq{} + if protoimpl.UnsafeEnabled { + mi := &file_internal_traceprof_testapp_test_app_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *WorkReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*WorkReq) ProtoMessage() {} + +func (x *WorkReq) ProtoReflect() protoreflect.Message { + mi := &file_internal_traceprof_testapp_test_app_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use WorkReq.ProtoReflect.Descriptor instead. +func (*WorkReq) Descriptor() ([]byte, []int) { + return file_internal_traceprof_testapp_test_app_proto_rawDescGZIP(), []int{0} +} + +func (x *WorkReq) GetCpuDuration() int64 { + if x != nil { + return x.CpuDuration + } + return 0 +} + +func (x *WorkReq) GetSqlDuration() int64 { + if x != nil { + return x.SqlDuration + } + return 0 +} + +type WorkRes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + SpanId string `protobuf:"bytes,1,opt,name=span_id,json=spanId,proto3" json:"span_id,omitempty"` + LocalRootSpanId string `protobuf:"bytes,2,opt,name=local_root_span_id,json=localRootSpanId,proto3" json:"local_root_span_id,omitempty"` +} + +func (x *WorkRes) Reset() { + *x = WorkRes{} + if protoimpl.UnsafeEnabled { + mi := &file_internal_traceprof_testapp_test_app_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *WorkRes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*WorkRes) ProtoMessage() {} + +func (x *WorkRes) ProtoReflect() protoreflect.Message { + mi := &file_internal_traceprof_testapp_test_app_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use WorkRes.ProtoReflect.Descriptor instead. +func (*WorkRes) Descriptor() ([]byte, []int) { + return file_internal_traceprof_testapp_test_app_proto_rawDescGZIP(), []int{1} +} + +func (x *WorkRes) GetSpanId() string { + if x != nil { + return x.SpanId + } + return "" +} + +func (x *WorkRes) GetLocalRootSpanId() string { + if x != nil { + return x.LocalRootSpanId + } + return "" +} + +var File_internal_traceprof_testapp_test_app_proto protoreflect.FileDescriptor + +var file_internal_traceprof_testapp_test_app_proto_rawDesc = []byte{ + 0x0a, 0x29, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x74, 0x72, 0x61, 0x63, 0x65, + 0x70, 0x72, 0x6f, 0x66, 0x2f, 0x74, 0x65, 0x73, 0x74, 0x61, 0x70, 0x70, 0x2f, 0x74, 0x65, 0x73, + 0x74, 0x5f, 0x61, 0x70, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x07, 0x74, 0x65, 0x73, + 0x74, 0x61, 0x70, 0x70, 0x22, 0x4f, 0x0a, 0x07, 0x57, 0x6f, 0x72, 0x6b, 0x52, 0x65, 0x71, 0x12, + 0x21, 0x0a, 0x0c, 0x63, 0x70, 0x75, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x63, 0x70, 0x75, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x71, 0x6c, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x73, 0x71, 0x6c, 0x44, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x4f, 0x0a, 0x07, 0x57, 0x6f, 0x72, 0x6b, 0x52, 0x65, 0x73, + 0x12, 0x17, 0x0a, 0x07, 0x73, 0x70, 0x61, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x06, 0x73, 0x70, 0x61, 0x6e, 0x49, 0x64, 0x12, 0x2b, 0x0a, 0x12, 0x6c, 0x6f, 0x63, + 0x61, 0x6c, 0x5f, 0x72, 0x6f, 0x6f, 0x74, 0x5f, 0x73, 0x70, 0x61, 0x6e, 0x5f, 0x69, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x52, 0x6f, 0x6f, 0x74, + 0x53, 0x70, 0x61, 0x6e, 0x49, 0x64, 0x32, 0x37, 0x0a, 0x07, 0x54, 0x65, 0x73, 0x74, 0x41, 0x70, + 0x70, 0x12, 0x2c, 0x0a, 0x04, 0x57, 0x6f, 0x72, 0x6b, 0x12, 0x10, 0x2e, 0x74, 0x65, 0x73, 0x74, + 0x61, 0x70, 0x70, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x10, 0x2e, 0x74, 0x65, + 0x73, 0x74, 0x61, 0x70, 0x70, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x52, 0x65, 0x73, 0x22, 0x00, 0x42, + 0x3c, 0x5a, 0x3a, 0x67, 0x6f, 0x70, 0x6b, 0x67, 0x2e, 0x69, 0x6e, 0x2f, 0x44, 0x61, 0x74, 0x61, + 0x44, 0x6f, 0x67, 0x2f, 0x64, 0x64, 0x2d, 0x74, 0x72, 0x61, 0x63, 0x65, 0x2d, 0x67, 0x6f, 0x2e, + 0x76, 0x31, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x74, 0x72, 0x61, 0x63, + 0x65, 0x70, 0x72, 0x6f, 0x66, 0x2f, 0x74, 0x65, 0x73, 0x74, 0x61, 0x70, 0x70, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_internal_traceprof_testapp_test_app_proto_rawDescOnce sync.Once + file_internal_traceprof_testapp_test_app_proto_rawDescData = file_internal_traceprof_testapp_test_app_proto_rawDesc +) + +func file_internal_traceprof_testapp_test_app_proto_rawDescGZIP() []byte { + file_internal_traceprof_testapp_test_app_proto_rawDescOnce.Do(func() { + file_internal_traceprof_testapp_test_app_proto_rawDescData = protoimpl.X.CompressGZIP(file_internal_traceprof_testapp_test_app_proto_rawDescData) + }) + return file_internal_traceprof_testapp_test_app_proto_rawDescData +} + +var file_internal_traceprof_testapp_test_app_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_internal_traceprof_testapp_test_app_proto_goTypes = []interface{}{ + (*WorkReq)(nil), // 0: testapp.WorkReq + (*WorkRes)(nil), // 1: testapp.WorkRes +} +var file_internal_traceprof_testapp_test_app_proto_depIdxs = []int32{ + 0, // 0: testapp.TestApp.Work:input_type -> testapp.WorkReq + 1, // 1: testapp.TestApp.Work:output_type -> testapp.WorkRes + 1, // [1:2] is the sub-list for method output_type + 0, // [0:1] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_internal_traceprof_testapp_test_app_proto_init() } +func file_internal_traceprof_testapp_test_app_proto_init() { + if File_internal_traceprof_testapp_test_app_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_internal_traceprof_testapp_test_app_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WorkReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_internal_traceprof_testapp_test_app_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WorkRes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_internal_traceprof_testapp_test_app_proto_rawDesc, + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_internal_traceprof_testapp_test_app_proto_goTypes, + DependencyIndexes: file_internal_traceprof_testapp_test_app_proto_depIdxs, + MessageInfos: file_internal_traceprof_testapp_test_app_proto_msgTypes, + }.Build() + File_internal_traceprof_testapp_test_app_proto = out.File + file_internal_traceprof_testapp_test_app_proto_rawDesc = nil + file_internal_traceprof_testapp_test_app_proto_goTypes = nil + file_internal_traceprof_testapp_test_app_proto_depIdxs = nil +} diff --git a/internal/traceprof/testapp/test_app.proto b/internal/traceprof/testapp/test_app.proto new file mode 100644 index 0000000000..763136be32 --- /dev/null +++ b/internal/traceprof/testapp/test_app.proto @@ -0,0 +1,24 @@ +/* +Run this generate Go code after changing things below: + +protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative internal/traceprof/testapp/test_app.proto +*/ + +syntax = "proto3"; + +option go_package = "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/testapp"; +package testapp; + +service TestApp { + rpc Work(WorkReq) returns (WorkRes) {} +} + +message WorkReq { + int64 cpu_duration = 1; + int64 sql_duration = 2; +} + +message WorkRes { + string span_id = 1; + string local_root_span_id = 2; +} diff --git a/internal/traceprof/testapp/test_app_grpc.pb.go b/internal/traceprof/testapp/test_app_grpc.pb.go new file mode 100644 index 0000000000..5b0cdce070 --- /dev/null +++ b/internal/traceprof/testapp/test_app_grpc.pb.go @@ -0,0 +1,102 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. + +package testapp + +import ( + context "context" + + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// TestAppClient is the client API for TestApp service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type TestAppClient interface { + Work(ctx context.Context, in *WorkReq, opts ...grpc.CallOption) (*WorkRes, error) +} + +type testAppClient struct { + cc grpc.ClientConnInterface +} + +func NewTestAppClient(cc grpc.ClientConnInterface) TestAppClient { + return &testAppClient{cc} +} + +func (c *testAppClient) Work(ctx context.Context, in *WorkReq, opts ...grpc.CallOption) (*WorkRes, error) { + out := new(WorkRes) + err := c.cc.Invoke(ctx, "/testapp.TestApp/Work", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// TestAppServer is the server API for TestApp service. +// All implementations must embed UnimplementedTestAppServer +// for forward compatibility +type TestAppServer interface { + Work(context.Context, *WorkReq) (*WorkRes, error) + mustEmbedUnimplementedTestAppServer() +} + +// UnimplementedTestAppServer must be embedded to have forward compatible implementations. +type UnimplementedTestAppServer struct { +} + +func (UnimplementedTestAppServer) Work(context.Context, *WorkReq) (*WorkRes, error) { + return nil, status.Errorf(codes.Unimplemented, "method Work not implemented") +} +func (UnimplementedTestAppServer) mustEmbedUnimplementedTestAppServer() {} + +// UnsafeTestAppServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to TestAppServer will +// result in compilation errors. +type UnsafeTestAppServer interface { + mustEmbedUnimplementedTestAppServer() +} + +func RegisterTestAppServer(s grpc.ServiceRegistrar, srv TestAppServer) { + s.RegisterService(&TestApp_ServiceDesc, srv) +} + +func _TestApp_Work_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(WorkReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(TestAppServer).Work(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/testapp.TestApp/Work", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(TestAppServer).Work(ctx, req.(*WorkReq)) + } + return interceptor(ctx, in, info, handler) +} + +// TestApp_ServiceDesc is the grpc.ServiceDesc for TestApp service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var TestApp_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "testapp.TestApp", + HandlerType: (*TestAppServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Work", + Handler: _TestApp_Work_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "internal/traceprof/testapp/test_app.proto", +} diff --git a/internal/traceprof/traceprof.go b/internal/traceprof/traceprof.go new file mode 100644 index 0000000000..d2a7ec087f --- /dev/null +++ b/internal/traceprof/traceprof.go @@ -0,0 +1,20 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +// Package traceprof contains shared logic for cross-cutting tracer/profiler features. +package traceprof + +// pprof labels applied by the tracer to show up in the profiler's profiles. +const ( + SpanID = "span id" + LocalRootSpanID = "local root span id" + TraceEndpoint = "trace endpoint" +) + +// env variables used to control cross-cutting tracer/profiling features. +const ( + EndpointEnvVar = "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED" + CodeHotspotsEnvVar = "DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED" +) diff --git a/internal/traceprof/traceproftest/app.go b/internal/traceprof/traceproftest/app.go new file mode 100644 index 0000000000..02e698c11e --- /dev/null +++ b/internal/traceprof/traceproftest/app.go @@ -0,0 +1,285 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +package traceproftest + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io/ioutil" + "net" + "net/http" + "net/http/httptest" + "runtime/pprof" + "testing" + "time" + + grpctrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/google.golang.org/grpc" + httptrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/julienschmidt/httprouter" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + pb "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/testapp" + + "github.com/julienschmidt/httprouter" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" +) + +const ( + // HTTPWorkEndpointMethod is the http method used for the demo app. + HTTPWorkEndpointMethod = "POST" + // HTTPWorkEndpoint is the http endpoint used for the demo app. + HTTPWorkEndpoint = "/work/:secret" + // GRPCWorkEndpoint is the grpc endpoint used for the demo app. + GRPCWorkEndpoint = "/testapp.TestApp/Work" +) + +// CustomLabels are the user-defined pprof labels to apply in the work endpoint +// to simulate user label interacting with our own labels. +var CustomLabels = map[string]string{"user label": "user val"} + +// AppConfig defines the behavior and profiling options for the demo app. +type AppConfig struct { + // Endpoints is passed to tracer.WithProfilerEndpoints() + Endpoints bool + // CodeHotspots is passed to tracer.WithProfilerCodeHotspots() + CodeHotspots bool + // ChildOf uses tracer.ChildOf() to declare the parent of cpuSpan instead of + // tracer.StartSpanFromContext(). + ChildOf bool + // AppType is the type of the test app that is being simulated. + AppType testAppType +} + +type testAppType string + +const ( + // Direct directly executes requests logic without any transport overhead. + Direct testAppType = "direct" + // GRPC executes requests via GRPC. + GRPC testAppType = "grpc" + // HTTP executes requests via HTTP. + HTTP testAppType = "http" +) + +// Endpoint returns the "trace endpoint" label value for this app type. +func (a testAppType) Endpoint() string { + switch a { + case Direct: + return "" + case GRPC: + return GRPCWorkEndpoint + case HTTP: + return HTTPWorkEndpointMethod + " " + HTTPWorkEndpoint + default: + panic("unreachable") + } +} + +// Start starts the demo app, including tracer and profiler. +func (c AppConfig) Start(t testing.TB) *App { + a := &App{config: c} + a.start(t) + return a +} + +// App is an instance of the demo app. +type App struct { + httpServer *httptest.Server + grpcServer *grpc.Server + grpcClientConn *grpc.ClientConn + CPUProfiler + prof *CPUProfile + config AppConfig + stopped bool + + pb.UnimplementedTestAppServer +} + +func (a *App) start(t testing.TB) { + tracer.Start( + tracer.WithLogger(log.DiscardLogger{}), + tracer.WithProfilerCodeHotspots(a.config.CodeHotspots), + tracer.WithProfilerEndpoints(a.config.Endpoints), + ) + + switch a.config.AppType { + case Direct: + // nothing to setup + case GRPC: + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + si := grpctrace.StreamServerInterceptor(grpctrace.WithServiceName("my-grpc-client")) + ui := grpctrace.UnaryServerInterceptor(grpctrace.WithServiceName("my-grpc-client")) + a.grpcServer = grpc.NewServer(grpc.StreamInterceptor(si), grpc.UnaryInterceptor(ui)) + pb.RegisterTestAppServer(a.grpcServer, a) + go a.grpcServer.Serve(l) + a.grpcClientConn, err = grpc.Dial(l.Addr().String(), grpc.WithInsecure()) + require.NoError(t, err) + case HTTP: + router := httptrace.New() + // We use a routing pattern here so our test can validate that potential + // Personal Identifiable Information (PII) values, in this case :secret, + // isn't beeing collected in the "trace endpoint" label. + router.Handle("POST", HTTPWorkEndpoint, a.workHandler) + a.httpServer = httptest.NewServer(router) + default: + panic("unreachable") + } + a.CPUProfiler.start(t) +} + +// Stop stops the app, tracer and cpu profiler in an idempotent fashion. +func (a *App) Stop(t testing.TB) { + if a.stopped { + return + } + a.prof = a.CPUProfiler.Stop(t) + tracer.Stop() + switch a.config.AppType { + case Direct: + // nothing to tear down + case GRPC: + a.grpcServer.GracefulStop() + a.grpcClientConn.Close() + case HTTP: + a.httpServer.Close() + default: + panic("unreachable") + } + a.stopped = true +} + +// WorkRequest sends the given req to the demo app and returns the response. +// The config.AppType determines how the request is made. +func (a *App) WorkRequest(t testing.TB, req *pb.WorkReq) *pb.WorkRes { + switch a.config.AppType { + case Direct: + res, err := a.Work(context.Background(), req) + require.NoError(t, err) + return res + case GRPC: + client := pb.NewTestAppClient(a.grpcClientConn) + res, err := client.Work(context.Background(), req) + require.NoError(t, err) + return res + case HTTP: + body, err := json.Marshal(req) + require.NoError(t, err) + url := a.httpServer.URL + "/work/secret-pii" + res, err := http.Post(url, "text/plain", bytes.NewReader(body)) + require.NoError(t, err) + + defer res.Body.Close() + workRes := &pb.WorkRes{} + require.NoError(t, json.NewDecoder(res.Body).Decode(&workRes)) + return workRes + default: + panic("unreachable") + } +} + +// CPUProfile stops the app and returns its CPU profile. +func (a *App) CPUProfile(t testing.TB) *CPUProfile { + a.Stop(t) + return a.prof +} + +func (a *App) workHandler(w http.ResponseWriter, r *http.Request, p httprouter.Params) { + req := &pb.WorkReq{} + if err := json.NewDecoder(r.Body).Decode(req); err != nil { + http.Error(w, "bad body", http.StatusBadRequest) + } + res, err := a.Work(r.Context(), req) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + json.NewEncoder(w).Encode(res) +} + +// Work implements the request handler for the demo app. It's reused regardless +// of the config.AppType. +func (a *App) Work(ctx context.Context, req *pb.WorkReq) (*pb.WorkRes, error) { + // Simulate user-defined custom labels to make sure we don't overwrite them + // when we apply our own labels. + ctx = pprof.WithLabels(ctx, toLabelSet(CustomLabels)) + pprof.SetGoroutineLabels(ctx) + + localRootSpan, ok := tracer.SpanFromContext(ctx) + // We run our handler in a reqSpan so we can test that we still include the + // correct "local root span id" in the profiler labels. + reqSpan, reqSpanCtx := tracer.StartSpanFromContext(ctx, "workHandler") + defer reqSpan.Finish() + if !ok { + // when app type is Direct, reqSpan is our local root span + localRootSpan = reqSpan + } + + // fakeSQLQuery pretends to execute an APM instrumented SQL query. This tests + // that the parent goroutine labels are correctly restored when it finishes. + fakeSQLQuery(reqSpanCtx, "SELECT * FROM foo", time.Duration(req.SqlDuration)) + + // cpuSpan is the one that should show up in our profile while cpuHog is + // running. decoySpan is a child span that finishes before the cpuHog work + // begins to test that span's restore their parent span labels when + // finishing. + var cpuSpan, decoySpan ddtrace.Span + if a.config.ChildOf { + cpuSpan = tracer.StartSpan("cpuHog", tracer.ChildOf(reqSpan.Context())) + decoySpan = tracer.StartSpan("decoy", tracer.ChildOf(cpuSpan.Context())) + } else { + var ctx context.Context + cpuSpan, ctx = tracer.StartSpanFromContext(reqSpanCtx, "cpuHog") + decoySpan, _ = tracer.StartSpanFromContext(ctx, "decoy") + } + decoySpan.Finish() + + // Perform CPU intense work on another goroutine. This should still be + // tracked to the childSpan thanks to goroutines inheriting labels. + stop := make(chan struct{}) + go cpuHogUntil(stop) + if req.CpuDuration > 0 { + time.Sleep(time.Duration(req.CpuDuration)) + } + close(stop) + cpuSpan.Finish() + + return &pb.WorkRes{ + LocalRootSpanId: fmt.Sprintf("%d", localRootSpan.Context().SpanID()), + SpanId: fmt.Sprintf("%d", cpuSpan.Context().SpanID()), + }, nil +} + +func toLabelSet(m map[string]string) pprof.LabelSet { + var args []string + for k, v := range m { + args = append(args, k, v) + } + return pprof.Labels(args...) +} + +func fakeSQLQuery(ctx context.Context, sql string, d time.Duration) { + span, _ := tracer.StartSpanFromContext(ctx, "pgx.query") + defer span.Finish() + span.SetTag(ext.ResourceName, sql) + time.Sleep(d) +} + +func cpuHogUntil(stop chan struct{}) { + for i := 0; ; i++ { + select { + case <-stop: + return + default: + // burn cpu + fmt.Fprintf(ioutil.Discard, "%d", i) + } + } +} diff --git a/internal/traceprof/traceproftest/baseline.txt b/internal/traceprof/traceproftest/baseline.txt new file mode 100644 index 0000000000..5e0f2ba727 --- /dev/null +++ b/internal/traceprof/traceproftest/baseline.txt @@ -0,0 +1,51 @@ +goos: linux +goarch: amd64 +pkg: gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/traceproftest +cpu: Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz +BenchmarkEndpointsAndHotspots/direct/hello-world-4 4921081 15785 ns/op 15211 cpu-ns/op 0.01048 pprof-B/op 0.0008820 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 4892020 15767 ns/op 15132 cpu-ns/op 0.01063 pprof-B/op 0.0008847 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 4952154 15927 ns/op 15323 cpu-ns/op 0.01060 pprof-B/op 0.0008896 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 4965084 15699 ns/op 15149 cpu-ns/op 0.01059 pprof-B/op 0.0008939 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 4904989 15752 ns/op 15159 cpu-ns/op 0.01072 pprof-B/op 0.0008873 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 668 105784413 ns/op 95501038 cpu-ns/op 39.92 pprof-B/op 7.385 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 669 105511102 ns/op 95269102 cpu-ns/op 40.84 pprof-B/op 7.488 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 668 105763000 ns/op 95486729 cpu-ns/op 40.04 pprof-B/op 7.475 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 670 105647129 ns/op 95398376 cpu-ns/op 40.30 pprof-B/op 7.469 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 669 105676301 ns/op 95442234 cpu-ns/op 40.31 pprof-B/op 7.528 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 660 108170816 ns/op 17895706 cpu-ns/op 12.30 pprof-B/op 1.411 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 663 108555643 ns/op 18282702 cpu-ns/op 12.25 pprof-B/op 1.410 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 662 108640373 ns/op 18400929 cpu-ns/op 12.50 pprof-B/op 1.424 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 662 108709161 ns/op 18387981 cpu-ns/op 12.40 pprof-B/op 1.439 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 663 108537476 ns/op 18182355 cpu-ns/op 11.98 pprof-B/op 1.419 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 802096 90957 ns/op 89848 cpu-ns/op 0.09243 pprof-B/op 0.005170 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 816661 91077 ns/op 89966 cpu-ns/op 0.09163 pprof-B/op 0.005147 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 805726 91305 ns/op 90170 cpu-ns/op 0.09191 pprof-B/op 0.005182 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 804795 91338 ns/op 90216 cpu-ns/op 0.09206 pprof-B/op 0.005130 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 810649 90982 ns/op 89901 cpu-ns/op 0.09210 pprof-B/op 0.005101 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 662 106231692 ns/op 95926358 cpu-ns/op 43.62 pprof-B/op 7.565 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 662 106496325 ns/op 96186578 cpu-ns/op 43.37 pprof-B/op 7.548 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 666 106091798 ns/op 95819793 cpu-ns/op 43.04 pprof-B/op 7.466 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 672 106021833 ns/op 95720101 cpu-ns/op 42.70 pprof-B/op 7.506 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 666 106408102 ns/op 96104234 cpu-ns/op 43.23 pprof-B/op 7.538 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 666 108952881 ns/op 18584796 cpu-ns/op 15.14 pprof-B/op 1.398 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 663 108989035 ns/op 18578857 cpu-ns/op 14.84 pprof-B/op 1.466 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 666 108930125 ns/op 18509011 cpu-ns/op 14.89 pprof-B/op 1.429 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 662 109004308 ns/op 18616398 cpu-ns/op 13.92 pprof-B/op 1.424 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 664 108848134 ns/op 18456902 cpu-ns/op 14.25 pprof-B/op 1.415 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 579132 124004 ns/op 93700 cpu-ns/op 0.08870 pprof-B/op 0.004400 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 576729 123850 ns/op 93492 cpu-ns/op 0.08960 pprof-B/op 0.004363 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 575628 124019 ns/op 93802 cpu-ns/op 0.08961 pprof-B/op 0.004413 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 566127 124432 ns/op 93980 cpu-ns/op 0.08884 pprof-B/op 0.004376 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 580369 124002 ns/op 93632 cpu-ns/op 0.08970 pprof-B/op 0.004423 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 663 105846471 ns/op 95631702 cpu-ns/op 42.92 pprof-B/op 7.497 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 673 106236784 ns/op 95994293 cpu-ns/op 42.65 pprof-B/op 7.607 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 662 105743029 ns/op 95509385 cpu-ns/op 42.31 pprof-B/op 7.532 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 668 105871782 ns/op 95643201 cpu-ns/op 42.28 pprof-B/op 7.528 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 664 106567770 ns/op 96303681 cpu-ns/op 42.53 pprof-B/op 7.528 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 661 109036804 ns/op 18570105 cpu-ns/op 14.94 pprof-B/op 1.446 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 662 108958663 ns/op 18495461 cpu-ns/op 14.66 pprof-B/op 1.472 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 660 109053169 ns/op 18575706 cpu-ns/op 14.18 pprof-B/op 1.436 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 656 108724289 ns/op 18297399 cpu-ns/op 13.97 pprof-B/op 1.427 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 660 108614401 ns/op 18179128 cpu-ns/op 15.08 pprof-B/op 1.428 pprof-samples/op +PASS +ok gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/traceproftest 3690.445s diff --git a/internal/traceprof/traceproftest/bench.bash b/internal/traceprof/traceproftest/bench.bash new file mode 100755 index 0000000000..e7473872e6 --- /dev/null +++ b/internal/traceprof/traceproftest/bench.bash @@ -0,0 +1,18 @@ +set -eu +echo "-> Starting benchmark (ETA: 90min)" +BENCH_CMD="go test -benchtime 60s -count 5 -timeout 24h -run ^$ -bench . ." + +if ! which benchstat; then + echo "error: needs benchstat, install via:" + echo "go install golang.org/x/perf/cmd/benchstat@latest" + exit 1 +fi + +$BENCH_CMD | tee baseline.txt + +env \ + BENCH_HOTSPOTS=true \ + BENCH_ENDPOINTS=true \ + $BENCH_CMD | tee endpoints-and-hotspots.txt + +benchstat -sort delta baseline.txt endpoints-and-hotspots.txt | tee overhead.txt diff --git a/internal/traceprof/traceproftest/cpu_profiler.go b/internal/traceprof/traceproftest/cpu_profiler.go new file mode 100644 index 0000000000..148de661c5 --- /dev/null +++ b/internal/traceprof/traceproftest/cpu_profiler.go @@ -0,0 +1,135 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +package traceproftest + +import ( + "bytes" + "os" + "runtime/pprof" + "strings" + "testing" + "time" + + "github.com/google/pprof/profile" + "github.com/stretchr/testify/require" +) + +// StartCPUProfile starts a new CPU profile. +func StartCPUProfile(t testing.TB) *CPUProfiler { + cp := &CPUProfiler{} + cp.start(t) + return cp +} + +// CPUProfiler is a simplified implementation of the CPU profiler found in pkg +// profiler that retains essential performance characteristics but is more +// convenient for testing. +// +// TODO(fg) Would be nice to figure out a clean way to use the actual profiler +// pkg for this in the future. +type CPUProfiler struct { + buf bytes.Buffer + prof *CPUProfile + stopped bool +} + +// Stop stops the CPU profiler and returns the CPU profile. +func (c *CPUProfiler) Stop(t testing.TB) *CPUProfile { + if c.stopped { + return c.prof + } + c.stopped = true + pprof.StopCPUProfile() + var err error + c.prof, err = NewCPUProfile(c.buf.Bytes()) + require.NoError(t, err) + return c.prof +} + +// start starts the cpu profiler. +func (c *CPUProfiler) start(t testing.TB) { + require.NoError(t, pprof.StartCPUProfile(&c.buf)) +} + +// NewCPUProfile returns a new CPU profile for the given data. +func NewCPUProfile(data []byte) (*CPUProfile, error) { + cp := &CPUProfile{data: data} + prof, err := profile.ParseData(data) + cp.prof = prof + return cp, err +} + +// CPUProfile is a test utility to extract data from a CPU profile for testing. +type CPUProfile struct { + data []byte + prof *profile.Profile +} + +// Duration returns the total amont of CPU time in this profile. +func (c *CPUProfile) Duration() (d time.Duration) { + return c.LabelsDuration(nil) +} + +// LabelDuration returns the CPU time for the given pprof label in this +// profile. The special val "*" can be used to match any label value. +func (c *CPUProfile) LabelDuration(label, val string) (d time.Duration) { + return c.LabelsDuration(map[string]string{label: val}) +} + +// LabelsDuration returns the CPU time for the given pprof labels in this +// profile. The special val "*" can be used to match any label value. +func (c *CPUProfile) LabelsDuration(labels map[string]string) (d time.Duration) { +sampleloop: + for _, s := range c.prof.Sample { + for k, v := range labels { + if vals := s.Label[k]; len(vals) != 1 || (vals[0] != v && v != "*") { + continue sampleloop + } + } + d += time.Duration(s.Value[1]) + } + return d +} + +// Samples returns the number of samples in the CPU profile. +func (c *CPUProfile) Samples() int { + return len(c.prof.Sample) +} + +// Size returns the size of the pprof encoded CPU profile in bytes. +func (c *CPUProfile) Size() int { + return len(c.data) +} + +// Labels returns the number of samples per individual label in this profile. +func (c *CPUProfile) Labels() map[Label]int { + m := map[Label]int{} + for _, s := range c.prof.Sample { + for k, v := range s.Label { + val := strings.Join(v, ",") + lbl := Label{Key: k, Val: val} + m[lbl]++ + } + } + return m +} + +// WriteFile writes the profile to the given path. +func (c *CPUProfile) WriteFile(path string) error { + file, err := os.Create(path) + if err != nil { + return err + } + defer file.Close() + return c.prof.Write(file) +} + +// Label represents a simplified pprof label where the value is a +// comma-separated string rather than a []string. +type Label struct { + Key string + Val string +} diff --git a/internal/traceprof/traceproftest/endpoints-and-hotspots.txt b/internal/traceprof/traceproftest/endpoints-and-hotspots.txt new file mode 100644 index 0000000000..3eaf97a323 --- /dev/null +++ b/internal/traceprof/traceproftest/endpoints-and-hotspots.txt @@ -0,0 +1,51 @@ +goos: linux +goarch: amd64 +pkg: gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/traceproftest +cpu: Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz +BenchmarkEndpointsAndHotspots/direct/hello-world-4 3739210 20677 ns/op 20019 cpu-ns/op 0.03021 pprof-B/op 0.001218 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 3623416 20979 ns/op 20130 cpu-ns/op 0.02966 pprof-B/op 0.001207 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 3631983 20825 ns/op 19961 cpu-ns/op 0.02999 pprof-B/op 0.001205 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 3636554 20747 ns/op 20001 cpu-ns/op 0.02942 pprof-B/op 0.001191 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/hello-world-4 3694231 20601 ns/op 19881 cpu-ns/op 0.02950 pprof-B/op 0.001188 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 669 106220497 ns/op 95934055 cpu-ns/op 90.13 pprof-B/op 7.404 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 670 105507300 ns/op 95270978 cpu-ns/op 89.46 pprof-B/op 7.352 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 667 105778813 ns/op 95539941 cpu-ns/op 90.05 pprof-B/op 7.456 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 666 105961107 ns/op 95729228 cpu-ns/op 90.64 pprof-B/op 7.434 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/cpu-bound-4 670 105634368 ns/op 95398926 cpu-ns/op 90.68 pprof-B/op 7.453 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 664 108494230 ns/op 18235434 cpu-ns/op 36.85 pprof-B/op 1.407 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 663 108659659 ns/op 18359072 cpu-ns/op 37.25 pprof-B/op 1.406 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 668 108443400 ns/op 18182275 cpu-ns/op 37.64 pprof-B/op 1.409 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 667 108651265 ns/op 18411639 cpu-ns/op 37.05 pprof-B/op 1.424 pprof-samples/op +BenchmarkEndpointsAndHotspots/direct/io-bound-4 667 108755768 ns/op 18465596 cpu-ns/op 36.66 pprof-B/op 1.428 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 748431 96830 ns/op 95688 cpu-ns/op 0.1325 pprof-B/op 0.004800 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 745563 96831 ns/op 95628 cpu-ns/op 0.1321 pprof-B/op 0.004741 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 754944 96760 ns/op 95613 cpu-ns/op 0.1308 pprof-B/op 0.004718 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 756376 96990 ns/op 95782 cpu-ns/op 0.1304 pprof-B/op 0.004737 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/hello-world-4 749299 96551 ns/op 95391 cpu-ns/op 0.1297 pprof-B/op 0.004694 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 663 105973269 ns/op 95700064 cpu-ns/op 102.4 pprof-B/op 7.402 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 664 106425759 ns/op 96124757 cpu-ns/op 102.1 pprof-B/op 7.412 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 666 106072086 ns/op 95784385 cpu-ns/op 103.3 pprof-B/op 7.487 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 667 106539706 ns/op 96258328 cpu-ns/op 102.5 pprof-B/op 7.460 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/cpu-bound-4 670 106118464 ns/op 95860915 cpu-ns/op 101.9 pprof-B/op 7.390 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 666 109171557 ns/op 18810566 cpu-ns/op 42.36 pprof-B/op 1.447 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 666 108967007 ns/op 18633837 cpu-ns/op 42.86 pprof-B/op 1.409 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 660 109051839 ns/op 18663982 cpu-ns/op 41.78 pprof-B/op 1.416 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 662 109222717 ns/op 18859824 cpu-ns/op 43.55 pprof-B/op 1.442 pprof-samples/op +BenchmarkEndpointsAndHotspots/http/io-bound-4 667 108748793 ns/op 18357311 cpu-ns/op 40.40 pprof-B/op 1.393 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 548379 132653 ns/op 106989 cpu-ns/op 0.1349 pprof-B/op 0.005111 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 549154 132422 ns/op 106848 cpu-ns/op 0.1374 pprof-B/op 0.005132 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 546322 132898 ns/op 107045 cpu-ns/op 0.1343 pprof-B/op 0.005120 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 546170 132744 ns/op 107097 cpu-ns/op 0.1358 pprof-B/op 0.005166 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/hello-world-4 538756 132750 ns/op 107020 cpu-ns/op 0.1351 pprof-B/op 0.005114 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 663 106066994 ns/op 95838461 cpu-ns/op 101.6 pprof-B/op 7.452 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 664 105968183 ns/op 95735758 cpu-ns/op 103.0 pprof-B/op 7.409 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 664 105896722 ns/op 95664111 cpu-ns/op 101.3 pprof-B/op 7.362 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 664 105989749 ns/op 95751738 cpu-ns/op 101.4 pprof-B/op 7.466 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/cpu-bound-4 667 105812325 ns/op 95554936 cpu-ns/op 101.3 pprof-B/op 7.440 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 662 109102443 ns/op 18646395 cpu-ns/op 42.54 pprof-B/op 1.448 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 662 109286255 ns/op 18855193 cpu-ns/op 41.62 pprof-B/op 1.447 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 662 109202894 ns/op 18748116 cpu-ns/op 42.46 pprof-B/op 1.466 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 662 109110671 ns/op 18681041 cpu-ns/op 41.90 pprof-B/op 1.435 pprof-samples/op +BenchmarkEndpointsAndHotspots/grpc/io-bound-4 658 108596149 ns/op 18191310 cpu-ns/op 41.77 pprof-B/op 1.416 pprof-samples/op +PASS +ok gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/traceproftest 3712.759s diff --git a/internal/traceprof/traceproftest/overhead.txt b/internal/traceprof/traceproftest/overhead.txt new file mode 100644 index 0000000000..7c68d216f5 --- /dev/null +++ b/internal/traceprof/traceproftest/overhead.txt @@ -0,0 +1,43 @@ +name old time/op new time/op delta +EndpointsAndHotspots/direct/hello-world-4 15.8µs ± 1% 20.8µs ± 1% +31.55% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/hello-world-4 124µs ± 0% 133µs ± 0% +6.96% (p=0.008 n=5+5) +EndpointsAndHotspots/http/hello-world-4 91.1µs ± 0% 96.8µs ± 0% +6.21% (p=0.008 n=5+5) +EndpointsAndHotspots/direct/cpu-bound-4 106ms ± 0% 106ms ± 0% ~ (p=0.690 n=5+5) +EndpointsAndHotspots/direct/io-bound-4 109ms ± 0% 109ms ± 0% ~ (p=0.690 n=5+5) +EndpointsAndHotspots/http/cpu-bound-4 106ms ± 0% 106ms ± 0% ~ (p=1.000 n=5+5) +EndpointsAndHotspots/http/io-bound-4 109ms ± 0% 109ms ± 0% ~ (p=0.310 n=5+5) +EndpointsAndHotspots/grpc/cpu-bound-4 106ms ± 0% 106ms ± 0% ~ (p=1.000 n=5+5) +EndpointsAndHotspots/grpc/io-bound-4 109ms ± 0% 109ms ± 0% ~ (p=0.151 n=5+5) + +name old cpu-time/op new cpu-time/op delta +EndpointsAndHotspots/direct/hello-world-4 15.2µs ± 1% 20.0µs ± 1% +31.61% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/hello-world-4 93.7µs ± 0% 107.0µs ± 0% +14.17% (p=0.008 n=5+5) +EndpointsAndHotspots/http/hello-world-4 90.0µs ± 0% 95.6µs ± 0% +6.22% (p=0.008 n=5+5) +EndpointsAndHotspots/direct/cpu-bound-4 95.4ms ± 0% 95.6ms ± 0% ~ (p=0.310 n=5+5) +EndpointsAndHotspots/direct/io-bound-4 18.2ms ± 2% 18.3ms ± 1% ~ (p=0.548 n=5+5) +EndpointsAndHotspots/http/cpu-bound-4 96.0ms ± 0% 95.9ms ± 0% ~ (p=1.000 n=5+5) +EndpointsAndHotspots/http/io-bound-4 18.5ms ± 0% 18.7ms ± 2% ~ (p=0.151 n=5+5) +EndpointsAndHotspots/grpc/cpu-bound-4 95.8ms ± 1% 95.7ms ± 0% ~ (p=1.000 n=5+5) +EndpointsAndHotspots/grpc/io-bound-4 18.4ms ± 1% 18.6ms ± 2% ~ (p=0.095 n=5+5) + +name old pprof-alloc/op new pprof-alloc/op delta +EndpointsAndHotspots/direct/io-bound-4 12.3B ± 2% 37.1B ± 1% +201.89% (p=0.008 n=5+5) +EndpointsAndHotspots/http/io-bound-4 14.6B ± 5% 42.2B ± 4% +188.81% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/io-bound-4 14.6B ± 4% 42.1B ± 1% +188.74% (p=0.008 n=5+5) +EndpointsAndHotspots/direct/hello-world-4 0.01B ± 1% 0.03B ± 2% +180.61% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/cpu-bound-4 42.5B ± 1% 101.7B ± 1% +139.13% (p=0.008 n=5+5) +EndpointsAndHotspots/http/cpu-bound-4 43.2B ± 1% 102.4B ± 1% +137.17% (p=0.008 n=5+5) +EndpointsAndHotspots/direct/cpu-bound-4 40.3B ± 1% 90.2B ± 1% +123.90% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/hello-world-4 0.09B ± 1% 0.14B ± 1% +51.75% (p=0.008 n=5+5) +EndpointsAndHotspots/http/hello-world-4 0.09B ± 0% 0.13B ± 1% +42.46% (p=0.008 n=5+5) + +name old pprof-samples/op new pprof-samples/op delta +EndpointsAndHotspots/direct/hello-world-4 0.00 ± 1% 0.00 ± 1% +35.41% (p=0.008 n=5+5) +EndpointsAndHotspots/grpc/hello-world-4 0.00 ± 1% 0.01 ± 1% +16.69% (p=0.008 n=5+5) +EndpointsAndHotspots/direct/cpu-bound-4 7.47 ± 1% 7.42 ± 1% ~ (p=0.095 n=5+5) +EndpointsAndHotspots/direct/io-bound-4 1.42 ± 1% 1.41 ± 1% ~ (p=0.333 n=5+5) +EndpointsAndHotspots/http/io-bound-4 1.43 ± 3% 1.42 ± 2% ~ (p=0.841 n=5+5) +EndpointsAndHotspots/grpc/io-bound-4 1.44 ± 2% 1.44 ± 2% ~ (p=0.841 n=5+5) +EndpointsAndHotspots/http/cpu-bound-4 7.52 ± 1% 7.43 ± 1% -1.25% (p=0.016 n=5+5) +EndpointsAndHotspots/grpc/cpu-bound-4 7.54 ± 1% 7.43 ± 1% -1.49% (p=0.008 n=5+5) +EndpointsAndHotspots/http/hello-world-4 0.01 ± 1% 0.00 ± 1% -7.93% (p=0.008 n=5+5) diff --git a/internal/traceprof/traceproftest/traceprof_test.go b/internal/traceprof/traceproftest/traceprof_test.go new file mode 100644 index 0000000000..6f57b17f3f --- /dev/null +++ b/internal/traceprof/traceproftest/traceprof_test.go @@ -0,0 +1,228 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +// traceproftest provides testing for cross-cutting tracer/profiler features. +// It's a separate package from traceprof to avoid circular dependencies. +package traceproftest + +import ( + "os" + "runtime" + "sync" + "testing" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof" + pb "gopkg.in/DataDog/dd-trace-go.v1/internal/traceprof/testapp" + + "github.com/stretchr/testify/require" +) + +// TestEndpointsAndCodeHotspots tests the code hotspots and endpoint filtering +// feature implemented using pprof labels in the tracer which are picked up by +// the CPU profiler. This is done using a small test application that simulates +// a simple http or grpc workload. +func TestEndpointsAndCodeHotspots(t *testing.T) { + // The amount of time the profiler should be able to detect. It's much lower + // than CpuDuration to avoid flaky test behavior and because we're not trying + // assert the quality of the profiler beyond the presence of the right + // labels. + const minCPUDuration = 10 * time.Millisecond + + // testCommon runs the common parts of this test. + testCommon := func(t *testing.T, c AppConfig) (*pb.WorkRes, *CPUProfile) { + // Simulate a cpu-heavy request with a short sql query. This is a best-case + // scenario for CPU Code Hotspots. + req := &pb.WorkReq{ + CpuDuration: int64(90 * time.Millisecond), + SqlDuration: int64(10 * time.Millisecond), + } + + // Rerun test a few times with doubled duration until it passes to avoid + // flaky behavior in CI. + for attempt := 1; ; attempt++ { + app := c.Start(t) + defer app.Stop(t) + + res := app.WorkRequest(t, req) + prof := app.CPUProfile(t) + + notEnoughSamples := (prof.Duration() < minCPUDuration) || + (prof.LabelsDuration(CustomLabels) < minCPUDuration) || + (c.CodeHotspots && prof.LabelDuration(traceprof.SpanID, "*") < minCPUDuration) || + (c.AppType != Direct && c.Endpoints && prof.LabelDuration(traceprof.TraceEndpoint, "*") < minCPUDuration) + if attempt <= 5 && notEnoughSamples { + req.CpuDuration *= 2 + req.SqlDuration *= 2 + t.Logf("attempt %d: not enough cpu samples, doubling duration", attempt) + continue + } + require.True(t, ValidSpanID(res.SpanId)) + require.True(t, ValidSpanID(res.LocalRootSpanId)) + require.GreaterOrEqual(t, prof.Duration(), minCPUDuration) + require.GreaterOrEqual(t, prof.LabelsDuration(CustomLabels), minCPUDuration) + return res, prof + } + } + + for _, appType := range []testAppType{Direct, HTTP, GRPC} { + t.Run(string(appType), func(t *testing.T) { + t.Run("none", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + }) + require.Zero(t, prof.LabelDuration(traceprof.SpanID, res.SpanId)) + require.Zero(t, prof.LabelDuration(traceprof.LocalRootSpanID, res.LocalRootSpanId)) + require.Zero(t, prof.LabelDuration(traceprof.TraceEndpoint, appType.Endpoint())) + }) + + t.Run("endpoints", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + Endpoints: true, + }) + require.Zero(t, prof.LabelDuration(traceprof.SpanID, res.SpanId)) + require.Zero(t, prof.LabelDuration(traceprof.LocalRootSpanID, res.LocalRootSpanId)) + if appType != Direct { + require.GreaterOrEqual(t, prof.LabelDuration(traceprof.TraceEndpoint, appType.Endpoint()), minCPUDuration) + } + }) + + t.Run("code-hotspots", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + CodeHotspots: true, + }) + require.GreaterOrEqual(t, prof.LabelsDuration(map[string]string{ + traceprof.SpanID: res.SpanId, + traceprof.LocalRootSpanID: res.LocalRootSpanId, + }), minCPUDuration) + require.Zero(t, prof.LabelDuration(traceprof.TraceEndpoint, appType.Endpoint())) + }) + + t.Run("all", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + CodeHotspots: true, + Endpoints: true, + }) + wantLabels := map[string]string{ + traceprof.SpanID: res.SpanId, + traceprof.LocalRootSpanID: res.LocalRootSpanId, + } + if appType != Direct { + wantLabels[traceprof.TraceEndpoint] = appType.Endpoint() + } + require.GreaterOrEqual(t, prof.LabelsDuration(wantLabels), minCPUDuration) + }) + + t.Run("none-child-of", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + ChildOf: true, + }) + require.Zero(t, prof.LabelDuration(traceprof.SpanID, res.SpanId)) + require.Zero(t, prof.LabelDuration(traceprof.LocalRootSpanID, res.LocalRootSpanId)) + require.Zero(t, prof.LabelDuration(traceprof.TraceEndpoint, appType.Endpoint())) + }) + + t.Run("all-child-of", func(t *testing.T) { + res, prof := testCommon(t, AppConfig{ + AppType: appType, + CodeHotspots: true, + Endpoints: true, + ChildOf: true, + }) + wantLabels := map[string]string{ + traceprof.SpanID: res.SpanId, + traceprof.LocalRootSpanID: res.LocalRootSpanId, + } + if appType != Direct { + wantLabels[traceprof.TraceEndpoint] = appType.Endpoint() + } + require.GreaterOrEqual(t, prof.LabelsDuration(wantLabels), minCPUDuration) + }) + }) + } +} + +// BenchmarkEndpointsAndHotspots tries to quantify the overhead (latency, pprof +// samples, cpu time, pprof size) of profiler endpoints and code hotspots. Use +// ./bench.sh for executing these benchmarks. +func BenchmarkEndpointsAndHotspots(b *testing.B) { + benchCommon := func(b *testing.B, appType testAppType, req *pb.WorkReq) { + config := AppConfig{ + CodeHotspots: os.Getenv("BENCH_ENDPOINTS") == "true", + Endpoints: os.Getenv("BENCH_HOTSPOTS") == "true", + AppType: appType, + } + app := config.Start(b) + defer app.Stop(b) + + b.ResetTimer() + var ( + wg sync.WaitGroup + concurrency = runtime.GOMAXPROCS(0) + startCPUTime = CPURusage(b) + ) + for g := 0; g < concurrency; g++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < b.N; i++ { + app.WorkRequest(b, req) + } + }() + } + wg.Wait() + cpuTime := CPURusage(b) - startCPUTime + b.StopTimer() + + prof := app.CPUProfile(b) + expectedCPUTime := time.Duration(b.N) * time.Duration(req.CpuDuration) + if expectedCPUTime >= 90*time.Millisecond { + // sanity check profile results if enough samples can be expected + require.Greater(b, prof.Duration(), time.Duration(0)) + if config.CodeHotspots { + require.Greater(b, prof.LabelDuration(traceprof.SpanID, "*"), time.Duration(0)) + require.Greater(b, prof.LabelDuration(traceprof.LocalRootSpanID, "*"), time.Duration(0)) + } + if config.Endpoints && appType != Direct { + require.Greater(b, prof.LabelDuration(traceprof.TraceEndpoint, appType.Endpoint()), time.Duration(0)) + } + } + + b.ReportMetric(float64(prof.Samples())/float64(b.N*concurrency), "pprof-samples/op") + b.ReportMetric(float64(prof.Size())/float64(b.N*concurrency), "pprof-B/op") + b.ReportMetric(float64(cpuTime)/float64(b.N*concurrency), "cpu-ns/op") + } + + for _, appType := range []testAppType{Direct, HTTP, GRPC} { + b.Run(string(appType), func(b *testing.B) { + b.Run("hello-world", func(b *testing.B) { + // Simulates a handler that does no actual work other than paying for + // the instrumentation overhead. + benchCommon(b, appType, &pb.WorkReq{ + CpuDuration: int64(0 * time.Millisecond), + SqlDuration: int64(0 * time.Millisecond), + }) + }) + + b.Run("cpu-bound", func(b *testing.B) { + benchCommon(b, appType, &pb.WorkReq{ + CpuDuration: int64(90 * time.Millisecond), + SqlDuration: int64(10 * time.Millisecond), + }) + }) + + b.Run("io-bound", func(b *testing.B) { + benchCommon(b, appType, &pb.WorkReq{ + CpuDuration: int64(10 * time.Millisecond), + SqlDuration: int64(90 * time.Millisecond), + }) + }) + }) + } +} diff --git a/internal/traceprof/traceproftest/traceproftest.go b/internal/traceprof/traceproftest/traceproftest.go new file mode 100644 index 0000000000..7b4a55c6f5 --- /dev/null +++ b/internal/traceprof/traceproftest/traceproftest.go @@ -0,0 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +// Package traceproftest contains test cases and test helpers for testing +// cross-cutting tracer/profiler features such as code hotspots and endpoints. +// The package is not supposed to be imported by other packages. +package traceproftest diff --git a/internal/traceprof/traceproftest/util.go b/internal/traceprof/traceproftest/util.go new file mode 100644 index 0000000000..727b1b7f18 --- /dev/null +++ b/internal/traceprof/traceproftest/util.go @@ -0,0 +1,37 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +package traceproftest + +import ( + "strconv" + "syscall" + "testing" + "time" +) + +// ValidSpanID returns true if id is a valid span id (random.Uint64()). +func ValidSpanID(id string) bool { + val, err := strconv.ParseUint(id, 10, 64) + return err == nil && val > 0 +} + +// CPURusage returns the amount of On-CPU time for the process (sys+user) since +// it has been started. It uses getrusage(2). +func CPURusage(t testing.TB) time.Duration { + // Note: If this becomes a portability issue in the future (windows?) it's okay + // to implement a non-working version for those platforms. We only use this for + // benchmarking on linux right now. + var rusage syscall.Rusage + if err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage); err != nil { + t.Fatal(err) + panic(err) + } + return timevalDuration(rusage.Stime) + timevalDuration(rusage.Utime) +} + +func timevalDuration(tv syscall.Timeval) time.Duration { + return time.Duration(tv.Nano()) +} diff --git a/profiler/profile_test.go b/profiler/profile_test.go index fc599c7a67..c8978f7975 100644 --- a/profiler/profile_test.go +++ b/profiler/profile_test.go @@ -186,6 +186,7 @@ main;bar 0 0 8 16 stopCPUProfile = func() {} p, err := unstartedProfiler(CPUDuration(10 * time.Millisecond)) + require.NoError(t, err) start := time.Now() profs, err := p.runProfile(CPUProfile) end := time.Now() @@ -203,6 +204,7 @@ main;bar 0 0 8 16 } p, err := unstartedProfiler() + require.NoError(t, err) profs, err := p.runProfile(GoroutineProfile) require.NoError(t, err) assert.Equal(t, "goroutines.pprof", profs[0].name) @@ -233,12 +235,13 @@ main.main() ` defer func(old func(_ string, _ io.Writer, _ int) error) { lookupProfile = old }(lookupProfile) - lookupProfile = func(name string, w io.Writer, _ int) error { + lookupProfile = func(_ string, w io.Writer, _ int) error { _, err := w.Write([]byte(sample)) return err } p, err := unstartedProfiler() + require.NoError(t, err) profs, err := p.runProfile(expGoroutineWaitProfile) require.NoError(t, err) require.Equal(t, "goroutineswait.pprof", profs[0].name)