Skip to content

Commit

Permalink
tracing: add StartSpanCtx
Browse files Browse the repository at this point in the history
Prior to this commit, the Tracer dealt only with Spans directly and any
kind of context wrapping had to be done out-of-band via a number of
helper methods.
This isn't ideal from a performance point of view. Context wrapping
figures prominently in the allocation overhead of tracing, so we ought
to be able to optimize it.

To this end, Tracer gets a StartSpanCtx method that internalizes the
context handling and will make it more straightforward to optimize.
StartSpanCtx also automates the propagation of log tags from the
incoming Context to the created Spans, reducing the number of options
passed in the common case. In particular, the WithCtxLogTags option was
removed.

The StartSpanCtx method was applied to the codebase where it was
straightforward. As far as I can tell, this includes all Spans created
in the critical path, so we should expect to see a measurable decrease
in allocation overhead related to ContextWithSpan once we add an
optimization to (the impl of) StartSpanCtx that allocates the wrapping
Context together with the Span.

Release note: None
  • Loading branch information
tbg committed Dec 21, 2020
1 parent 71cf114 commit e1f9e34
Show file tree
Hide file tree
Showing 15 changed files with 160 additions and 155 deletions.
3 changes: 2 additions & 1 deletion pkg/jobs/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,8 @@ func (r *Registry) CreateStartableJobWithTxn(
}
// Construct a context which contains a tracing span that follows from the
// span in the parent context. We don't directly use the parent span because
// we want independent lifetimes and cancellation.
// we want independent lifetimes and cancellation. For the same reason, we
// don't use the Context returned by ForkCtxSpan.
resumerCtx, cancel := r.makeCtx()
_, span := tracing.ForkCtxSpan(ctx, "job")
if span != nil {
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/kvserver/batcheval/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ go_library(
"//pkg/util/hlc",
"//pkg/util/limit",
"//pkg/util/log",
"//pkg/util/tracing",
"//pkg/util/uuid",
"//vendor/github.com/cockroachdb/errors",
"//vendor/github.com/kr/pretty",
Expand Down
4 changes: 0 additions & 4 deletions pkg/kv/kvserver/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -810,9 +809,6 @@ func splitTrigger(
split *roachpb.SplitTrigger,
ts hlc.Timestamp,
) (enginepb.MVCCStats, result.Result, error) {
// TODO(andrei): should this span be a child of the ctx's (if any)?
sp := rec.ClusterSettings().Tracer.StartSpan("split", tracing.WithCtxLogTags(ctx))
defer sp.Finish()
desc := rec.Desc()
if !bytes.Equal(desc.StartKey, split.LeftDesc.StartKey) ||
!bytes.Equal(desc.EndKey, split.RightDesc.EndKey) {
Expand Down
11 changes: 8 additions & 3 deletions pkg/kv/kvserver/replica_application_decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,14 @@ func (d *replicaDecoder) createTracingSpans(ctx context.Context) {
if err != nil {
log.Errorf(ctx, "unable to extract trace data from raft command: %s", err)
} else {
cmd.sp = d.r.AmbientContext.Tracer.StartSpan(
"raft application", tracing.WithParentAndManualCollection(spanCtx), tracing.WithFollowsFrom())
cmd.ctx = tracing.ContextWithSpan(ctx, cmd.sp)
cmd.ctx, cmd.sp = d.r.AmbientContext.Tracer.StartSpanCtx(
ctx,
"raft application",
// NB: we are lying here - we are not actually going to propagate
// the recording towards the root. That seems ok.
tracing.WithParentAndManualCollection(spanCtx),
tracing.WithFollowsFrom(),
)
}
} else {
cmd.ctx, cmd.sp = tracing.ForkCtxSpan(ctx, opName)
Expand Down
24 changes: 12 additions & 12 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
"go.etcd.io/etcd/raft/v3"
)

Expand Down Expand Up @@ -286,31 +287,30 @@ func (p *pendingLeaseRequest) requestLeaseAsync(
status kvserverpb.LeaseStatus,
leaseReq roachpb.Request,
) error {
// Create a new context *without* a timeout. Instead, we multiplex the
// cancellation of all contexts onto this new one, only canceling it if all
// coalesced requests timeout/cancel. p.cancelLocked (defined below) is the
// cancel function that must be called; calling just cancel is insufficient.
ctx := p.repl.AnnotateCtx(context.Background())
const opName = "request range lease"
var sp *tracing.Span
tr := p.repl.AmbientContext.Tracer
tagsOpt := tracing.WithLogTags(logtags.FromContext(parentCtx))
var sp *tracing.Span
if parentSp := tracing.SpanFromContext(parentCtx); parentSp != nil {
// We use FollowsFrom because the lease request's span can outlive the
// parent request. This is possible if parentCtx is canceled after others
// have coalesced on to this lease request (see leaseRequestHandle.Cancel).
// TODO(andrei): we should use Tracer.StartChildSpan() for efficiency,
// except that one does not currently support FollowsFrom relationships.
sp = tr.StartSpan(
ctx, sp = tr.StartSpanCtx(
ctx,
opName,
tracing.WithParentAndAutoCollection(parentSp),
tracing.WithFollowsFrom(),
tracing.WithCtxLogTags(parentCtx),
tagsOpt,
)
} else {
sp = tr.StartSpan(opName, tracing.WithCtxLogTags(parentCtx))
ctx, sp = tr.StartSpanCtx(ctx, opName, tagsOpt)
}

// Create a new context *without* a timeout. Instead, we multiplex the
// cancellation of all contexts onto this new one, only canceling it if all
// coalesced requests timeout/cancel. p.cancelLocked (defined below) is the
// cancel function that must be called; calling just cancel is insufficient.
ctx := p.repl.AnnotateCtx(context.Background())
ctx = tracing.ContextWithSpan(ctx, sp)
ctx, cancel := context.WithCancel(ctx)

// Make sure we clean up the context and request state. This will be called
Expand Down
3 changes: 1 addition & 2 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12566,8 +12566,7 @@ func TestLaterReproposalsDoNotReuseContext(t *testing.T) {

// Hold the RaftLock to encourage the reproposals to occur in the same batch.
tc.repl.RaftLock()
sp := tracer.StartSpan("replica send", tracing.WithCtxLogTags(ctx), tracing.WithForceRealSpan())
tracedCtx := tracing.ContextWithSpan(ctx, sp)
tracedCtx, sp := tracer.StartSpanCtx(ctx, "replica send", tracing.WithForceRealSpan())
// Go out of our way to enable recording so that expensive logging is enabled
// for this context.
sp.SetVerbose(true)
Expand Down
8 changes: 3 additions & 5 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -930,20 +930,18 @@ func (n *Node) setupSpanForIncomingRPC(
// The operation name matches the one created by the interceptor in the
// remoteTrace case below.
const opName = "/cockroach.roachpb.Internal/Batch"
tr := n.storeCfg.AmbientCtx.Tracer
var newSpan, grpcSpan *tracing.Span
if isLocalRequest {
// This is a local request which circumvented gRPC. Start a span now.
ctx, newSpan = tracing.ChildSpan(ctx, opName)
ctx, newSpan = tracing.EnsureChildSpan(ctx, tr, opName)
} else {
grpcSpan = tracing.SpanFromContext(ctx)
if grpcSpan == nil {
// If tracing information was passed via gRPC metadata, the gRPC interceptor
// should have opened a span for us. If not, open a span now (if tracing is
// disabled, this will be a noop span).
newSpan = n.storeCfg.AmbientCtx.Tracer.StartSpan(
opName, tracing.WithLogTags(n.storeCfg.AmbientCtx.LogTags()),
)
ctx = tracing.ContextWithSpan(ctx, newSpan)
ctx, newSpan = tr.StartSpanCtx(ctx, opName)
} else {
grpcSpan.SetTag("node", n.Descriptor.NodeID)
}
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/catalog/lease/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ go_library(
"//pkg/util/syncutil",
"//pkg/util/syncutil/singleflight",
"//pkg/util/timeutil",
"//pkg/util/tracing",
"//vendor/github.com/cockroachdb/errors",
"//vendor/github.com/cockroachdb/logtags",
"//vendor/github.com/cockroachdb/redact",
Expand Down
20 changes: 2 additions & 18 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -1493,28 +1493,12 @@ func (ex *connExecutor) recordTransaction(ev txnEvent, implicit bool, txnStart t
// parentCtx for an existing span and creates a root span if none is found, or a
// child span if one is found. A context derived from parentCtx which
// additionally contains the new span is also returned.
// TODO(asubiotto): Use tracing.EnsureChildSpan once that is available.
func createRootOrChildSpan(
parentCtx context.Context, opName string, tracer *tracing.Tracer,
parentCtx context.Context, opName string, tr *tracing.Tracer,
) (context.Context, *tracing.Span) {
// WithForceRealSpan is used to support the use of session tracing, which
// may start recording on this span.
var sp *tracing.Span
if parentSp := tracing.SpanFromContext(parentCtx); parentSp != nil {
// Create a child span for this operation.
sp = parentSp.Tracer().StartSpan(
opName,
tracing.WithParentAndAutoCollection(parentSp),
tracing.WithCtxLogTags(parentCtx),
tracing.WithForceRealSpan(),
)
} else {
// Create a root span for this operations.
sp = tracer.StartSpan(
opName, tracing.WithCtxLogTags(parentCtx), tracing.WithForceRealSpan(),
)
}
return tracing.ContextWithSpan(parentCtx, sp), sp
return tracing.EnsureChildSpan(parentCtx, tr, opName, tracing.WithForceRealSpan())
}

// logTraceAboveThreshold logs a span's recording if the duration is above a
Expand Down
12 changes: 5 additions & 7 deletions pkg/sql/distsql/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,27 +202,25 @@ func (ds *ServerImpl) setupFlow(
}

const opName = "flow"
var sp *tracing.Span
var sp *tracing.Span // will be Finish()ed by Flow.Cleanup()
if parentSpan == nil {
sp = ds.Tracer.StartSpan(opName, tracing.WithCtxLogTags(ctx))
ctx, sp = ds.Tracer.StartSpanCtx(ctx, opName)
} else if localState.IsLocal {
// If we're a local flow, we don't need a "follows from" relationship: we're
// going to run this flow synchronously.
// TODO(andrei): localState.IsLocal is not quite the right thing to use.
// If that field is unset, we might still want to create a child span if
// this flow is run synchronously.
sp = ds.Tracer.StartSpan(opName, tracing.WithParentAndAutoCollection(parentSpan), tracing.WithCtxLogTags(ctx))
ctx, sp = ds.Tracer.StartSpanCtx(ctx, opName, tracing.WithParentAndAutoCollection(parentSpan))
} else {
// We use FollowsFrom because the flow's span outlives the SetupFlow request.
sp = ds.Tracer.StartSpan(
ctx, sp = ds.Tracer.StartSpanCtx(
ctx,
opName,
tracing.WithParentAndAutoCollection(parentSpan),
tracing.WithFollowsFrom(),
tracing.WithCtxLogTags(ctx),
)
}
// sp will be Finish()ed by Flow.Cleanup().
ctx = tracing.ContextWithSpan(ctx, sp)

// The monitor opened here is closed in Flow.Cleanup().
monitor := mon.NewMonitor(
Expand Down
26 changes: 7 additions & 19 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1541,30 +1541,18 @@ func (st *SessionTracing) StartTracing(

// Now hijack the conn's ctx with one that has a recording span.

opName := "session recording"
var sp *tracing.Span
connCtx := st.ex.ctxHolder.connCtx

if parentSp := tracing.SpanFromContext(connCtx); parentSp != nil {
// Create a child span while recording.
sp = parentSp.Tracer().StartSpan(
opName,
tracing.WithParentAndAutoCollection(parentSp),
tracing.WithCtxLogTags(connCtx),
tracing.WithForceRealSpan(),
)
} else {
// Create a root span while recording.
sp = st.ex.server.cfg.AmbientCtx.Tracer.StartSpan(
opName, tracing.WithForceRealSpan(),
tracing.WithCtxLogTags(connCtx),
)
}
opName := "session recording"
newConnCtx, sp := tracing.EnsureChildSpan(
connCtx,
st.ex.server.cfg.AmbientCtx.Tracer,
opName,
tracing.WithForceRealSpan(),
)
sp.SetVerbose(true)
st.connSpan = sp

// Hijack the connections context.
newConnCtx := tracing.ContextWithSpan(st.ex.ctxHolder.connCtx, sp)
st.ex.ctxHolder.hijack(newConnCtx)

return nil
Expand Down
10 changes: 5 additions & 5 deletions pkg/util/log/ambient_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,18 @@ type AmbientContext struct {
// log or an open span (if not nil).
eventLog *ctxEventLog

// The buffer.
//
// NB: this should not be returned to the caller, to avoid other mutations
// leaking in. If we return this to the caller, it should be in immutable
// form.
tags *logtags.Buffer

// Cached annotated version of context.{TODO,Background}, to avoid annotating
// these contexts repeatedly.
backgroundCtx context.Context
}

// LogTags returns the tags in the ambient context.
func (ac *AmbientContext) LogTags() *logtags.Buffer {
return ac.tags
}

// AddLogTag adds a tag to the ambient context.
func (ac *AmbientContext) AddLogTag(name string, value interface{}) {
ac.tags = ac.tags.Add(name, value)
Expand Down
9 changes: 5 additions & 4 deletions pkg/util/tracing/grpc_interceptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ func ServerInterceptor(tracer *Tracer) grpc.UnaryServerInterceptor {
return handler(ctx, req)
}

serverSpan := tracer.StartSpan(
ctx, serverSpan := tracer.StartSpanCtx(
ctx,
info.FullMethod,
WithTags(gRPCComponentTag, ext.SpanKindRPCServer),
WithParentAndManualCollection(spanMeta),
)
defer serverSpan.Finish()

ctx = ContextWithSpan(ctx, serverSpan)
resp, err = handler(ctx, req)
if err != nil {
SetSpanTags(serverSpan, err, false)
Expand Down Expand Up @@ -168,15 +168,16 @@ func StreamServerInterceptor(tracer *Tracer) grpc.StreamServerInterceptor {
return handler(srv, ss)
}

serverSpan := tracer.StartSpan(
ctx, serverSpan := tracer.StartSpanCtx(
ss.Context(),
info.FullMethod,
WithTags(gRPCComponentTag, ext.SpanKindRPCServer),
WithParentAndManualCollection(spanMeta),
)
defer serverSpan.Finish()
ss = &tracingServerStream{
ServerStream: ss,
ctx: ContextWithSpan(ss.Context(), serverSpan),
ctx: ctx,
}
err = handler(srv, ss)
if err != nil {
Expand Down
16 changes: 6 additions & 10 deletions pkg/util/tracing/tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@

package tracing

import (
"context"

"github.com/cockroachdb/logtags"
)
import "github.com/cockroachdb/logtags"

// LogTagsOption is a StartSpanOption that uses log tags to populate the Span tags.
type logTagsOption logtags.Buffer
Expand All @@ -29,15 +25,15 @@ func (lt *logTagsOption) apply(opts spanOptions) spanOptions {
// WithLogTags returns a SpanOption that sets the Span tags to the given log
// tags. When applied, the returned option will apply any logtag name->Span tag
// name remapping that has been registered via RegisterTagRemapping.
//
// Note that there is no need to use this option with StartSpanCtx, as that will
// already propagate the log tags from the Context supplied to it to the Span.
// However, if a WithLogTags option is supplied, it will be used and replaces
// the Context-derived tags.
func WithLogTags(tags *logtags.Buffer) SpanOption {
return (*logTagsOption)(tags)
}

// WithCtxLogTags returns WithLogTags(logtags.FromContext(ctx)).
func WithCtxLogTags(ctx context.Context) SpanOption {
return WithLogTags(logtags.FromContext(ctx))
}

// tagRemap is a map that records desired conversions
var tagRemap = make(map[string]string)

Expand Down
Loading

0 comments on commit e1f9e34

Please sign in to comment.