Skip to content

Commit

Permalink
Merge #61777
Browse files Browse the repository at this point in the history
61777:  sql: only create real spans when session tracing/sampling r=irfansharif a=irfansharif

This drastically reduces the memory overhead for tracing we're observing
in #59424. This commit does a few disparate things to make it happen:

1. We now access the tracing span through txnState.Ctx exclusively. This
   gives us a single point to hijack, which we'll later do. By default
   txn's are initialized with a no-op span. If later on session tracing is
   enabled, we'll create a real (verbose) span and swap it out with the
   txn's no-op one. This gives us the same semantics as earlier, and on the
   plus side, we're not re-using the same tracing span when session tracing
   is toggled.
2. Hard tracing methods to work with no-op spans. Specifically
   GetRecording and TraceID.
3. Remove a crash vector through crdb_internal.trace_id. It was
   previously reaching into the first recording to retrieve a trace ID. But
   it's not guaranteed that recordings are non-empty. This could be used to
   induce panics in the server.

This PR will need to get backported to 21.1. Fixes #59424.

Release note: None

---

+cc @cockroachdb/kv-east.


Co-authored-by: irfan sharif <[email protected]>
  • Loading branch information
craig[bot] and irfansharif committed Mar 13, 2021
2 parents 1b6c7f9 + d7407d5 commit 283a21e
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 53 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ func (ex *connExecutor) close(ctx context.Context, closeType closeType) {
panic(errors.AssertionFailedf("unexpected state in conn executor after ApplyWithPayload %T", t))
}
}
if util.CrdbTestBuild && ex.state.sp != nil {
if util.CrdbTestBuild && ex.state.Ctx != nil {
panic(errors.AssertionFailedf("txn span not closed in state %s", ex.machine.CurState()))
}
} else if closeType == externalTxnClose {
Expand Down
5 changes: 1 addition & 4 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ func (ex *connExecutor) execStmtInOpenState(
alreadyRecording := ex.transitionCtx.sessionTracing.Enabled()
stmtTraceThreshold := traceStmtThreshold.Get(&ex.planner.execCfg.Settings.SV)
if !alreadyRecording && stmtTraceThreshold > 0 {
ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer)
ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer, tracing.WithForceRealSpan())
stmtThresholdSpan.SetVerbose(true)
}

Expand Down Expand Up @@ -1548,9 +1548,6 @@ func (ex *connExecutor) recordTransaction(ev txnEvent, implicit bool, txnStart t
func createRootOrChildSpan(
parentCtx context.Context, opName string, tr *tracing.Tracer, os ...tracing.SpanOption,
) (context.Context, *tracing.Span) {
// WithForceRealSpan is used to support the use of session tracing, which
// may start recording on this span.
os = append(os, tracing.WithForceRealSpan())
return tracing.EnsureChildSpan(parentCtx, tr, opName, os...)
}

Expand Down
14 changes: 8 additions & 6 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1649,16 +1649,18 @@ func (st *SessionTracing) StartTracing(
return nil
}

// If we're inside a transaction, start recording on the txn span.
// If we're inside a transaction, hijack the txn's ctx with one that has a
// recording span.
if _, ok := st.ex.machine.CurState().(stateNoTxn); !ok {
sp := tracing.SpanFromContext(st.ex.state.Ctx)
if sp == nil {
txnCtx := st.ex.state.Ctx
if sp := tracing.SpanFromContext(txnCtx); sp == nil {
return errors.Errorf("no txn span for SessionTracing")
}
// We want to clear out any existing recordings so they don't show up in
// future traces.
sp.ResetRecording()

newTxnCtx, sp := tracing.EnsureChildSpan(txnCtx, st.ex.server.cfg.AmbientCtx.Tracer,
"session tracing", tracing.WithForceRealSpan())
sp.SetVerbose(true)
st.ex.state.Ctx = newTxnCtx
st.firstTxnSpan = sp
}

Expand Down
30 changes: 9 additions & 21 deletions pkg/sql/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,9 @@ type instrumentationHelper struct {
finishCollectionDiagnostics func()
withStatementTrace func(trace tracing.Recording, stmt string)

sp *tracing.Span
// If true, the instrumentation helper is responsible for finishing sp.
finishSpan bool
origCtx context.Context
evalCtx *tree.EvalContext
sp *tracing.Span
origCtx context.Context
evalCtx *tree.EvalContext

// If savePlanForStats is true, the explainPlan will be collected and returned
// via PlanForStats().
Expand Down Expand Up @@ -167,8 +165,6 @@ func (ih *instrumentationHelper) Setup(
ih.savePlanForStats = appStats.shouldSaveLogicalPlanDescription(fingerprint, implicitTxn)

if sp := tracing.SpanFromContext(ctx); sp != nil {
ih.sp = sp
ih.finishSpan = false
if sp.IsVerbose() {
// If verbose tracing was enabled at a higher level, stats
// collection is enabled so that stats are shown in the traces, but
Expand All @@ -186,18 +182,10 @@ func (ih *instrumentationHelper) Setup(

if !ih.collectBundle && ih.withStatementTrace == nil && ih.outputMode == unmodifiedOutput {
if ih.collectExecStats {
// If we need to collect stats, create a non-verbose child span. Stats
// will be added as structured metadata and processed in Finish.
ih.origCtx = ctx
if ih.sp != nil {
// The span present in the context is sufficient for us to
// record stats as structured metadata, so there is nothing else
// to do.
return ctx, true
}
// This is an edge case when the span is not present in the original
// context for some reason. This should *never* happen, but we
// choose to be defensive about it.
newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement")
ih.finishSpan = true
newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement", tracing.WithForceRealSpan())
return newCtx, true
}
return ctx, false
Expand All @@ -208,7 +196,6 @@ func (ih *instrumentationHelper) Setup(
ih.origCtx = ctx
ih.evalCtx = p.EvalContext()
newCtx, ih.sp = tracing.StartVerboseTrace(ctx, cfg.AmbientCtx.Tracer, "traced statement")
ih.finishSpan = true
return newCtx, true
}

Expand All @@ -224,9 +211,10 @@ func (ih *instrumentationHelper) Finish(
retErr error,
) error {
ctx := ih.origCtx
if ih.finishSpan {
ih.sp.Finish()
if ih.sp == nil {
return retErr
}
ih.sp.Finish()

// Record the statement information that we've collected.
// Note that in case of implicit transactions, the trace contains the auto-commit too.
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/contention_event
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ INSERT INTO kv VALUES('k', 'v')

user root

statement ok
SET TRACING = on

# Scan all ranges of the table (note that we have intentionally
# split it into at least six ranges). This is better than a point
# lookup in that it gives tracing more of a chance to get things
Expand All @@ -47,9 +50,6 @@ user root
#
# NB: the contention event is not in our trace span but in one of its
# children, so it wouldn't be found if we filtered by the trace span ID.
#
# NB: this needs the 5node-pretend59315 config because otherwise the span is not
# tracked.
query B
WITH spans AS (
SELECT span_id
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inflight_trace_spans
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ user testuser
statement ok
BEGIN

statement ok
SET TRACING = on

let $curr_trace_id
SELECT * FROM crdb_internal.trace_id()

Expand Down
7 changes: 6 additions & 1 deletion pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -3634,7 +3634,12 @@ may increase either contention or retry errors, or both.`,
if sp == nil {
return tree.DNull, nil
}
return tree.NewDInt(tree.DInt(sp.GetRecording()[0].TraceID)), nil

traceID := sp.TraceID()
if traceID == 0 {
return tree.DNull, nil
}
return tree.NewDInt(tree.DInt(traceID)), nil
},
Info: "Returns the current trace ID or an error if no trace is open.",
// NB: possibly this is or could be made stable, but it's not worth it.
Expand Down
40 changes: 24 additions & 16 deletions pkg/sql/txn_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ type txnState struct {

// Ctx is the context for everything running in this SQL txn.
// This is only set while the session's state is not stateNoTxn.
//
// It also embeds the tracing span associated with the SQL txn. These are
// often root spans, as SQL txns are frequently the level at which we do
// tracing. This context is hijacked when session tracing is enabled.
Ctx context.Context

// sp is the span corresponding to the SQL txn. These are often root spans, as
// SQL txns are frequently the level at which we do tracing.
sp *tracing.Span
// recordingThreshold, is not zero, indicates that sp is recording and that
// the recording should be dumped to the log if execution of the transaction
// took more than this.
Expand Down Expand Up @@ -154,25 +155,30 @@ func (ts *txnState) resetForNewSQLTxn(
// Create a context for this transaction. It will include a root span that
// will contain everything executed as part of the upcoming SQL txn, including
// (automatic or user-directed) retries. The span is closed by finishSQLTxn().
// TODO(andrei): figure out how to close these spans on server shutdown? Ties
// into a larger discussion about how to drain SQL and rollback open txns.
opName := sqlTxnName
txnCtx, sp := createRootOrChildSpan(connCtx, opName, tranCtx.tracer)
alreadyRecording := tranCtx.sessionTracing.Enabled()

var txnCtx context.Context
var sp *tracing.Span
if alreadyRecording {
// WithForceRealSpan is used to support the use of session tracing,
// which will start recording on this span.
txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer, tracing.WithForceRealSpan())
} else {
txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer)
}
if txnType == implicitTxn {
sp.SetTag("implicit", "true")
}

alreadyRecording := tranCtx.sessionTracing.Enabled()
duration := traceTxnThreshold.Get(&tranCtx.settings.SV)
if !alreadyRecording && (duration > 0) {
sp.SetVerbose(true)
ts.recordingThreshold = duration
ts.recordingStart = timeutil.Now()
}

ts.sp = sp
ts.Ctx, ts.cancel = contextutil.WithCancel(txnCtx)

ts.mon.Start(ts.Ctx, tranCtx.connMon, mon.BoundAccount{} /* reserved */)
ts.mu.Lock()
ts.mu.stmtCount = 0
Expand Down Expand Up @@ -207,16 +213,16 @@ func (ts *txnState) finishSQLTxn() {
ts.cancel()
ts.cancel = nil
}
if ts.sp == nil {
sp := tracing.SpanFromContext(ts.Ctx)
if sp == nil {
panic(errors.AssertionFailedf("No span in context? Was resetForNewSQLTxn() called previously?"))
}

if ts.recordingThreshold > 0 {
logTraceAboveThreshold(ts.Ctx, ts.sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart))
logTraceAboveThreshold(ts.Ctx, sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart))
}

ts.sp.Finish()
ts.sp = nil
sp.Finish()
ts.Ctx = nil
ts.mu.Lock()
ts.mu.txn = nil
Expand All @@ -239,10 +245,12 @@ func (ts *txnState) finishExternalTxn() {
ts.cancel()
ts.cancel = nil
}
if ts.sp != nil {
ts.sp.Finish()

if ts.Ctx != nil {
if sp := tracing.SpanFromContext(ts.Ctx); sp != nil {
sp.Finish()
}
}
ts.sp = nil
ts.Ctx = nil
ts.mu.Lock()
ts.mu.txn = nil
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/txn_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ func (tc *testContext) createOpenState(typ txnType) (fsm.State, *txnState) {
ts := txnState{
Ctx: ctx,
connCtx: tc.ctx,
sp: sp,
cancel: cancel,
sqlTimestamp: timeutil.Now(),
priority: roachpb.NormalUserPriority,
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/tracing/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ func (sp *Span) SetVerboseRecursively(to bool) {

// ResetRecording clears any previously recorded information. This doesn't
// affect any auxiliary trace sinks such as net/trace or zipkin.
//
// TODO(irfansharif): Remove this, it's no longer used.
func (sp *Span) ResetRecording() {
sp.i.ResetRecording()
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/tracing/span_inner.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ type spanInner struct {
}

func (s *spanInner) TraceID() uint64 {
if s.isNoop() {
return 0
}
return s.crdb.traceID
}

Expand Down Expand Up @@ -73,6 +76,9 @@ func (s *spanInner) ResetRecording() {
}

func (s *spanInner) GetRecording() Recording {
if s.isNoop() {
return nil
}
// If the span is not verbose, optimize by avoiding the tags.
// This span is likely only used to carry payloads around.
wantTags := s.crdb.recordingType() == RecordingVerbose
Expand Down

0 comments on commit 283a21e

Please sign in to comment.