From 52d179f3243169f181d26ccb2eecbb053251758a Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Thu, 11 Mar 2021 10:27:25 -0500 Subject: [PATCH 1/2] sql: revert "sql: use stmt's span for exec stats propagation" This is a "by-hand" revert of 842d79b0 and motivates the next commit. The only "by-hand" bits comes from wanting to retain the comments added in that commit. To reduce the memory overhead for tracing, statements will stop using real spans unless absolutely required (for e.g. when SET TRACING = on). The intent behind #61532 was to reduce the overhead of sampled statements by not creating an additional new span when one was already available (the statement's span). Well, with the next commit, the statement's span will be a no-op one, so when we know that we're sampling we'll want create a new span. In the end, with respect to spans created, we'll be doing no worse for sampled statements relative to #61532. Release note: None --- pkg/sql/instrumentation.go | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/pkg/sql/instrumentation.go b/pkg/sql/instrumentation.go index 059f9758fbd9..22b35520bf1c 100644 --- a/pkg/sql/instrumentation.go +++ b/pkg/sql/instrumentation.go @@ -92,11 +92,9 @@ type instrumentationHelper struct { finishCollectionDiagnostics func() withStatementTrace func(trace tracing.Recording, stmt string) - sp *tracing.Span - // If true, the instrumentation helper is responsible for finishing sp. - finishSpan bool - origCtx context.Context - evalCtx *tree.EvalContext + sp *tracing.Span + origCtx context.Context + evalCtx *tree.EvalContext // If savePlanForStats is true, the explainPlan will be collected and returned // via PlanForStats(). @@ -167,8 +165,6 @@ func (ih *instrumentationHelper) Setup( ih.savePlanForStats = appStats.shouldSaveLogicalPlanDescription(fingerprint, implicitTxn) if sp := tracing.SpanFromContext(ctx); sp != nil { - ih.sp = sp - ih.finishSpan = false if sp.IsVerbose() { // If verbose tracing was enabled at a higher level, stats // collection is enabled so that stats are shown in the traces, but @@ -186,18 +182,10 @@ func (ih *instrumentationHelper) Setup( if !ih.collectBundle && ih.withStatementTrace == nil && ih.outputMode == unmodifiedOutput { if ih.collectExecStats { + // If we need to collect stats, create a non-verbose child span. Stats + // will be added as structured metadata and processed in Finish. ih.origCtx = ctx - if ih.sp != nil { - // The span present in the context is sufficient for us to - // record stats as structured metadata, so there is nothing else - // to do. - return ctx, true - } - // This is an edge case when the span is not present in the original - // context for some reason. This should *never* happen, but we - // choose to be defensive about it. newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement") - ih.finishSpan = true return newCtx, true } return ctx, false @@ -208,7 +196,6 @@ func (ih *instrumentationHelper) Setup( ih.origCtx = ctx ih.evalCtx = p.EvalContext() newCtx, ih.sp = tracing.StartVerboseTrace(ctx, cfg.AmbientCtx.Tracer, "traced statement") - ih.finishSpan = true return newCtx, true } @@ -224,9 +211,10 @@ func (ih *instrumentationHelper) Finish( retErr error, ) error { ctx := ih.origCtx - if ih.finishSpan { - ih.sp.Finish() + if ih.sp == nil { + return retErr } + ih.sp.Finish() // Record the statement information that we've collected. // Note that in case of implicit transactions, the trace contains the auto-commit too. From d7407d55814ddfb53d9d85a0e8cac0ba06d8da37 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Thu, 11 Mar 2021 09:54:40 -0500 Subject: [PATCH 2/2] sql: only create real spans when session tracing/sampling This drastically reduces the memory overhead for tracing we're observing in #59424. This commit does a few disparate things to make it happen: 1. We now access the tracing span through txnState.Ctx exclusively. This gives us a single point to hijack, which we'll later do. By default txn's are initialized with a no-op span. If later on session tracing is enabled, we'll create a real (verbose) span and swap it out with the txn's no-op one. This gives us the same semantics as earlier, and on the plus side, we're not re-using the same tracing span when session tracing is toggled. 2. Hard tracing methods to work with no-op spans. Specifically GetRecording and TraceID. 3. Remove a crash vector through crdb_internal.trace_id. It was previously reaching into the first recording to retrieve a trace ID. But it's not guaranteed that recordings are non-empty. This could be used to induce panics in the server. This PR will need to get backported to 21.1. Fixes #59424. Release note: None --- pkg/sql/conn_executor.go | 2 +- pkg/sql/conn_executor_exec.go | 5 +-- pkg/sql/exec_util.go | 14 ++++--- pkg/sql/instrumentation.go | 2 +- .../testdata/logic_test/contention_event | 6 +-- .../testdata/logic_test/inflight_trace_spans | 3 ++ pkg/sql/sem/builtins/builtins.go | 7 +++- pkg/sql/txn_state.go | 40 +++++++++++-------- pkg/sql/txn_state_test.go | 1 - pkg/util/tracing/span.go | 2 + pkg/util/tracing/span_inner.go | 6 +++ 11 files changed, 55 insertions(+), 33 deletions(-) diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index be0924b75e97..1ff8ebeb8190 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -835,7 +835,7 @@ func (ex *connExecutor) close(ctx context.Context, closeType closeType) { panic(errors.AssertionFailedf("unexpected state in conn executor after ApplyWithPayload %T", t)) } } - if util.CrdbTestBuild && ex.state.sp != nil { + if util.CrdbTestBuild && ex.state.Ctx != nil { panic(errors.AssertionFailedf("txn span not closed in state %s", ex.machine.CurState())) } } else if closeType == externalTxnClose { diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index 2703a0589eae..0ed2864a2a5b 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -645,7 +645,7 @@ func (ex *connExecutor) execStmtInOpenState( alreadyRecording := ex.transitionCtx.sessionTracing.Enabled() stmtTraceThreshold := traceStmtThreshold.Get(&ex.planner.execCfg.Settings.SV) if !alreadyRecording && stmtTraceThreshold > 0 { - ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer) + ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer, tracing.WithForceRealSpan()) stmtThresholdSpan.SetVerbose(true) } @@ -1544,9 +1544,6 @@ func (ex *connExecutor) recordTransaction(ev txnEvent, implicit bool, txnStart t func createRootOrChildSpan( parentCtx context.Context, opName string, tr *tracing.Tracer, os ...tracing.SpanOption, ) (context.Context, *tracing.Span) { - // WithForceRealSpan is used to support the use of session tracing, which - // may start recording on this span. - os = append(os, tracing.WithForceRealSpan()) return tracing.EnsureChildSpan(parentCtx, tr, opName, os...) } diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index bef57301ce92..ccc8eafa3711 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -1642,16 +1642,18 @@ func (st *SessionTracing) StartTracing( return nil } - // If we're inside a transaction, start recording on the txn span. + // If we're inside a transaction, hijack the txn's ctx with one that has a + // recording span. if _, ok := st.ex.machine.CurState().(stateNoTxn); !ok { - sp := tracing.SpanFromContext(st.ex.state.Ctx) - if sp == nil { + txnCtx := st.ex.state.Ctx + if sp := tracing.SpanFromContext(txnCtx); sp == nil { return errors.Errorf("no txn span for SessionTracing") } - // We want to clear out any existing recordings so they don't show up in - // future traces. - sp.ResetRecording() + + newTxnCtx, sp := tracing.EnsureChildSpan(txnCtx, st.ex.server.cfg.AmbientCtx.Tracer, + "session tracing", tracing.WithForceRealSpan()) sp.SetVerbose(true) + st.ex.state.Ctx = newTxnCtx st.firstTxnSpan = sp } diff --git a/pkg/sql/instrumentation.go b/pkg/sql/instrumentation.go index 22b35520bf1c..e704dc1fdb19 100644 --- a/pkg/sql/instrumentation.go +++ b/pkg/sql/instrumentation.go @@ -185,7 +185,7 @@ func (ih *instrumentationHelper) Setup( // If we need to collect stats, create a non-verbose child span. Stats // will be added as structured metadata and processed in Finish. ih.origCtx = ctx - newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement") + newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement", tracing.WithForceRealSpan()) return newCtx, true } return ctx, false diff --git a/pkg/sql/logictest/testdata/logic_test/contention_event b/pkg/sql/logictest/testdata/logic_test/contention_event index e5e458d2a2c9..306835097034 100644 --- a/pkg/sql/logictest/testdata/logic_test/contention_event +++ b/pkg/sql/logictest/testdata/logic_test/contention_event @@ -26,6 +26,9 @@ INSERT INTO kv VALUES('k', 'v') user root +statement ok +SET TRACING = on + # Scan all ranges of the table (note that we have intentionally # split it into at least six ranges). This is better than a point # lookup in that it gives tracing more of a chance to get things @@ -47,9 +50,6 @@ user root # # NB: the contention event is not in our trace span but in one of its # children, so it wouldn't be found if we filtered by the trace span ID. -# -# NB: this needs the 5node-pretend59315 config because otherwise the span is not -# tracked. query B WITH spans AS ( SELECT span_id diff --git a/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans b/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans index e11114f9b3d6..cbe1b280e2e4 100644 --- a/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans +++ b/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans @@ -16,6 +16,9 @@ user testuser statement ok BEGIN +statement ok +SET TRACING = on + let $curr_trace_id SELECT * FROM crdb_internal.trace_id() diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 045a93b56ddd..c3cd4d205e07 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -3634,7 +3634,12 @@ may increase either contention or retry errors, or both.`, if sp == nil { return tree.DNull, nil } - return tree.NewDInt(tree.DInt(sp.GetRecording()[0].TraceID)), nil + + traceID := sp.TraceID() + if traceID == 0 { + return tree.DNull, nil + } + return tree.NewDInt(tree.DInt(traceID)), nil }, Info: "Returns the current trace ID or an error if no trace is open.", // NB: possibly this is or could be made stable, but it's not worth it. diff --git a/pkg/sql/txn_state.go b/pkg/sql/txn_state.go index ed04da383866..0e0af21b2dd8 100644 --- a/pkg/sql/txn_state.go +++ b/pkg/sql/txn_state.go @@ -61,11 +61,12 @@ type txnState struct { // Ctx is the context for everything running in this SQL txn. // This is only set while the session's state is not stateNoTxn. + // + // It also embeds the tracing span associated with the SQL txn. These are + // often root spans, as SQL txns are frequently the level at which we do + // tracing. This context is hijacked when session tracing is enabled. Ctx context.Context - // sp is the span corresponding to the SQL txn. These are often root spans, as - // SQL txns are frequently the level at which we do tracing. - sp *tracing.Span // recordingThreshold, is not zero, indicates that sp is recording and that // the recording should be dumped to the log if execution of the transaction // took more than this. @@ -154,15 +155,22 @@ func (ts *txnState) resetForNewSQLTxn( // Create a context for this transaction. It will include a root span that // will contain everything executed as part of the upcoming SQL txn, including // (automatic or user-directed) retries. The span is closed by finishSQLTxn(). - // TODO(andrei): figure out how to close these spans on server shutdown? Ties - // into a larger discussion about how to drain SQL and rollback open txns. opName := sqlTxnName - txnCtx, sp := createRootOrChildSpan(connCtx, opName, tranCtx.tracer) + alreadyRecording := tranCtx.sessionTracing.Enabled() + + var txnCtx context.Context + var sp *tracing.Span + if alreadyRecording { + // WithForceRealSpan is used to support the use of session tracing, + // which will start recording on this span. + txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer, tracing.WithForceRealSpan()) + } else { + txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer) + } if txnType == implicitTxn { sp.SetTag("implicit", "true") } - alreadyRecording := tranCtx.sessionTracing.Enabled() duration := traceTxnThreshold.Get(&tranCtx.settings.SV) if !alreadyRecording && (duration > 0) { sp.SetVerbose(true) @@ -170,9 +178,7 @@ func (ts *txnState) resetForNewSQLTxn( ts.recordingStart = timeutil.Now() } - ts.sp = sp ts.Ctx, ts.cancel = contextutil.WithCancel(txnCtx) - ts.mon.Start(ts.Ctx, tranCtx.connMon, mon.BoundAccount{} /* reserved */) ts.mu.Lock() ts.mu.stmtCount = 0 @@ -207,16 +213,16 @@ func (ts *txnState) finishSQLTxn() { ts.cancel() ts.cancel = nil } - if ts.sp == nil { + sp := tracing.SpanFromContext(ts.Ctx) + if sp == nil { panic(errors.AssertionFailedf("No span in context? Was resetForNewSQLTxn() called previously?")) } if ts.recordingThreshold > 0 { - logTraceAboveThreshold(ts.Ctx, ts.sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart)) + logTraceAboveThreshold(ts.Ctx, sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart)) } - ts.sp.Finish() - ts.sp = nil + sp.Finish() ts.Ctx = nil ts.mu.Lock() ts.mu.txn = nil @@ -239,10 +245,12 @@ func (ts *txnState) finishExternalTxn() { ts.cancel() ts.cancel = nil } - if ts.sp != nil { - ts.sp.Finish() + + if ts.Ctx != nil { + if sp := tracing.SpanFromContext(ts.Ctx); sp != nil { + sp.Finish() + } } - ts.sp = nil ts.Ctx = nil ts.mu.Lock() ts.mu.txn = nil diff --git a/pkg/sql/txn_state_test.go b/pkg/sql/txn_state_test.go index 2dd9055e4c9c..324abf6c322b 100644 --- a/pkg/sql/txn_state_test.go +++ b/pkg/sql/txn_state_test.go @@ -97,7 +97,6 @@ func (tc *testContext) createOpenState(typ txnType) (fsm.State, *txnState) { ts := txnState{ Ctx: ctx, connCtx: tc.ctx, - sp: sp, cancel: cancel, sqlTimestamp: timeutil.Now(), priority: roachpb.NormalUserPriority, diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index 4f354eab4b4a..17f0c051a2bf 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -146,6 +146,8 @@ func (sp *Span) SetVerboseRecursively(to bool) { // ResetRecording clears any previously recorded information. This doesn't // affect any auxiliary trace sinks such as net/trace or zipkin. +// +// TODO(irfansharif): Remove this, it's no longer used. func (sp *Span) ResetRecording() { sp.i.ResetRecording() } diff --git a/pkg/util/tracing/span_inner.go b/pkg/util/tracing/span_inner.go index dcb22bf2d952..765bacc58ca4 100644 --- a/pkg/util/tracing/span_inner.go +++ b/pkg/util/tracing/span_inner.go @@ -38,6 +38,9 @@ type spanInner struct { } func (s *spanInner) TraceID() uint64 { + if s.isNoop() { + return 0 + } return s.crdb.traceID } @@ -73,6 +76,9 @@ func (s *spanInner) ResetRecording() { } func (s *spanInner) GetRecording() Recording { + if s.isNoop() { + return nil + } // If the span is not verbose, optimize by avoiding the tags. // This span is likely only used to carry payloads around. wantTags := s.crdb.recordingType() == RecordingVerbose