From a6558fd5e6757bff716943f95e30baeaa1e267d1 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 28 Oct 2021 17:45:15 -0400 Subject: [PATCH] sql: fix and simplify session tracing span mgmt Before this patch, session tracing was broken in two ways: - it leaked a span, corresponding to the transaction that was ongoing when session tracing is initiated. Session tracing was replacing that span in the connection's tracking, and so nobody was ever Finish()ing it. - it exposed a span to use-after-finish. This is the span that replaced the span discussed above. When session tracing is initiated in a transaction, it overwrote the txn's span. When session tracing was finished, it finished that span. But, in case tracing is finished in the same txn as the one it started in, then that span is still referenced by the connection. This patch fixes these issues and also simplifies how session tracing works. We do away with the special handling of the transaction during which tracing starts. Instead, we just give this transaction a span inheriting from the same recording span that future transactions will inherit from. The SessionTracing struct gets out of the business of owning any spans. Release note: None --- pkg/sql/exec_util.go | 75 ++++++++----------- .../testdata/logic_test/inflight_trace_spans | 3 + 2 files changed, 34 insertions(+), 44 deletions(-) diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 00648b78f880..c4d12249a064 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -1950,10 +1950,6 @@ type SessionTracing struct { // ex is the connExecutor to which this SessionTracing is tied. ex *connExecutor - // firstTxnSpan is the span of the first txn that was active when session - // tracing was enabled. It is finished and unset in StopTracing. - firstTxnSpan *tracing.Span - // connSpan is the connection's span. This is recording. It is finished and // unset in StopTracing. connSpan *tracing.Span @@ -1970,16 +1966,7 @@ func (st *SessionTracing) getSessionTrace() ([]traceRow, error) { return st.lastRecording, nil } - return generateSessionTraceVTable(st.getRecording()) -} - -// getRecording returns the recorded spans of the current trace. -func (st *SessionTracing) getRecording() []tracingpb.RecordedSpan { - var spans []tracingpb.RecordedSpan - if st.firstTxnSpan != nil { - spans = append(spans, st.firstTxnSpan.GetRecording()...) - } - return append(spans, st.connSpan.GetRecording()...) + return generateSessionTraceVTable(st.connSpan.GetRecording()) } // StartTracing starts "session tracing". From this moment on, everything @@ -2031,20 +2018,36 @@ func (st *SessionTracing) StartTracing( return nil } + // Hijack the conn's ctx with one that has a recording span. All future + // transactions will inherit from this span, so they'll all be recorded. + var newConnCtx context.Context + { + connCtx := st.ex.ctxHolder.connCtx + opName := "session recording" + newConnCtx, st.connSpan = tracing.EnsureChildSpan( + connCtx, + st.ex.server.cfg.AmbientCtx.Tracer, + opName, + tracing.WithForceRealSpan(), + ) + st.connSpan.SetVerbose(true) + st.ex.ctxHolder.hijack(newConnCtx) + } + // If we're inside a transaction, hijack the txn's ctx with one that has a - // recording span. The recording of this first transaction will be collected - // separately from the recording of future transactions. + // recording span. if _, ok := st.ex.machine.CurState().(stateNoTxn); !ok { txnCtx := st.ex.state.Ctx - if sp := tracing.SpanFromContext(txnCtx); sp == nil { + sp := tracing.SpanFromContext(txnCtx) + if sp == nil { return errors.Errorf("no txn span for SessionTracing") } + // We're hijacking this span and we're never going to un-hijack it, so it's + // up to us to finish it. + sp.Finish() - newTxnCtx, sp := tracing.EnsureChildSpan(txnCtx, st.ex.server.cfg.AmbientCtx.Tracer, - "session tracing", tracing.WithForceRealSpan()) - sp.SetVerbose(true) - st.ex.state.Ctx = newTxnCtx - st.firstTxnSpan = sp + st.ex.state.Ctx, _ = tracing.EnsureChildSpan( + newConnCtx, st.ex.server.cfg.AmbientCtx.Tracer, "session tracing") } st.enabled = true @@ -2052,20 +2055,6 @@ func (st *SessionTracing) StartTracing( st.showResults = showResults st.recordingType = recType - // Now hijack the conn's ctx with one that has a recording span. All future - // transactions will inherit from this span, so they'll all be recorded. - connCtx := st.ex.ctxHolder.connCtx - opName := "session recording" - newConnCtx, sp := tracing.EnsureChildSpan( - connCtx, - st.ex.server.cfg.AmbientCtx.Tracer, - opName, - tracing.WithForceRealSpan(), - ) - sp.SetVerbose(true) - st.connSpan = sp - st.ex.ctxHolder.hijack(newConnCtx) - return nil } @@ -2081,19 +2070,17 @@ func (st *SessionTracing) StopTracing() error { st.recordingType = tracing.RecordingOff // Accumulate all recordings and finish the tracing spans. - var spans []tracingpb.RecordedSpan - if st.firstTxnSpan != nil { - spans = append(spans, st.firstTxnSpan.GetRecording()...) - st.firstTxnSpan.Finish() - st.firstTxnSpan = nil - } - spans = append(spans, st.connSpan.GetRecording()...) + rec := st.connSpan.GetRecording() + // We're about to finish this span, but there might be a child that remains + // open - the child corresponding to the current transaction. We don't want + // that span to be recording any more. + st.connSpan.SetVerbose(false) st.connSpan.Finish() st.connSpan = nil st.ex.ctxHolder.unhijack() var err error - st.lastRecording, err = generateSessionTraceVTable(spans) + st.lastRecording, err = generateSessionTraceVTable(rec) return err } diff --git a/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans b/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans index cbe1b280e2e4..bd63f8404ea1 100644 --- a/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans +++ b/pkg/sql/logictest/testdata/logic_test/inflight_trace_spans @@ -40,6 +40,9 @@ statement ok INSERT INTO kv VALUES('k', 'v'); COMMIT +statement ok +SET TRACING = off + # Confirm that the trace and its associated spans are no longer tracked. query B SELECT count(*) = 0