From c120153ebf434362c51b12c1c56ec17cc676f777 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Sun, 2 Apr 2023 14:34:59 -0700 Subject: [PATCH] tracing: Notify EventListeners while holding the span's mutex This commit addresses an oversight that was introduced in 6a3c0f9207eda2d136046a367049160f0386cd8b where we made the notifications of the EventListeners done without holding the span's mutex. However, the contract of `WithEventListeners` implies that the callbacks are not called any more after `Finish()` returns, so that commit introduced a bug that is now fixed in this commit. The fix done in this commit is suboptimal though - it introduces an exported method `SetLazyTagLocked` which is not great. The proper fix would be to refactor `TracingAggregator` to avoid setting the lazy tag dynamically, in the event listener callback, but that seems like a non-trivial change, and I'd like to get this commit backported. To avoid introducing more callsites of this `SetLazyTagLocked` method it is marked as "deprecated" and #100438 tracks addressing this piece of the tech debt. Release note: None --- build/bazelutil/nogo_config.json | 1 + .../bulkpb/ingestion_performance_stats.go | 2 -- pkg/testutils/lint/lint_test.go | 2 ++ pkg/util/bulk/tracing_aggregator.go | 2 +- pkg/util/tracing/crdbspan.go | 15 ++++++------ pkg/util/tracing/span.go | 16 +++++++++++-- pkg/util/tracing/span_inner.go | 8 +++++++ pkg/util/tracing/span_test.go | 23 ------------------- 8 files changed, 33 insertions(+), 36 deletions(-) diff --git a/build/bazelutil/nogo_config.json b/build/bazelutil/nogo_config.json index c99487961f61..2ff3ce5bb6df 100644 --- a/build/bazelutil/nogo_config.json +++ b/build/bazelutil/nogo_config.json @@ -740,6 +740,7 @@ "pkg/server/dumpstore/dumpstore.go$": "excluded until all uses of io/ioutil are replaced", "pkg/server/dumpstore/dumpstore_test.go$": "excluded until all uses of io/ioutil are replaced", "pkg/server/profiler/profilestore_test.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/util/bulk/tracing_aggregator.go$":"temporary exclusion until #100438 is resolved", "pkg/util/log/file_api.go$": "excluded until all uses of io/ioutil are replaced", "pkg/build/bazel/bazel\\.go$": "Runfile function deprecated" }, diff --git a/pkg/kv/bulk/bulkpb/ingestion_performance_stats.go b/pkg/kv/bulk/bulkpb/ingestion_performance_stats.go index d0261a5ba5b3..3e5d5c8308ba 100644 --- a/pkg/kv/bulk/bulkpb/ingestion_performance_stats.go +++ b/pkg/kv/bulk/bulkpb/ingestion_performance_stats.go @@ -271,5 +271,3 @@ type timing time.Duration func (t timing) String() string { return time.Duration(t).Round(time.Second).String() } func (t timing) SafeValue() {} - -var _ bulk.TracingAggregatorEvent = &IngestionPerformanceStats{} diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index 689a97571f5d..0487c2f1753e 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -1799,6 +1799,8 @@ func TestLint(t *testing.T) { stream.GrepNot(`pkg/sql/row/expr_walker_test.go:.* evalCtx.SetDeprecatedContext is deprecated: .*`), stream.GrepNot(`pkg/sql/schemachanger/scbuild/tree_context_builder.go:.* evalCtx.SetDeprecatedContext is deprecated: .*`), stream.GrepNot(`pkg/sql/schemachanger/scplan/internal/rules/.*/.*go:.* should not use dot imports \(ST1001\)`), + // TODO(yuzefovich): remove this exclusion (#100438). + stream.GrepNot(`pkg/util/bulk/tracing_aggregator.go:.*SetLazyTagLocked is deprecated: .*`), ), func(s string) { t.Errorf("\n%s", s) }); err != nil { diff --git a/pkg/util/bulk/tracing_aggregator.go b/pkg/util/bulk/tracing_aggregator.go index 3b9b884e540d..c5b3aa5c7d94 100644 --- a/pkg/util/bulk/tracing_aggregator.go +++ b/pkg/util/bulk/tracing_aggregator.go @@ -62,7 +62,7 @@ func (b *TracingAggregator) Notify(event tracing.Structured) tracing.EventConsum eventTag := bulkEvent.Tag() if _, ok := b.mu.aggregatedEvents[bulkEvent.Tag()]; !ok { b.mu.aggregatedEvents[eventTag] = bulkEvent.Identity() - b.mu.sp.SetLazyTag(eventTag, b.mu.aggregatedEvents[eventTag]) + b.mu.sp.SetLazyTagLocked(eventTag, b.mu.aggregatedEvents[eventTag]) } b.mu.aggregatedEvents[eventTag].Combine(bulkEvent) return tracing.EventNotConsumed diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index c013f151ddd9..d3271fd7aaac 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -962,28 +962,28 @@ func (s *crdbSpan) getLazyTagLocked(key string) (interface{}, bool) { // convention between parents and children. func (s *crdbSpan) notifyEventListeners(item Structured) { s.mu.Lock() + var unlocked bool + defer func() { + if !unlocked { + s.mu.Unlock() + } + }() // Check if the span has been finished concurrently with this notify call. // This can happen when the signal comes from a child span; in that case the // child calls into the parent without holding the child's lock, so the call // can race with parent.Finish(). if s.mu.finished { - s.mu.Unlock() return } - s.mu.Unlock() // Notify s' eventListeners first, before passing the event to parent. - // - // We can operate on s' eventListeners without holding the mutex because the - // slice is only written to once during span creation. for _, listener := range s.eventListeners { if listener.Notify(item) == EventConsumed { return } } - s.mu.Lock() if s.mu.recording.notifyParentOnStructuredEvent { parent := s.mu.parent.Span.i.crdb // Take a reference of s' parent before releasing the mutex. This ensures @@ -992,9 +992,8 @@ func (s *crdbSpan) notifyEventListeners(item Structured) { parentRef := makeSpanRef(s.mu.parent.Span) defer parentRef.release() s.mu.Unlock() + unlocked = true parent.notifyEventListeners(item) - } else { - s.mu.Unlock() } } diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index 3024440c9e42..db4724f6e12b 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -520,6 +520,19 @@ func (sp *Span) SetLazyTag(key string, value interface{}) { sp.i.SetLazyTag(key, value) } +// SetLazyTagLocked is the same as SetLazyTag but assumes that the mutex of sp +// is being held. +// +// Deprecated: this method should not be used because it's introduced only to go +// around some tech debt (#100438). Once that issue is addressed, this method +// should be removed. +func (sp *Span) SetLazyTagLocked(key string, value interface{}) { + if sp.detectUseAfterFinish() { + return + } + sp.i.setLazyTagLocked(key, value) +} + // GetLazyTag returns the value of the tag with the given key. If that tag doesn't // exist, the bool retval is false. func (sp *Span) GetLazyTag(key string) (interface{}, bool) { @@ -535,8 +548,7 @@ type EventListener interface { // Notify is invoked on every Structured event recorded by the span and its // children, recursively. // - // Note that this method should not run for a long time as it will hold up the - // span recording Structured events during traced operations. + // The caller holds the mutex of the span. // // Notify will not be called concurrently on the same span. Notify(event Structured) EventConsumptionStatus diff --git a/pkg/util/tracing/span_inner.go b/pkg/util/tracing/span_inner.go index d5714d27dc29..38369d7f74a3 100644 --- a/pkg/util/tracing/span_inner.go +++ b/pkg/util/tracing/span_inner.go @@ -241,6 +241,14 @@ func (s *spanInner) SetLazyTag(key string, value interface{}) *spanInner { return s } +func (s *spanInner) setLazyTagLocked(key string, value interface{}) *spanInner { + if s.isNoop() { + return s + } + s.crdb.setLazyTagLocked(key, value) + return s +} + // GetLazyTag returns the value of the tag with the given key. If that tag doesn't // exist, the bool retval is false. func (s *spanInner) GetLazyTag(key string) (interface{}, bool) { diff --git a/pkg/util/tracing/span_test.go b/pkg/util/tracing/span_test.go index f215f6167bcf..1e623ca041d2 100644 --- a/pkg/util/tracing/span_test.go +++ b/pkg/util/tracing/span_test.go @@ -1310,16 +1310,12 @@ func TestWithRemoteParentFromTraceInfo(t *testing.T) { type mockEventListener struct { eventsSeen int - notifyImpl func() // If set, then the events are "consumed" by this listener. consuming bool } func (f *mockEventListener) Notify(_ Structured) EventConsumptionStatus { f.eventsSeen++ - if f.notifyImpl != nil { - f.notifyImpl() - } if f.consuming { return EventConsumed } @@ -1392,25 +1388,6 @@ func TestEventListener(t *testing.T) { childSp2.Finish() } -func TestEventListenerNotifiedWithoutHoldingSpanMutex(t *testing.T) { - tr := NewTracer() - rootEventListener := &mockEventListener{} - sp := tr.StartSpan("root", WithRecording(tracingpb.RecordingStructured), - WithEventListeners(rootEventListener)) - defer sp.Finish() - - // Set the EventListeners Notify() method to acquire the span's mutex. - rootEventListener.notifyImpl = func() { - sp.i.crdb.mu.Lock() - defer sp.i.crdb.mu.Unlock() - } - - // Record a StructuredEvent, if Notify() were called holding the span's mutex - // this would deadlock. - sp.RecordStructured(&types.Int32Value{Value: 5}) - require.Equal(t, 1, rootEventListener.eventsSeen) -} - // TestFinishedChildrenMetadata tests that on Finish() the parent span's // `childrenMetadata` map captures all the children in the recording rooted at // the finished span.