Skip to content

Commit

Permalink
tracing: Notify EventListeners while holding the span's mutex
Browse files Browse the repository at this point in the history
This commit addresses an oversight that was introduced in 6a3c0f9
where we made the notifications of the EventListeners done without
holding the span's mutex. However, the contract of `WithEventListeners`
implies that the callbacks are not called any more after `Finish()`
returns, so that commit introduced a bug that is now fixed in this
commit.

The fix done in this commit is suboptimal though - it introduces an
exported method `SetLazyTagLocked` which is not great. The proper fix
would be to refactor `TracingAggregator` to avoid setting the lazy tag
dynamically, in the event listener callback, but that seems like
a non-trivial change, and I'd like to get this commit backported. To
avoid introducing more callsites of this `SetLazyTagLocked` method it is
marked as "deprecated" and cockroachdb#100438 tracks addressing this piece of the
tech debt.

Release note: None
  • Loading branch information
yuzefovich committed Apr 3, 2023
1 parent 5a940f6 commit c120153
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 36 deletions.
1 change: 1 addition & 0 deletions build/bazelutil/nogo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,7 @@
"pkg/server/dumpstore/dumpstore.go$": "excluded until all uses of io/ioutil are replaced",
"pkg/server/dumpstore/dumpstore_test.go$": "excluded until all uses of io/ioutil are replaced",
"pkg/server/profiler/profilestore_test.go$": "excluded until all uses of io/ioutil are replaced",
"pkg/util/bulk/tracing_aggregator.go$":"temporary exclusion until #100438 is resolved",
"pkg/util/log/file_api.go$": "excluded until all uses of io/ioutil are replaced",
"pkg/build/bazel/bazel\\.go$": "Runfile function deprecated"
},
Expand Down
2 changes: 0 additions & 2 deletions pkg/kv/bulk/bulkpb/ingestion_performance_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,5 +271,3 @@ type timing time.Duration

func (t timing) String() string { return time.Duration(t).Round(time.Second).String() }
func (t timing) SafeValue() {}

var _ bulk.TracingAggregatorEvent = &IngestionPerformanceStats{}
2 changes: 2 additions & 0 deletions pkg/testutils/lint/lint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,8 @@ func TestLint(t *testing.T) {
stream.GrepNot(`pkg/sql/row/expr_walker_test.go:.* evalCtx.SetDeprecatedContext is deprecated: .*`),
stream.GrepNot(`pkg/sql/schemachanger/scbuild/tree_context_builder.go:.* evalCtx.SetDeprecatedContext is deprecated: .*`),
stream.GrepNot(`pkg/sql/schemachanger/scplan/internal/rules/.*/.*go:.* should not use dot imports \(ST1001\)`),
// TODO(yuzefovich): remove this exclusion (#100438).
stream.GrepNot(`pkg/util/bulk/tracing_aggregator.go:.*SetLazyTagLocked is deprecated: .*`),
), func(s string) {
t.Errorf("\n%s", s)
}); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/bulk/tracing_aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (b *TracingAggregator) Notify(event tracing.Structured) tracing.EventConsum
eventTag := bulkEvent.Tag()
if _, ok := b.mu.aggregatedEvents[bulkEvent.Tag()]; !ok {
b.mu.aggregatedEvents[eventTag] = bulkEvent.Identity()
b.mu.sp.SetLazyTag(eventTag, b.mu.aggregatedEvents[eventTag])
b.mu.sp.SetLazyTagLocked(eventTag, b.mu.aggregatedEvents[eventTag])
}
b.mu.aggregatedEvents[eventTag].Combine(bulkEvent)
return tracing.EventNotConsumed
Expand Down
15 changes: 7 additions & 8 deletions pkg/util/tracing/crdbspan.go
Original file line number Diff line number Diff line change
Expand Up @@ -962,28 +962,28 @@ func (s *crdbSpan) getLazyTagLocked(key string) (interface{}, bool) {
// convention between parents and children.
func (s *crdbSpan) notifyEventListeners(item Structured) {
s.mu.Lock()
var unlocked bool
defer func() {
if !unlocked {
s.mu.Unlock()
}
}()

// Check if the span has been finished concurrently with this notify call.
// This can happen when the signal comes from a child span; in that case the
// child calls into the parent without holding the child's lock, so the call
// can race with parent.Finish().
if s.mu.finished {
s.mu.Unlock()
return
}
s.mu.Unlock()

// Notify s' eventListeners first, before passing the event to parent.
//
// We can operate on s' eventListeners without holding the mutex because the
// slice is only written to once during span creation.
for _, listener := range s.eventListeners {
if listener.Notify(item) == EventConsumed {
return
}
}

s.mu.Lock()
if s.mu.recording.notifyParentOnStructuredEvent {
parent := s.mu.parent.Span.i.crdb
// Take a reference of s' parent before releasing the mutex. This ensures
Expand All @@ -992,9 +992,8 @@ func (s *crdbSpan) notifyEventListeners(item Structured) {
parentRef := makeSpanRef(s.mu.parent.Span)
defer parentRef.release()
s.mu.Unlock()
unlocked = true
parent.notifyEventListeners(item)
} else {
s.mu.Unlock()
}
}

Expand Down
16 changes: 14 additions & 2 deletions pkg/util/tracing/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,19 @@ func (sp *Span) SetLazyTag(key string, value interface{}) {
sp.i.SetLazyTag(key, value)
}

// SetLazyTagLocked is the same as SetLazyTag but assumes that the mutex of sp
// is being held.
//
// Deprecated: this method should not be used because it's introduced only to go
// around some tech debt (#100438). Once that issue is addressed, this method
// should be removed.
func (sp *Span) SetLazyTagLocked(key string, value interface{}) {
if sp.detectUseAfterFinish() {
return
}
sp.i.setLazyTagLocked(key, value)
}

// GetLazyTag returns the value of the tag with the given key. If that tag doesn't
// exist, the bool retval is false.
func (sp *Span) GetLazyTag(key string) (interface{}, bool) {
Expand All @@ -535,8 +548,7 @@ type EventListener interface {
// Notify is invoked on every Structured event recorded by the span and its
// children, recursively.
//
// Note that this method should not run for a long time as it will hold up the
// span recording Structured events during traced operations.
// The caller holds the mutex of the span.
//
// Notify will not be called concurrently on the same span.
Notify(event Structured) EventConsumptionStatus
Expand Down
8 changes: 8 additions & 0 deletions pkg/util/tracing/span_inner.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,14 @@ func (s *spanInner) SetLazyTag(key string, value interface{}) *spanInner {
return s
}

func (s *spanInner) setLazyTagLocked(key string, value interface{}) *spanInner {
if s.isNoop() {
return s
}
s.crdb.setLazyTagLocked(key, value)
return s
}

// GetLazyTag returns the value of the tag with the given key. If that tag doesn't
// exist, the bool retval is false.
func (s *spanInner) GetLazyTag(key string) (interface{}, bool) {
Expand Down
23 changes: 0 additions & 23 deletions pkg/util/tracing/span_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1310,16 +1310,12 @@ func TestWithRemoteParentFromTraceInfo(t *testing.T) {

type mockEventListener struct {
eventsSeen int
notifyImpl func()
// If set, then the events are "consumed" by this listener.
consuming bool
}

func (f *mockEventListener) Notify(_ Structured) EventConsumptionStatus {
f.eventsSeen++
if f.notifyImpl != nil {
f.notifyImpl()
}
if f.consuming {
return EventConsumed
}
Expand Down Expand Up @@ -1392,25 +1388,6 @@ func TestEventListener(t *testing.T) {
childSp2.Finish()
}

func TestEventListenerNotifiedWithoutHoldingSpanMutex(t *testing.T) {
tr := NewTracer()
rootEventListener := &mockEventListener{}
sp := tr.StartSpan("root", WithRecording(tracingpb.RecordingStructured),
WithEventListeners(rootEventListener))
defer sp.Finish()

// Set the EventListeners Notify() method to acquire the span's mutex.
rootEventListener.notifyImpl = func() {
sp.i.crdb.mu.Lock()
defer sp.i.crdb.mu.Unlock()
}

// Record a StructuredEvent, if Notify() were called holding the span's mutex
// this would deadlock.
sp.RecordStructured(&types.Int32Value{Value: 5})
require.Equal(t, 1, rootEventListener.eventsSeen)
}

// TestFinishedChildrenMetadata tests that on Finish() the parent span's
// `childrenMetadata` map captures all the children in the recording rooted at
// the finished span.
Expand Down

0 comments on commit c120153

Please sign in to comment.