-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[exporterhelper] Fix potential deadlocks in BatcherSender shutdown #10258
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Use this changelog template to create an entry for release notes. | ||
|
||
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' | ||
change_type: bug_fix | ||
|
||
# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) | ||
component: exporterhelper | ||
|
||
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). | ||
note: Fix potential deadlocks in BatcherSender shutdown | ||
|
||
# One or more tracking issues or pull requests related to the change | ||
issues: [10255] | ||
|
||
# Optional: The change log or logs in which this entry should be included. | ||
# e.g. '[user]' or '[user, api]' | ||
# Include 'user' if the change is relevant to end users. | ||
# Include 'api' if there is a change to a library API. | ||
# Default: '[user]' | ||
change_logs: [user] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -439,6 +439,63 @@ func TestBatchSender_WithBatcherOption(t *testing.T) { | |
} | ||
} | ||
|
||
// TestBatchSender_ShutdownDeadlock tests that the exporter does not deadlock when shutting down while a batch is being | ||
// merged. It blocks the merge function, starts Shutdown in a separate goroutine, unblocks the merge, and then waits | ||
// for Shutdown to return; a deadlock would make the test hang on the final channel receive. | ||
func TestBatchSender_ShutdownDeadlock(t *testing.T) { | ||
// blockMerge is closed to release every blocked merge call; waitMerge receives one signal per merge call that | ||
// has started. waitMerge is buffered (capacity 10) so the signal send does not block while the buffer has room. | ||
blockMerge := make(chan struct{}) | ||
waitMerge := make(chan struct{}, 10) | ||
|
||
// blockedBatchMergeFunc signals on waitMerge, blocks until the blockMerge channel is closed, and then returns | ||
// the first request unchanged (the second request is discarded). | ||
blockedBatchMergeFunc := func(_ context.Context, r1 Request, _ Request) (Request, error) { | ||
waitMerge <- struct{}{} | ||
<-blockMerge | ||
return r1, nil | ||
} | ||
|
||
bCfg := exporterbatcher.NewDefaultConfig() | ||
bCfg.FlushTimeout = 10 * time.Minute // high timeout so that the flush timeout does not trigger during the test | ||
be, err := newBaseExporter(defaultSettings, defaultDataType, newNoopObsrepSender, | ||
WithBatcher(bCfg, WithRequestBatchFuncs(blockedBatchMergeFunc, fakeBatchMergeSplitFunc))) | ||
require.NoError(t, err) | ||
require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) | ||
|
||
sink := newFakeRequestSink() | ||
|
||
// Send 10 concurrent requests and wait for their goroutines to start. startWG.Done() is called before be.send | ||
// on purpose: the sends cannot complete until the merge function is unblocked, which happens only after the | ||
// shutdown has been initiated below. | ||
startWG := sync.WaitGroup{} | ||
for i := 0; i < 10; i++ { | ||
startWG.Add(1) | ||
go func() { | ||
startWG.Done() | ||
// NOTE(review): assuming this is testify's require, a failure here calls t.FailNow from a non-test | ||
// goroutine, which the testing package does not support — consider assert.NoError; confirm. | ||
require.NoError(t, be.send(context.Background(), &fakeRequest{items: 4, sink: sink})) | ||
}() | ||
} | ||
startWG.Wait() | ||
|
||
// Wait for at least one batch to enter the merge function | ||
<-waitMerge | ||
|
||
// Initiate the exporter shutdown, unblock the batch merge function to catch possible deadlocks, | ||
// then wait for the exporter to finish. startShutdown is closed right before Shutdown is called, so | ||
// blockMerge is closed only once the shutdown goroutine is running. | ||
startShutdown := make(chan struct{}) | ||
doneShutdown := make(chan struct{}) | ||
go func() { | ||
close(startShutdown) | ||
// NOTE(review): same concern as above about calling require from a non-test goroutine — confirm. | ||
require.Nil(t, be.Shutdown(context.Background())) | ||
close(doneShutdown) | ||
}() | ||
<-startShutdown | ||
close(blockMerge) | ||
<-doneShutdown | ||
|
||
// The exporter should have sent at least one batch: usually a single "merged" batch, but in rare cases more | ||
// if the shutdown happens before the batch is fully merged. LessOrEqual(1, n) asserts that 1 <= n. | ||
assert.LessOrEqual(t, uint64(1), sink.requestsCount.Load()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it valid to send two batches in some cases, as your comment is referencing? If so, should the assert here be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's usually 1 or more in rare cases. LessOrEqual checks that the first argument is less or equal to the second one |
||
|
||
// blockedBatchMergeFunc just returns the first request, so the items count should be 4 times the requests count. | ||
assert.Equal(t, sink.requestsCount.Load()*4, sink.itemsCount.Load()) | ||
} | ||
|
||
func queueBatchExporter(t *testing.T, batchOption Option) *baseExporter { | ||
be, err := newBaseExporter(defaultSettings, defaultDataType, newNoopObsrepSender, batchOption, | ||
WithRequestQueue(exporterqueue.NewDefaultConfig(), exporterqueue.NewMemoryQueueFactory[Request]())) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be after the `be.send` call, right? Maybe use defer instead? Or are you purposefully wanting to continue in the main goroutine before send is complete?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, it should be before send. We need to ensure all the goroutines are started. The send is blocking
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The goroutines will all be started regardless of where the `startWG.Done()` call is made, if I'm following correctly. The only impact is when the main goroutine will continue after the `startWG.Wait()` call. I'm wondering if it should wait until all sends are complete or not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the send operations cannot complete until they are unblocked, which we do after the shutdown - that's the purpose of the test case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, thanks for clarifying 👍