Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed race condition between StartETW() and StopETW() #22664

Merged
merged 3 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 47 additions & 7 deletions pkg/util/winutil/etw/etw.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package etw

import (
"sync"
"syscall"
"unsafe"
)
Expand All @@ -27,7 +28,8 @@ type Subscriber interface {
}

var (
subscribers = make(map[ProviderType]Subscriber)
subscribers = make(map[ProviderType]Subscriber)
subscribersMutex sync.Mutex
)

// ProviderType identifies an ETW provider
Expand Down Expand Up @@ -72,10 +74,45 @@ func isHTTPServiceSubscriptionEnabled(etwProviders ProviderType) bool {
return (etwProviders & EtwProviderHTTPService) == EtwProviderHTTPService
}

func addToSubscriber(etwProviders ProviderType, sub Subscriber) {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

subscribers[etwProviders] = sub
}

func getSubscribers() []Subscriber {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

var subs []Subscriber
for _, s := range subscribers {
subs = append(subs, s)
}

return subs
}

func deleteSubscribers() {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

subscribers = make(map[ProviderType]Subscriber)
}

//export etwCallbackC
func etwCallbackC(eventInfo *C.DD_ETW_EVENT_INFO) {
switch eventInfo.provider {
case C.DD_ETW_TRACE_PROVIDER_HttpService:
// This function needs to be as fast as possible because HTTP ETW providers send a
// very high volume of events so we don't want to block the ETW thread. On the other
// hand it is safe to access global map here because system invokes this function
// but only if ETW session is not closed, which means that while a call to
// C.StartEtwSubscription() from StartEtw() is in progress, this function is safe
// to access global map and reversely when before and after a call to
// C.StartEtwSubscription(), this function will not be invoked. It is also safe to
// presume that this function will not be invoked after a call to
// C.StopEtwSubscription() initiated from StopEtw() is completed.
if sub, ok := subscribers[EtwProviderHTTPService]; ok {
sub.OnEvent((*DDEtwEventInfo)(unsafe.Pointer(eventInfo)))
}
Expand All @@ -93,18 +130,21 @@ func etwCallbackC(eventInfo *C.DD_ETW_EVENT_INFO) {
func StartEtw(subscriptionName string, etwProviders ProviderType, sub Subscriber) error {

if isHTTPServiceSubscriptionEnabled(etwProviders) {
subscribers[etwProviders] = sub
addToSubscriber(etwProviders, sub)
sub.OnStart()
}

// This call is blocking and will not return until the ETW session is stopped
// or an error occurs. This is by design. There is a race condition between
// this function and StopEtw() which will unblock C.StartEtwSubscription().
ret := C.StartEtwSubscription(
C.CString(subscriptionName),
providersToNativeProviders(etwProviders),
flagsToNativeFlags(0),
(C.ETW_EVENT_CALLBACK)(unsafe.Pointer(C.etwCallbackC)))

if isHTTPServiceSubscriptionEnabled(etwProviders) {
delete(subscribers, etwProviders)
deleteSubscribers()
sub.OnStop()

}
Expand All @@ -120,12 +160,12 @@ func StartEtw(subscriptionName string, etwProviders ProviderType, sub Subscriber
//
// See above note about http-centrism
func StopEtw(subscriptionName string) {
subs := getSubscribers()

if len(subscribers) != 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ question
Should this be len(subs) ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes!!! Thank you, creating another PR

C.StopEtwSubscription()

if sub, ok := subscribers[EtwProviderHTTPService]; ok {
sub.OnStop()
for _, s := range subs {
s.OnStop()
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Each section from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
fixes:
- |
Fixed race conditions caused by concurrent execution of etw.StartEtw()
and etw.StopEtw() functions which may concurrently access and modify a
global map.
Loading