Skip to content

Commit

Permalink
Fixed race condition between StartETW() and StopETW() (#22664)
Browse files Browse the repository at this point in the history
Fixed race condition between StartETW() and StopETW()
  • Loading branch information
iglendd authored and Julio-Guerra committed Feb 8, 2024
1 parent 409b3d4 commit 621c8db
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
54 changes: 47 additions & 7 deletions pkg/util/winutil/etw/etw.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package etw

import (
"sync"
"syscall"
"unsafe"
)
Expand All @@ -27,7 +28,8 @@ type Subscriber interface {
}

var (
subscribers = make(map[ProviderType]Subscriber)
subscribers = make(map[ProviderType]Subscriber)
subscribersMutex sync.Mutex
)

// ProviderType identifies an ETW provider
Expand Down Expand Up @@ -72,10 +74,45 @@ func isHTTPServiceSubscriptionEnabled(etwProviders ProviderType) bool {
return (etwProviders & EtwProviderHTTPService) == EtwProviderHTTPService
}

func addToSubscriber(etwProviders ProviderType, sub Subscriber) {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

subscribers[etwProviders] = sub
}

func getSubscribers() []Subscriber {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

var subs []Subscriber
for _, s := range subscribers {
subs = append(subs, s)
}

return subs
}

func deleteSubscribers() {
subscribersMutex.Lock()
defer subscribersMutex.Unlock()

subscribers = make(map[ProviderType]Subscriber)
}

//export etwCallbackC
func etwCallbackC(eventInfo *C.DD_ETW_EVENT_INFO) {
switch eventInfo.provider {
case C.DD_ETW_TRACE_PROVIDER_HttpService:
// This function needs to be as fast as possible because HTTP ETW providers send a
// very high volume of events so we don't want to block the ETW thread. On the other
// hand it is safe to access global map here because system invokes this function
// but only if ETW session is not closed, which means that while a call to
// C.StartEtwSubscription() from StartEtw() is in progress, this function is safe
// to access global map and reversely when before and after a call to
// C.StartEtwSubscription(), this function will not be invoked. It is also safe to
// presume that this function will not be invoked after a call to
// C.StopEtwSubscription() initiated from StopEtw() is completed.
if sub, ok := subscribers[EtwProviderHTTPService]; ok {
sub.OnEvent((*DDEtwEventInfo)(unsafe.Pointer(eventInfo)))
}
Expand All @@ -93,18 +130,21 @@ func etwCallbackC(eventInfo *C.DD_ETW_EVENT_INFO) {
func StartEtw(subscriptionName string, etwProviders ProviderType, sub Subscriber) error {

if isHTTPServiceSubscriptionEnabled(etwProviders) {
subscribers[etwProviders] = sub
addToSubscriber(etwProviders, sub)
sub.OnStart()
}

// This call is blocking and will not return until the ETW session is stopped
// or an error occurs. This is by design. There is a race condition between
// this function and StopEtw() which will unblock C.StartEtwSubscription().
ret := C.StartEtwSubscription(
C.CString(subscriptionName),
providersToNativeProviders(etwProviders),
flagsToNativeFlags(0),
(C.ETW_EVENT_CALLBACK)(unsafe.Pointer(C.etwCallbackC)))

if isHTTPServiceSubscriptionEnabled(etwProviders) {
delete(subscribers, etwProviders)
deleteSubscribers()
sub.OnStop()

}
Expand All @@ -120,12 +160,12 @@ func StartEtw(subscriptionName string, etwProviders ProviderType, sub Subscriber
//
// See above note about http-centrism
func StopEtw(subscriptionName string) {
subs := getSubscribers()

if len(subscribers) != 0 {
C.StopEtwSubscription()

if sub, ok := subscribers[EtwProviderHTTPService]; ok {
sub.OnStop()
for _, s := range subs {
s.OnStop()
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Each section from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
fixes:
- |
Fixed race conditions caused by concurrent execution of etw.StartEtw()
and etw.StopEtw() functions which may concurrently access and modify a
global map.

0 comments on commit 621c8db

Please sign in to comment.