Skip to content

Commit

Permalink
Fix Unix named mutex crash during some race conditions
Browse files Browse the repository at this point in the history
Below when I refer to "mutex" I'm referring to the underlying mutex object, not an instance of the `Mutex` class.
- When the last reference to a mutex is closed while the lock is held by some thread and a pthread mutex is used, the mutex was attempted to be destroyed but that has undefined behavior
- There doesn't seem to be a way to behave exactly like on Windows for this corner case, where the mutex is destroyed when the last reference to it is released, regardless of which process has the mutex locked and which process releases the last reference to it (they could be two different processes), including in cases of abrupt shutdown
- For this corner case I settled on what seems like a decent solution and compatible with older runtimes:
  - When a process releases its last reference to the mutex
    - If that mutex is locked by the same thread, the lock is abandoned and the process no longer references the mutex
    - If that mutex is locked by a different thread, the lifetime of the mutex is extended with an implicit ref. The implicit ref prevents this or other processes from attempting to destroy the mutex while it is locked. The implicit ref is removed in either of these cases:
      - The mutex gets another reference from within the same process
      - The thread that owns the lock exits and abandons the mutex, at which point that would be the last reference to the mutex and the process would not reference the mutex anymore
  - The implementation based on file locks is less restricted, but for consistency that implementation also follows the same behavior
- There was also a race between an exiting thread abandoning one of its locked named mutexes and another thread releasing the last reference to it, fixed by using the creation/deletion process lock to synchronize

Fix for dotnet#34271 in master
Closes dotnet#28449 - probably doesn't fix the issue, but trying to enable it to see if it continues to fail
  • Loading branch information
kouvel committed May 12, 2020
1 parent ef2ea8a commit 1149f02
Show file tree
Hide file tree
Showing 8 changed files with 462 additions and 89 deletions.
13 changes: 12 additions & 1 deletion src/coreclr/src/pal/src/include/pal/mutex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,14 @@ class NamedMutexProcessData : public SharedMemoryProcessDataBase

private:
SharedMemoryProcessDataHeader *m_processDataHeader;
NamedMutexSharedData *m_sharedData;
SIZE_T m_lockCount;
#if !NAMED_MUTEX_USE_PTHREAD_MUTEX
HANDLE m_processLockHandle;
int m_sharedLockFileDescriptor;
#endif // !NAMED_MUTEX_USE_PTHREAD_MUTEX
CorUnix::CPalThread *m_lockOwnerThread;
NamedMutexProcessData *m_nextInThreadOwnedNamedMutexList;
bool m_hasRefFromLockOwnerThread;

public:
static SharedMemoryProcessDataHeader *CreateOrOpen(LPCSTR name, bool acquireLockIfCreated, bool *createdRef);
Expand All @@ -169,8 +169,19 @@ class NamedMutexProcessData : public SharedMemoryProcessDataBase
int sharedLockFileDescriptor
#endif // !NAMED_MUTEX_USE_PTHREAD_MUTEX
);

public:
virtual bool CanClose() const override;
virtual bool HasImplicitRef() const override;
virtual void SetHasImplicitRef(bool value) override;
virtual void Close(bool isAbruptShutdown, bool releaseSharedData) override;

public:
bool IsLockOwnedByCurrentThread() const
{
return GetSharedData()->IsLockOwnedByCurrentThread();
}

private:
NamedMutexSharedData *GetSharedData() const;
void SetLockOwnerThread(CorUnix::CPalThread *lockOwnerThread);
Expand Down
7 changes: 4 additions & 3 deletions src/coreclr/src/pal/src/include/pal/sharedmemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,10 @@ class SharedMemorySharedDataHeader
class SharedMemoryProcessDataBase
{
public:
virtual void Close(bool isAbruptShutdown, bool releaseSharedData)
{
}
virtual bool CanClose() const = 0;
virtual bool HasImplicitRef() const = 0;
virtual void SetHasImplicitRef(bool value) = 0;
virtual void Close(bool isAbruptShutdown, bool releaseSharedData) = 0;

virtual ~SharedMemoryProcessDataBase()
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/pal/src/include/pal/synchobjects.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ namespace CorUnix
Volatile<LONG> m_lSharedSynchLockCount;
LIST_ENTRY m_leOwnedObjsList;

CRITICAL_SECTION m_ownedNamedMutexListLock;
NamedMutexProcessData *m_ownedNamedMutexListHead;

ThreadNativeWaitData m_tnwdNativeData;
Expand Down Expand Up @@ -178,6 +177,7 @@ namespace CorUnix
void RemoveOwnedNamedMutex(NamedMutexProcessData *processData);
NamedMutexProcessData *RemoveFirstOwnedNamedMutex();
bool OwnsNamedMutex(NamedMutexProcessData *processData);
bool OwnsAnyNamedMutex() const;

// The following methods provide access to the native wait lock for
// those implementations that need a lock to protect the support for
Expand Down
23 changes: 20 additions & 3 deletions src/coreclr/src/pal/src/sharedmemory/sharedmemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,7 @@ void SharedMemoryProcessDataHeader::Close()
// nonzero, don't clean up any object or global process-local state.
if (m_refCount == 0)
{
_ASSERTE(m_data == nullptr || m_data->CanClose());
SharedMemoryManager::RemoveProcessDataHeader(this);
}

Expand Down Expand Up @@ -1015,18 +1016,34 @@ void SharedMemoryProcessDataHeader::IncRefCount()
_ASSERTE(SharedMemoryManager::IsCreationDeletionProcessLockAcquired());
_ASSERTE(m_refCount != 0);

++m_refCount;
if (++m_refCount == 2 && m_data != nullptr && m_data->HasImplicitRef())
{
// The synchronization object got an explicit ref that will govern its lifetime, remove the implicit ref
--m_refCount;
m_data->SetHasImplicitRef(false);
}
}

void SharedMemoryProcessDataHeader::DecRefCount()
{
_ASSERTE(SharedMemoryManager::IsCreationDeletionProcessLockAcquired());
_ASSERTE(m_refCount != 0);

if (--m_refCount == 0)
if (--m_refCount != 0)
{
return;
}

if (m_data != nullptr && !m_data->CanClose())
{
InternalDelete(this);
// Extend the lifetime of the synchronization object. The process data object is responsible for removing this extra ref
// when the synchronization object transitions into a state where it can be closed.
++m_refCount;
m_data->SetHasImplicitRef(true);
return;
}

InternalDelete(this);
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
65 changes: 45 additions & 20 deletions src/coreclr/src/pal/src/synchmgr/synchmanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,17 @@ namespace CorUnix
CThreadSynchronizationInfo * pSynchInfo = &pthrTarget->synchronizationInfo;
CPalSynchronizationManager * pSynchManager = GetInstance();

// The shared memory manager's process lock is acquired before calling into some PAL synchronization primitives that may
// take the PAL synchronization manager's synch lock (acquired below). For example, when using a file lock
// implementation for a named mutex (see NamedMutexProcessData::NamedMutexProcessData()), under the shared memory
// manager's process lock, CreateMutex is called, which acquires the PAL synchronization manager's synch lock. The same
// lock order needs to be maintained here to avoid a deadlock.
bool abandonNamedMutexes = pSynchInfo->OwnsAnyNamedMutex();
if (abandonNamedMutexes)
{
SharedMemoryManager::AcquireCreationDeletionProcessLock();
}

// Local lock
AcquireLocalSynchLock(pthrCurrent);

Expand Down Expand Up @@ -610,15 +621,18 @@ namespace CorUnix
pSynchManager->m_cacheOwnedObjectsListNodes.Add(pthrCurrent, poolnItem);
}

// Abandon owned named mutexes
while (true)
if (abandonNamedMutexes)
{
NamedMutexProcessData *processData = pSynchInfo->RemoveFirstOwnedNamedMutex();
if (processData == nullptr)
// Abandon owned named mutexes
while (true)
{
break;
NamedMutexProcessData *processData = pSynchInfo->RemoveFirstOwnedNamedMutex();
if (processData == nullptr)
{
break;
}
processData->Abandon();
}
processData->Abandon();
}

if (pthrTarget != pthrCurrent)
Expand Down Expand Up @@ -660,6 +674,12 @@ namespace CorUnix
}

ReleaseLocalSynchLock(pthrCurrent);

if (abandonNamedMutexes)
{
SharedMemoryManager::ReleaseCreationDeletionProcessLock();
}

DiscardAllPendingAPCs(pthrCurrent, pthrTarget);

return palErr;
Expand Down Expand Up @@ -4036,7 +4056,6 @@ namespace CorUnix
m_ownedNamedMutexListHead(nullptr)
{
InitializeListHead(&m_leOwnedObjsList);
InitializeCriticalSection(&m_ownedNamedMutexListLock);

#ifdef SYNCHMGR_SUSPENSION_SAFE_CONDITION_SIGNALING
m_lPendingSignalingCount = 0;
Expand All @@ -4046,7 +4065,6 @@ namespace CorUnix

CThreadSynchronizationInfo::~CThreadSynchronizationInfo()
{
DeleteCriticalSection(&m_ownedNamedMutexListLock);
if (NULL != m_shridWaitAwakened)
{
free(m_shridWaitAwakened);
Expand Down Expand Up @@ -4283,20 +4301,21 @@ namespace CorUnix

void CThreadSynchronizationInfo::AddOwnedNamedMutex(NamedMutexProcessData *processData)
{
_ASSERTE(this == &GetCurrentPalThread()->synchronizationInfo);
_ASSERTE(processData != nullptr);
_ASSERTE(processData->IsLockOwnedByCurrentThread());
_ASSERTE(processData->GetNextInThreadOwnedNamedMutexList() == nullptr);

EnterCriticalSection(&m_ownedNamedMutexListLock);
processData->SetNextInThreadOwnedNamedMutexList(m_ownedNamedMutexListHead);
m_ownedNamedMutexListHead = processData;
LeaveCriticalSection(&m_ownedNamedMutexListLock);
}

void CThreadSynchronizationInfo::RemoveOwnedNamedMutex(NamedMutexProcessData *processData)
{
_ASSERTE(this == &GetCurrentPalThread()->synchronizationInfo);
_ASSERTE(processData != nullptr);
_ASSERTE(processData->IsLockOwnedByCurrentThread());

EnterCriticalSection(&m_ownedNamedMutexListLock);
if (m_ownedNamedMutexListHead == processData)
{
m_ownedNamedMutexListHead = processData->GetNextInThreadOwnedNamedMutexList();
Expand All @@ -4321,38 +4340,44 @@ namespace CorUnix
}
_ASSERTE(found);
}
LeaveCriticalSection(&m_ownedNamedMutexListLock);
}

NamedMutexProcessData *CThreadSynchronizationInfo::RemoveFirstOwnedNamedMutex()
{
EnterCriticalSection(&m_ownedNamedMutexListLock);
_ASSERTE(this == &GetCurrentPalThread()->synchronizationInfo);

NamedMutexProcessData *processData = m_ownedNamedMutexListHead;
if (processData != nullptr)
{
_ASSERTE(processData->IsLockOwnedByCurrentThread());
m_ownedNamedMutexListHead = processData->GetNextInThreadOwnedNamedMutexList();
processData->SetNextInThreadOwnedNamedMutexList(nullptr);
}
LeaveCriticalSection(&m_ownedNamedMutexListLock);
return processData;
}

bool CThreadSynchronizationInfo::OwnsNamedMutex(NamedMutexProcessData *processData)
{
EnterCriticalSection(&m_ownedNamedMutexListLock);
bool found = false;
_ASSERTE(this == &GetCurrentPalThread()->synchronizationInfo);

for (NamedMutexProcessData *current = m_ownedNamedMutexListHead;
current != nullptr;
current = current->GetNextInThreadOwnedNamedMutexList())
{
_ASSERTE(current->IsLockOwnedByCurrentThread());
if (current == processData)
{
found = true;
break;
return true;
}
}
LeaveCriticalSection(&m_ownedNamedMutexListLock);
return found;

return false;
}

bool CThreadSynchronizationInfo::OwnsAnyNamedMutex() const
{
_ASSERTE(this == &GetCurrentPalThread()->synchronizationInfo);
return m_ownedNamedMutexListHead != nullptr;
}

#if SYNCHMGR_SUSPENSION_SAFE_CONDITION_SIGNALING
Expand Down
Loading

0 comments on commit 1149f02

Please sign in to comment.