Skip to content

Commit

Permalink
Use _mm_pause, load loop, and SRWLOCK
Browse files Browse the repository at this point in the history
Resolves microsoft#370, resolves microsoft#680
  • Loading branch information
AlexGuteniev committed Aug 4, 2020
1 parent b74b618 commit c139abc
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 20 deletions.
6 changes: 4 additions & 2 deletions stl/inc/atomic
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,10 @@ void _Atomic_wait_direct(

#if 1 // TRANSITION, ABI
// Acquires the spinlock stored in _Spinlock (0 = free, nonzero = held).
// Does not return until the calling thread has changed the value from 0 to 1.
inline void _Atomic_lock_spinlock(long& _Spinlock) noexcept {
    // The interlocked exchange is the only step that can take the lock: it writes 1 and
    // has succeeded when the value it replaced was 0.
    while (_InterlockedExchange(&_Spinlock, 1) != 0) {
        // The lock is held. Wait using loads only (the inner loop never writes to the lock
        // word) and retry the exchange once the value reads as 0.
        while (__iso_volatile_load32(&reinterpret_cast<int&>(_Spinlock)) != 0) {
            _YIELD_PROCESSOR();
        }
    }
}

Expand Down
6 changes: 6 additions & 0 deletions stl/inc/xatomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@ _STL_DISABLE_CLANG_WARNINGS
// On this branch the memory-order wrappers expand to the intrinsic name unchanged
// (compare the ARM branch that follows, where _INTRIN_RELAXED appends _nf).
#define _INTRIN_ACQUIRE(x) x
#define _INTRIN_RELEASE(x) x
#define _INTRIN_ACQ_REL(x) x
// _YIELD_PROCESSOR() is the hint issued inside spin-wait loops.
#ifdef _M_CEE_PURE
// Expands to nothing when _M_CEE_PURE is defined.
// NOTE(review): presumably because the native _mm_pause intrinsic is not usable there - confirm.
#define _YIELD_PROCESSOR()
#else // ^^^ _M_CEE_PURE / !_M_CEE_PURE vvv
// Declare the _mm_pause intrinsic locally and forward the macro to it.
extern "C" __MACHINEX86_X64(void _mm_pause(void))
#define _YIELD_PROCESSOR() _mm_pause()
#endif // ^^^ !_M_CEE_PURE ^^^


#elif defined(_M_ARM) || defined(_M_ARM64)
#define _INTRIN_RELAXED(x) _CONCAT(x, _nf)
Expand Down
90 changes: 72 additions & 18 deletions stl/src/atomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,87 @@
// implement shared_ptr spin lock

#include <yvals.h>

#include <atomic>
#include <intrin.h>
#pragma warning(disable : 4793)
#include <libloaderapi.h>
#include <synchapi.h>

_EXTERN_C
#pragma warning(disable : 4793) // '%s' : function is compiled as native code

// SPIN LOCK FOR shared_ptr ATOMIC OPERATIONS
volatile long _Shared_ptr_flag;
namespace {
// MUTEX FOR shared_ptr ATOMIC OPERATIONS
// Statically initialized with SRWLOCK_INIT. It is acquired and released in exclusive mode by
// _Lock_shared_ptr_spin_lock and _Unlock_shared_ptr_spin_lock.
SRWLOCK _Shared_ptr_lock = SRWLOCK_INIT;

_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() { // spin until _Shared_ptr_flag successfully set
#ifdef _M_ARM
while (_InterlockedExchange_acq(&_Shared_ptr_flag, 1)) {
__yield();
#if _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA
// SPIN LOCK FOR shared_ptr ATOMIC OPERATIONS
// Bit 0 set means the lock is held. It is used only on the fallback path, i.e. when
// _Acuire_srw_functions reports that the SRWLOCK functions could not be resolved.
long _Shared_ptr_flag;


struct _Srw_functions_table {
_STD atomic<decltype(&::AcquireSRWLockExclusive)> _Pfn_AcquireSRWLockExclusive{nullptr};
_STD atomic<decltype(&::ReleaseSRWLockExclusive)> _Pfn_ReleaseSRWLockExclusive{nullptr};
};

_Srw_functions_table _Table;

// Reports whether the SRWLOCK entry points are available, resolving them on first use.
//
// Returns true when both AcquireSRWLockExclusive and ReleaseSRWLockExclusive were found in
// Kernel32.dll and stored in _Table. Returns false when the module handle or either export
// could not be obtained. The outcome is cached in a function-local atomic, so once it has been
// determined a call only performs the acquire load below.
bool _Acuire_srw_functions() {
    enum class _Shared_ptr_api_level : char {
        __has_nothing = false,
        __has_srwlock = true,
        __not_set,
    };
    static _STD atomic<_Shared_ptr_api_level> _Api_level{_Shared_ptr_api_level::__not_set};

    _Shared_ptr_api_level _Current = _Api_level.load(_STD memory_order_acquire);
    if (_Current == _Shared_ptr_api_level::__not_set) {
        // Nothing cached yet: probe Kernel32.dll. Several threads may do this concurrently;
        // each of them stores the values it looked up itself.
        _Current          = _Shared_ptr_api_level::__has_nothing;
        HMODULE _Kernel32 = GetModuleHandleW(L"Kernel32.dll");
        if (_Kernel32 != nullptr) {
            auto _Pfn_AcquireSRWLockExclusive = reinterpret_cast<decltype(&::AcquireSRWLockExclusive)>(
                GetProcAddress(_Kernel32, "AcquireSRWLockExclusive"));
            auto _Pfn_ReleaseSRWLockExclusive = reinterpret_cast<decltype(&::ReleaseSRWLockExclusive)>(
                GetProcAddress(_Kernel32, "ReleaseSRWLockExclusive"));
            // Only report SRWLOCK support when both halves of the API are present.
            if (_Pfn_AcquireSRWLockExclusive != nullptr && _Pfn_ReleaseSRWLockExclusive != nullptr) {
                _Table._Pfn_AcquireSRWLockExclusive.store(_Pfn_AcquireSRWLockExclusive, _STD memory_order_relaxed);
                _Table._Pfn_ReleaseSRWLockExclusive.store(_Pfn_ReleaseSRWLockExclusive, _STD memory_order_relaxed);
                _Current = _Shared_ptr_api_level::__has_srwlock;
            }
        }
        // The release store pairs with the acquire load above, so a thread that observes the
        // cached level also observes the relaxed pointer stores made before it.
        _Api_level.store(_Current, _STD memory_order_release);
    }
    // Compare against the enumerator rather than reinterpreting the enum object as bool.
    return _Current == _Shared_ptr_api_level::__has_srwlock;
}
#else // _M_ARM
while (_interlockedbittestandset(&_Shared_ptr_flag, 0)) { // set bit 0
#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
} // unnamed namespace

_EXTERN_C

// Acquires the lock that guards shared_ptr atomic operations and returns once the caller owns it.
// The lock is released again by _Unlock_shared_ptr_spin_lock.
_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() {
#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA
    // Targeting Vista or later: call the SRWLOCK API directly. It takes a pointer to the lock.
    AcquireSRWLockExclusive(&_Shared_ptr_lock);
#else // ^^^ _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA / _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA vvv
    if (_Acuire_srw_functions()) {
        // The SRWLOCK functions were resolved at run time: call through the stored pointer.
        _Table._Pfn_AcquireSRWLockExclusive.load(_STD memory_order_relaxed)(&_Shared_ptr_lock);
    } else {
        // Fallback: spin on bit 0 of _Shared_ptr_flag. The interlocked bit-test-and-set takes
        // the lock when the bit was previously clear; while it is set, wait with loads only.
        while (_interlockedbittestandset(&_Shared_ptr_flag, 0) != 0) { // set bit 0
            while (__iso_volatile_load32(reinterpret_cast<int*>(&_Shared_ptr_flag)) != 0) {
                YieldProcessor();
            }
        }
    }
#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
}

// Releases the lock previously obtained with _Lock_shared_ptr_spin_lock.
// It takes the same branch as the lock function, so the mechanism that was acquired is the one
// that gets released.
_CRTIMP2_PURE void __cdecl _Unlock_shared_ptr_spin_lock() {
#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA
    // Targeting Vista or later: call the SRWLOCK API directly.
    ReleaseSRWLockExclusive(&_Shared_ptr_lock);
#else // ^^^ _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA / _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA vvv
    if (_Acuire_srw_functions()) {
        // The SRWLOCK functions were resolved at run time: call through the stored pointer.
        _Table._Pfn_ReleaseSRWLockExclusive.load(_STD memory_order_relaxed)(&_Shared_ptr_lock);
    } else {
        // Fallback spin lock: clear bit 0 with an interlocked operation to mark the lock free.
        _interlockedbittestandreset(&_Shared_ptr_flag, 0); // reset bit 0
    }
#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
}

_END_EXTERN_C

0 comments on commit c139abc

Please sign in to comment.