From c139abc5098bfe355f897d9b8bb4d1026cd4d3bd Mon Sep 17 00:00:00 2001
From: Alex Guteniev
Date: Tue, 4 Aug 2020 22:11:37 +0300
Subject: [PATCH] Use _mm_pause, load loop, and SRWLOCK

Resolves #370, resolves #680
---
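Note for reviewers (below the fold; not part of the commit message):

The stl/inc/atomic hunk turns the bare exchange loop into a
test-and-test-and-set loop: the atomic RMW is retried only after plain loads
have observed the lock free, and the pause intrinsic throttles the load loop.
A minimal standalone sketch of the same pattern in portable std::atomic
terms; the spinlock type and member names are illustrative only, not part of
this patch:

    #include <atomic>
    #include <emmintrin.h> // _mm_pause (x86/x64)

    struct spinlock { // illustrative name, not the STL's type
        std::atomic<long> _Flag{0};

        void lock() noexcept {
            // Attempt the expensive atomic RMW only while the lock looks
            // free; otherwise spin on plain loads so waiting cores share the
            // cache line instead of bouncing it around in exclusive state.
            while (_Flag.exchange(1, std::memory_order_acquire) != 0) {
                while (_Flag.load(std::memory_order_relaxed) != 0) {
                    _mm_pause(); // x86 PAUSE: cheaper spin, frees SMT resources
                }
            }
        }

        void unlock() noexcept {
            _Flag.store(0, std::memory_order_release);
        }
    };

The same shape appears twice below: once with _InterlockedExchange /
__iso_volatile_load32 in stl/inc/atomic, and once with
_interlockedbittestandset / YieldProcessor in the pre-Vista fallback of
stl/src/atomic.cpp.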
 stl/inc/atomic     |  6 ++--
 stl/inc/xatomic.h  |  6 ++++
 stl/src/atomic.cpp | 90 ++++++++++++++++++++++++++++++++++++----------
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/stl/inc/atomic b/stl/inc/atomic
index 633280f5d5..1a57fe4416 100644
--- a/stl/inc/atomic
+++ b/stl/inc/atomic
@@ -392,8 +392,10 @@ void _Atomic_wait_direct(
 
 #if 1 // TRANSITION, ABI
 inline void _Atomic_lock_spinlock(long& _Spinlock) noexcept {
-    while (_InterlockedExchange(&_Spinlock, 1)) {
-        _YIELD_PROCESSOR();
+    while (_InterlockedExchange(&_Spinlock, 1) != 0) {
+        while (__iso_volatile_load32(&reinterpret_cast<int&>(_Spinlock)) != 0) {
+            _YIELD_PROCESSOR();
+        }
     }
 }
 
diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h
index 8498f18c1a..a607b545c0 100644
--- a/stl/inc/xatomic.h
+++ b/stl/inc/xatomic.h
@@ -28,7 +28,13 @@ _STL_DISABLE_CLANG_WARNINGS
 #define _INTRIN_ACQUIRE(x) x
 #define _INTRIN_RELEASE(x) x
 #define _INTRIN_ACQ_REL(x) x
+#ifdef _M_CEE_PURE
 #define _YIELD_PROCESSOR()
+#else // ^^^ _M_CEE_PURE / !_M_CEE_PURE vvv
+extern "C" void _mm_pause();
+#define _YIELD_PROCESSOR() _mm_pause()
+#endif // ^^^ !_M_CEE_PURE ^^^
+
 
 #elif defined(_M_ARM) || defined(_M_ARM64)
 #define _INTRIN_RELAXED(x) _CONCAT(x, _nf)
diff --git a/stl/src/atomic.cpp b/stl/src/atomic.cpp
index b2c0eeb00b..a9c54cb8a4 100644
--- a/stl/src/atomic.cpp
+++ b/stl/src/atomic.cpp
@@ -4,33 +4,87 @@
 // implement shared_ptr spin lock
 
 #include <yvals.h>
-
+#include <atomic>
 #include <intrin.h>
 
-#pragma warning(disable : 4793)
+#include <awint.hpp>
+#include <Windows.h>
 
-_EXTERN_C
+#pragma warning(disable : 4793) // '%s' : function is compiled as native code
 
-// SPIN LOCK FOR shared_ptr ATOMIC OPERATIONS
-volatile long _Shared_ptr_flag;
+namespace {
+    // MUTEX FOR shared_ptr ATOMIC OPERATIONS
+    SRWLOCK _Shared_ptr_lock = SRWLOCK_INIT;
 
-_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() { // spin until _Shared_ptr_flag successfully set
-#ifdef _M_ARM
-    while (_InterlockedExchange_acq(&_Shared_ptr_flag, 1)) {
-        __yield();
+#if _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA
+    // SPIN LOCK FOR shared_ptr ATOMIC OPERATIONS
+    long _Shared_ptr_flag;
+
+    using _Srw_function = void(__stdcall*)(SRWLOCK*);
+
+    struct _Srw_functions_table {
+        _STD atomic<_Srw_function> _Pfn_AcquireSRWLockExclusive{nullptr};
+        _STD atomic<_Srw_function> _Pfn_ReleaseSRWLockExclusive{nullptr};
+    };
+
+    _Srw_functions_table _Table;
+
+    bool _Acquire_srw_functions() {
+        enum class _Shared_ptr_api_level : char {
+            __has_nothing = false,
+            __has_srwlock = true,
+            __not_set,
+        };
+        static _STD atomic<_Shared_ptr_api_level> _Api_level{_Shared_ptr_api_level::__not_set};
+
+        _Shared_ptr_api_level _Current = _Api_level.load(_STD memory_order_acquire);
+        if (_Current == _Shared_ptr_api_level::__not_set) {
+            _Current          = _Shared_ptr_api_level::__has_nothing;
+            HMODULE _Kernel32 = GetModuleHandleW(L"Kernel32.dll");
+            if (_Kernel32 != nullptr) {
+                auto _Pfn_AcquireSRWLockExclusive = reinterpret_cast<_Srw_function>(
+                    GetProcAddress(_Kernel32, "AcquireSRWLockExclusive"));
+                auto _Pfn_ReleaseSRWLockExclusive = reinterpret_cast<_Srw_function>(
+                    GetProcAddress(_Kernel32, "ReleaseSRWLockExclusive"));
+                if (_Pfn_AcquireSRWLockExclusive != nullptr && _Pfn_ReleaseSRWLockExclusive != nullptr) {
+                    _Table._Pfn_AcquireSRWLockExclusive.store(_Pfn_AcquireSRWLockExclusive, _STD memory_order_relaxed);
+                    _Table._Pfn_ReleaseSRWLockExclusive.store(_Pfn_ReleaseSRWLockExclusive, _STD memory_order_relaxed);
+                    _Current = _Shared_ptr_api_level::__has_srwlock;
+                }
+            }
+            _Api_level.store(_Current, _STD memory_order_release);
+        }
+        return static_cast<bool>(_Current);
     }
-#else // _M_ARM
-    while (_interlockedbittestandset(&_Shared_ptr_flag, 0)) { // set bit 0
+#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
+} // unnamed namespace
+
+_EXTERN_C
+
+_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() { // take lock for shared_ptr atomic operations
+#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA
+    AcquireSRWLockExclusive(&_Shared_ptr_lock);
+#else // ^^^ _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA / _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA vvv
+    if (_Acquire_srw_functions()) {
+        _Table._Pfn_AcquireSRWLockExclusive.load(_STD memory_order_relaxed)(&_Shared_ptr_lock);
+    } else {
+        while (_interlockedbittestandset(&_Shared_ptr_flag, 0) != 0) { // set bit 0
+            while (__iso_volatile_load32(&reinterpret_cast<int&>(_Shared_ptr_flag)) != 0) {
+                YieldProcessor();
+            }
+        }
     }
-#endif // _M_ARM
+#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
 }
 
 _CRTIMP2_PURE void __cdecl _Unlock_shared_ptr_spin_lock() { // release previously obtained lock
-#ifdef _M_ARM
-    __dmb(_ARM_BARRIER_ISH);
-    __iso_volatile_store32(reinterpret_cast<volatile int*>(&_Shared_ptr_flag), 0);
-#else // _M_ARM
-    _interlockedbittestandreset(&_Shared_ptr_flag, 0); // reset bit 0
-#endif // _M_ARM
+#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA
+    ReleaseSRWLockExclusive(&_Shared_ptr_lock);
+#else // ^^^ _STL_WIN32_WINNT >= _STL_WIN32_WINNT_VISTA / _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA vvv
+    if (_Acquire_srw_functions()) {
+        _Table._Pfn_ReleaseSRWLockExclusive.load(_STD memory_order_relaxed)(&_Shared_ptr_lock);
+    } else {
+        _interlockedbittestandreset(&_Shared_ptr_flag, 0); // reset bit 0
+    }
+#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA ^^^
 }
 
 _END_EXTERN_C
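
Postscript for reviewers (not part of the patch): on builds with
_STL_WIN32_WINNT < _STL_WIN32_WINNT_VISTA, the stl/src/atomic.cpp change
probes kernel32 once for the SRWLOCK exports and publishes the result with
release/acquire ordering, falling back to the spin lock when they are absent.
A condensed sketch of that one-time detection pattern; have_srwlock and
g_acquire_srw are illustrative names, not part of this patch:

    #include <atomic>
    #include <Windows.h>

    using srw_fn = void(WINAPI*)(PSRWLOCK);

    std::atomic<srw_fn> g_acquire_srw{nullptr};
    std::atomic<int> g_srw_level{-1}; // -1 not probed, 0 absent, 1 present

    bool have_srwlock() noexcept {
        int level = g_srw_level.load(std::memory_order_acquire);
        if (level < 0) { // first caller probes; a racing duplicate probe is benign
            auto fn = reinterpret_cast<srw_fn>(
                GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "AcquireSRWLockExclusive"));
            g_acquire_srw.store(fn, std::memory_order_relaxed); // made visible by the release below
            level = fn != nullptr ? 1 : 0;
            g_srw_level.store(level, std::memory_order_release);
        }
        // Callers that get true then invoke g_acquire_srw.load(std::memory_order_relaxed).
        return level == 1;
    }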