Skip to content

Commit

Permalink
[libc] add a simple TTAS spin lock (llvm#98846)
Browse files Browse the repository at this point in the history
  • Loading branch information
SchrodingerZhu authored Jul 16, 2024
1 parent 515618e commit 408a351
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
9 changes: 9 additions & 0 deletions libc/src/__support/threads/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ add_header_library(
sleep.h
)

# Header-only library target for the TTAS spin lock defined in spin_lock.h.
# It depends on the sibling `.sleep` target and on the cpp::Atomic wrapper
# target, matching two of the headers spin_lock.h includes.
add_header_library(
spin_lock
HDRS
spin_lock.h
DEPENDS
.sleep
libc.src.__support.CPP.atomic
)

# Descend into the subdirectory named after LIBC_TARGET_OS, but only when such
# a directory exists next to this CMakeLists.txt.
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
add_subdirectory(${LIBC_TARGET_OS})
endif()
Expand Down
81 changes: 81 additions & 0 deletions libc/src/__support/threads/spin_lock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//===-- TTAS Spin Lock ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
#define LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H

#include "src/__support/CPP/atomic.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/threads/sleep.h"

namespace LIBC_NAMESPACE_DECL {

namespace spinlock {
// Pointer-to-member-function type describing an operation on
// cpp::Atomic<Word>: the operation takes the operand value, a memory order and
// a memory scope, and yields a value of type Result.
template <typename Word, typename Result>
using AtomicOp = Result (cpp::Atomic<Word>::*)(Word, cpp::MemoryOrder,
                                               cpp::MemoryScope);
} // namespace spinlock

// A test-and-test-and-set (TTAS) spin lock layered over cpp::Atomic.
//
// Template parameters:
//   LockWord - type stored in the atomic flag. A zero value means "unlocked",
//              a non-zero value means "locked".
//   Acquire  - member function of cpp::Atomic<LockWord> used to take the lock.
//              It is called with the value 1 and must return the value the
//              flag held before the call.
//   Release  - member function of cpp::Atomic<LockWord> used to drop the lock.
//              It is called with the value 0.
template <typename LockWord, spinlock::AtomicOp<LockWord, LockWord> Acquire,
          spinlock::AtomicOp<LockWord, void> Release>
class SpinLockAdaptor {
  cpp::Atomic<LockWord> flag;

public:
  // Constructs the lock in the unlocked state.
  LIBC_INLINE constexpr SpinLockAdaptor() : flag{static_cast<LockWord>(0)} {}

  // Makes a single attempt to take the lock.
  // Returns true if the lock was free and is now held by the caller, false if
  // it was already held.
  LIBC_INLINE bool try_lock() {
    // The parentheses around `flag.*Acquire` are required: a function call
    // binds tighter than `.*`, so without them the expression would try to
    // call `Acquire` itself. Default arguments are not part of a
    // pointer-to-member type, so the memory scope has to be spelled out.
    return !(flag.*Acquire)(static_cast<LockWord>(1),
                            cpp::MemoryOrder::ACQUIRE,
                            cpp::MemoryScope::DEVICE);
  }

  // Spins until the lock has been acquired by the caller.
  LIBC_INLINE void lock() {
    // clang-format off
    // For normal TTAS, this compiles to the following on armv9a and x86_64:
    //         mov     w8, #1            |          .LBB0_1:
    // .LBB0_1:                          |                  mov     al, 1
    //         swpab   w8, w9, [x0]      |                  xchg    byte ptr [rdi], al
    //         tbnz    w9, #0, .LBB0_3   |                  test    al, 1
    //         b       .LBB0_4           |                  jne     .LBB0_3
    // .LBB0_2:                          |                  jmp     .LBB0_4
    //         isb                       |         .LBB0_2:
    // .LBB0_3:                          |                  pause
    //         ldrb    w9, [x0]          |         .LBB0_3:
    //         tbnz    w9, #0, .LBB0_2   |                  movzx   eax, byte ptr [rdi]
    //         b       .LBB0_1           |                  test    al, 1
    // .LBB0_4:                          |                  jne     .LBB0_2
    //         ret                       |                  jmp     .LBB0_1
    //                                   |          .LBB0_4:
    //                                   |                  ret
    // clang-format on
    // Notice that inside the busy loop .LBB0_2 and .LBB0_3, only instructions
    // with load semantics are used. swpab/xchg is only issued in outer loop
    // .LBB0_1. This is useful to avoid extra write traffic. The cache
    // coherence guarantees "write propagation", so even if the inner loop only
    // reads with relaxed ordering, the thread will eventually see the write.
    while (!try_lock())
      while (flag.load(cpp::MemoryOrder::RELAXED))
        sleep_briefly();
  }

  // Releases the lock by writing 0 to the flag with release ordering.
  LIBC_INLINE void unlock() {
    // Same precedence and default-argument considerations as in try_lock().
    (flag.*Release)(static_cast<LockWord>(0), cpp::MemoryOrder::RELEASE,
                    cpp::MemoryScope::DEVICE);
  }
};

// Selects the concrete lock type for the current target.
// On GPU targets the lock word is an `unsigned int` driven by
// fetch_or/fetch_and, since atomic operations with higher-order semantics are
// reported to perform better there. All other targets use a `bool` lock word
// driven by exchange/store.
// NOTE(review): the release operation is declared as returning void; confirm
// that cpp::Atomic<unsigned int>::fetch_and has a matching signature.
#if defined(LIBC_TARGET_ARCH_IS_GPU)
using SpinLock = SpinLockAdaptor<unsigned int,
                                 &cpp::Atomic<unsigned int>::fetch_or,
                                 &cpp::Atomic<unsigned int>::fetch_and>;
#else
using SpinLock = SpinLockAdaptor<bool,
                                 &cpp::Atomic<bool>::exchange,
                                 &cpp::Atomic<bool>::store>;
#endif

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H

0 comments on commit 408a351

Please sign in to comment.