Skip to content

Commit

Permalink
atomic: Make all operations follow sequentially consistent ordering
Browse files Browse the repository at this point in the history
  • Loading branch information
stotko committed May 25, 2020
1 parent 89ec1d2 commit 25af2e0
Show file tree
Hide file tree
Showing 9 changed files with 372 additions and 42 deletions.
14 changes: 5 additions & 9 deletions src/stdgpu/atomic.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ namespace stdgpu
* Differences to std::atomic:
* - Atomics must be modeled as containers since threads have to operate on the exact same object (which also requires copy and move constructors)
* - Manual allocation and destruction of container required
* - load and store are not atomically safe
* - All operations (including load() and store()) explicitly follow sequentially consistent ordering
* - Additional min and max functions for all supported integer and floating point types
* - Additional increment/decrement + modulo functions for unsigned int
*/
Expand Down Expand Up @@ -100,37 +100,33 @@ class atomic


/**
* \brief Loads and returns the current value of the atomic object
* \brief Atomically loads and returns the current value of the atomic object
* \return The current value of this object
* \note This operation is not atomically safe
*/
STDGPU_HOST_DEVICE T
load() const;


/**
* \brief Loads and returns the current value of the atomic object
* \brief Atomically loads and returns the current value of the atomic object
* \return The current value of this object
* \note Equivalent to load()
*/
STDGPU_HOST_DEVICE
operator T() const; // NOLINT(hicpp-explicit-conversions)


/**
* \brief Replaces the current value with desired
* \brief Atomically replaces the current value with desired one
* \param[in] desired The value to store to the atomic object
* \note This operation is not atomically safe
*/
STDGPU_HOST_DEVICE void
store(const T desired);


/**
* \brief Replaces the current value with desired
* \brief Atomically replaces the current value with desired one
* \param[in] desired The value to store to the atomic object
* \return The desired value
* \note Equivalent to store()
*/
STDGPU_HOST_DEVICE T //NOLINT(misc-unconventional-assign-operator)
operator=(const T desired);
Expand Down
19 changes: 19 additions & 0 deletions src/stdgpu/cuda/atomic.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,25 @@ namespace stdgpu
namespace cuda
{

/**
 * \brief Atomically loads and returns the value stored at the given address
 * \tparam T The type of the value to load
 * \param[in] address A pointer to the value that is read
 * \return The value read from address
 * \note Declared with STDGPU_DEVICE_ONLY; only the declaration lives here, the definition is provided by the implementation header
 */
template <typename T>
STDGPU_DEVICE_ONLY T
atomic_load(T* address);

/**
 * \brief Atomically replaces the value stored at the given address with the desired one
 * \tparam T The type of the value to store
 * \param[in] address A pointer to the value that is overwritten
 * \param[in] desired The value to write to address
 * \note Declared with STDGPU_DEVICE_ONLY; nothing is returned, in particular not the previously stored value
 */
template <typename T>
STDGPU_DEVICE_ONLY void
atomic_store(T* address,
const T desired);

/**
* \brief Atomically exchanges the stored value with the given argument
* \param[in] address A pointer to a value
Expand Down
122 changes: 108 additions & 14 deletions src/stdgpu/cuda/impl/atomic_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#define STDGPU_CUDA_ATOMIC_DETAIL_H

#include <stdgpu/algorithm.h>
#include <stdgpu/contract.h>
#include <stdgpu/limits.h>
#include <stdgpu/platform.h>

Expand Down Expand Up @@ -49,7 +48,7 @@ atomicMin(float* address,
do
{
assumed = old;
old = atomicCAS(address_as_int, assumed, __float_as_int( fminf(__int_as_float(assumed), value) ));
old = atomicCAS(address_as_int, assumed, __float_as_int( stdgpu::min<float>(__int_as_float(assumed), value) ));

// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
}
Expand All @@ -69,7 +68,7 @@ atomicMax(float* address,
do
{
assumed = old;
old = atomicCAS(address_as_int, assumed, __float_as_int( fmaxf(__int_as_float(assumed), value) ));
old = atomicCAS(address_as_int, assumed, __float_as_int( stdgpu::max<float>(__int_as_float(assumed), value) ));

// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
}
Expand Down Expand Up @@ -179,12 +178,47 @@ namespace stdgpu
namespace cuda
{

/**
 * Reads the value at address, with a memory fence issued before and after the read.
 * \param[in] address A pointer to the value that is read
 * \return The value observed at address between the two fences
 */
template <typename T>
STDGPU_DEVICE_ONLY T
atomic_load(T* address)
{
    // Leading fence: issued before the read takes place.
    __threadfence();

    // Go through a volatile-qualified view of the pointer so that the read is a volatile access.
    T observed = *static_cast<volatile T*>(address);

    // Trailing fence: issued after the read, before the value is handed back.
    __threadfence();

    return observed;
}


/**
 * Writes desired to address, with a memory fence issued before and after the write.
 * \param[in] address A pointer to the value that is overwritten
 * \param[in] desired The value written to address
 */
template <typename T>
STDGPU_DEVICE_ONLY void
atomic_store(T* address,
             const T desired)
{
    // Leading fence: issued before the write takes place.
    __threadfence();

    // Go through a volatile-qualified view of the pointer so that the write is a volatile access.
    *static_cast<volatile T*>(address) = desired;

    // Trailing fence: issued after the write.
    __threadfence();
}


/**
 * Atomically exchanges the value at address with desired, with a memory fence
 * issued before and after the exchange.
 * \param[in] address A pointer to the value that is exchanged
 * \param[in] desired The value written to address
 * \return The result of atomicExch(), i.e. the value stored at address before the exchange
 */
template <typename T, typename>
STDGPU_DEVICE_ONLY T
atomic_exchange(T* address,
                const T desired)
{
    // NOTE: there must be no early `return atomicExch(...)` ahead of this sequence;
    // it would make both fences unreachable and the exchange would run unfenced.

    // Leading fence: issued before the exchange takes place.
    __threadfence();

    T old = atomicExch(address, desired);

    // Trailing fence: issued after the exchange, before the old value is handed back.
    __threadfence();

    return old;
}


Expand All @@ -194,7 +228,13 @@ atomic_compare_exchange(T* address,
const T expected,
const T desired)
{
return atomicCAS(address, expected, desired);
__threadfence();

T old = atomicCAS(address, expected, desired);

__threadfence();

return old;
}


Expand All @@ -203,7 +243,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_add(T* address,
const T arg)
{
return atomicAdd(address, arg);
__threadfence();

T old = atomicAdd(address, arg);

__threadfence();

return old;
}


Expand All @@ -212,7 +258,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_sub(T* address,
const T arg)
{
return atomicSub(address, arg);
__threadfence();

T old = atomicSub(address, arg);

__threadfence();

return old;
}


Expand All @@ -221,7 +273,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_and(T* address,
const T arg)
{
return atomicAnd(address, arg);
__threadfence();

T old = atomicAnd(address, arg);

__threadfence();

return old;
}


Expand All @@ -230,7 +288,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_or(T* address,
const T arg)
{
return atomicOr(address, arg);
__threadfence();

T old = atomicOr(address, arg);

__threadfence();

return old;
}


Expand All @@ -239,7 +303,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_xor(T* address,
const T arg)
{
return atomicXor(address, arg);
__threadfence();

T old = atomicXor(address, arg);

__threadfence();

return old;
}


Expand All @@ -248,7 +318,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_min(T* address,
const T arg)
{
return atomicMin(address, arg);
__threadfence();

T old = atomicMin(address, arg);

__threadfence();

return old;
}


Expand All @@ -257,7 +333,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_max(T* address,
const T arg)
{
return atomicMax(address, arg);
__threadfence();

T old = atomicMax(address, arg);

__threadfence();

return old;
}


Expand All @@ -266,7 +348,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_inc_mod(T* address,
const T arg)
{
return atomicInc(address, arg);
__threadfence();

T old = atomicInc(address, arg);

__threadfence();

return old;
}


Expand All @@ -275,7 +363,13 @@ STDGPU_DEVICE_ONLY T
atomic_fetch_dec_mod(T* address,
const T arg)
{
return atomicDec(address, arg);
__threadfence();

T old = atomicDec(address, arg);

__threadfence();

return old;
}

} // namespace cuda
Expand Down
19 changes: 19 additions & 0 deletions src/stdgpu/hip/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,25 @@ namespace stdgpu
namespace hip
{

/**
 * \brief Atomically loads and returns the value stored at the given address
 * \tparam T The type of the value to load
 * \param[in] address A pointer to the value that is read
 * \return The value read from address
 * \note Declared with STDGPU_DEVICE_ONLY; only the declaration lives here
 */
template <typename T>
STDGPU_DEVICE_ONLY T
atomic_load(T* address);

/**
 * \brief Atomically replaces the value stored at the given address with the desired one
 * \tparam T The type of the value to store
 * \param[in] address A pointer to the value that is overwritten
 * \param[in] desired The value to write to address
 * \note Declared with STDGPU_DEVICE_ONLY; nothing is returned, in particular not the previously stored value
 */
template <typename T>
STDGPU_DEVICE_ONLY void
atomic_store(T* address,
const T desired);

/**
* \brief Atomically exchanges the stored value with the given argument
* \param[in] address A pointer to a value
Expand Down
Loading

0 comments on commit 25af2e0

Please sign in to comment.