Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #194 from NVIDIA/bugfix/atomic_nvrtc
Browse files Browse the repository at this point in the history
Fix atomic and barrier on NVRTC.
  • Loading branch information
wmaxey authored Aug 10, 2021
2 parents a1b8f0f + 32f88dd commit 9575eff
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 85 deletions.
2 changes: 1 addition & 1 deletion include/cuda/std/barrier
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public:

// Constructs the barrier: statically checks that the `__barrier` member sits at
// offset zero of barrier<thread_scope_block>, then passes `__expected` and
// `__completion` on to init().
// NOTE(review): the layout check appears twice below -- presumably the removed
// (offsetof) and added (_LIBCUDACXX_OFFSET_IS_ZERO) sides of this diff hunk.
_LIBCUDACXX_INLINE_VISIBILITY
barrier(std::ptrdiff_t __expected, std::__empty_completion __completion = std::__empty_completion()) {
// Layout check spelled directly with offsetof.
static_assert(offsetof(barrier<thread_scope_block>, __barrier) == 0, "fatal error: bad barrier layout");
// Same layout check expressed through the _LIBCUDACXX_OFFSET_IS_ZERO macro.
static_assert(_LIBCUDACXX_OFFSET_IS_ZERO(barrier<thread_scope_block>, __barrier), "fatal error: bad barrier layout");
init(this, __expected, __completion);
}

Expand Down
8 changes: 7 additions & 1 deletion libcxx/include/__config
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,12 @@ extern "C++" {

// Expands to the compiler's __alignof for _Tp.
#define _LIBCUDACXX_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp)

// _LIBCUDACXX_OFFSET_IS_ZERO(type, member) evaluates to true exactly when
// `member` is located at offset zero inside `type`.
#if !defined(_LIBCUDACXX_COMPILER_NVRTC)
#define _LIBCUDACXX_OFFSET_IS_ZERO(type, member) (!offsetof(type, member))
#else
// NVRTC: test the member's address as seen through a null `type` pointer
// instead of going through offsetof.
#define _LIBCUDACXX_OFFSET_IS_ZERO(type, member) (!(&(((type *)0)->member)))
#endif

#if defined(_LIBCUDACXX_COMPILER_CLANG)

// _LIBCUDACXX_ALTERNATE_STRING_LAYOUT is an old name for
Expand Down Expand Up @@ -1615,7 +1621,7 @@ _LIBCUDACXX_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container(
#define _LIBCUDACXX_NO_RUNTIME_LOCK_FREE

// CUDA Atomics supersede host atomics in order to insert the host/device dispatch layer.
// _LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL is defined when one of the compiler macros tested below is defined.
// NOTE(review): two #if lines share a single #endif here -- presumably the removed and added
// sides of this diff hunk; the second condition additionally tests _LIBCUDACXX_COMPILER_NVRTC.
#if defined(_LIBCUDACXX_COMPILER_NVCC) || defined(_LIBCUDACXX_COMPILER_PGI)
#if defined(_LIBCUDACXX_COMPILER_NVCC) || defined(_LIBCUDACXX_COMPILER_NVRTC) || defined(_LIBCUDACXX_COMPILER_PGI)
# define _LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL
#endif

Expand Down
84 changes: 1 addition & 83 deletions libcxx/include/support/atomic/atomic_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,89 +11,7 @@
#ifndef _LIBCUDACXX_ATOMIC_BASE_H
#define _LIBCUDACXX_ATOMIC_BASE_H

// Wrapper that holds a single _Tp by value in `__a_value` and carries the
// integer tag _Sco as the compile-time constant `__sco`.
template <class _Tp, int _Sco>
struct __cxx_atomic_base_impl {
  // The wrapped value, declared with _ALIGNAS(sizeof(_Tp)).
  _ALIGNAS(sizeof(_Tp)) _Tp __a_value;

  // Compile-time copy of the _Sco template argument.
  static constexpr int __sco = _Sco;

  // Type of the wrapped value.
  using __underlying_t = _Tp;

  // Defaulted: __a_value is default-initialized.
  _LIBCUDACXX_CONSTEXPR
  __cxx_atomic_base_impl() _NOEXCEPT = default;

  // Initializes __a_value with a copy of __desired.
  _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit
  __cxx_atomic_base_impl(_Tp __desired) _NOEXCEPT
    : __a_value(__desired)
  {}
};

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (unqualified wrapper overload).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
_Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco>* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (volatile wrapper overload; the result points to volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (const wrapper overload; the result points to const _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (const volatile wrapper overload; the result points to const volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Reference-style wrapper: instead of holding a _Tp by value it stores a
// pointer (`__a_value`) to a _Tp that lives elsewhere, and carries the integer
// tag _Sco as the compile-time constant `__sco`.
template <typename _Tp, int _Sco>
struct __cxx_atomic_ref_base_impl {
  // Type of the object pointed to by __a_value.
  using __underlying_t = _Tp;
  // Compile-time copy of the _Sco template argument.
  static constexpr int __sco = _Sco;

  // Defaulted: __a_value is default-initialized.
  _LIBCUDACXX_CONSTEXPR
  __cxx_atomic_ref_base_impl() _NOEXCEPT = default;

  // Binds the wrapper to the caller's object by storing its address.
  // The argument is taken by reference because `__a_value` is a `_Tp*`: it
  // cannot be initialized from a `_Tp` passed by value, and the address of a
  // by-value parameter would not outlive the constructor call.
  _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit
  __cxx_atomic_ref_base_impl(_Tp& __value) _NOEXCEPT : __a_value(&__value) {}

  // Non-owning pointer to the referenced object.
  _Tp* __a_value;
};

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (unqualified wrapper overload).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
_Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (volatile wrapper overload; the result points to volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (const wrapper overload; the result points to const _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (const volatile wrapper overload; the result points to
// const volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Forwards `__a` to __cxx_get_underlying_atomic and returns whatever that call
// returns; the return type is taken from the same call expression.
template <class _Wrapper>
_LIBCUDACXX_INLINE_VISIBILITY
auto __cxx_atomic_base_unwrap(_Wrapper* __a) _NOEXCEPT
    -> decltype(__cxx_get_underlying_atomic(__a))
{
  return __cxx_get_underlying_atomic(__a);
}

// Shorthand for the nested `__underlying_t` type of a wrapper type.
template <class _Wrapper>
using __cxx_atomic_underlying_t = typename _Wrapper::__underlying_t;
#include "cxx_atomic.h"

_LIBCUDACXX_INLINE_VISIBILITY inline _LIBCUDACXX_CONSTEXPR int __cxx_atomic_order_to_int(memory_order __order) {
// Avoid switch statement to make this a constexpr.
Expand Down
2 changes: 2 additions & 0 deletions libcxx/include/support/atomic/atomic_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ namespace __host {
#elif defined (_LIBCUDACXX_HAS_C11_ATOMIC_IMP)
//TODO
// # include "atomic_c11.h"
#elif defined(_LIBCUDACXX_COMPILER_NVRTC)
# include "atomic_nvrtc.h"
#endif
}

Expand Down
16 changes: 16 additions & 0 deletions libcxx/include/support/atomic/atomic_nvrtc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX_ATOMIC_NVRTC_H
#define _LIBCUDACXX_ATOMIC_NVRTC_H

#include "cxx_atomic.h"

#endif // _LIBCUDACXX_ATOMIC_NVRTC_H
98 changes: 98 additions & 0 deletions libcxx/include/support/atomic/cxx_atomic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX_CXX_ATOMIC_H
#define _LIBCUDACXX_CXX_ATOMIC_H

// Wrapper that holds a single _Tp by value in `__a_value` and carries the
// integer tag _Sco as the compile-time constant `__sco`.
template <class _Tp, int _Sco>
struct __cxx_atomic_base_impl {
  // The wrapped value, declared with _ALIGNAS(sizeof(_Tp)).
  _ALIGNAS(sizeof(_Tp)) _Tp __a_value;

  // Compile-time copy of the _Sco template argument.
  static constexpr int __sco = _Sco;

  // Type of the wrapped value.
  using __underlying_t = _Tp;

  // Defaulted: __a_value is default-initialized.
  _LIBCUDACXX_CONSTEXPR
  __cxx_atomic_base_impl() _NOEXCEPT = default;

  // Initializes __a_value with a copy of __desired.
  _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit
  __cxx_atomic_base_impl(_Tp __desired) _NOEXCEPT
    : __a_value(__desired)
  {}
};

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (unqualified wrapper overload).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
_Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco>* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (volatile wrapper overload; the result points to volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (const wrapper overload; the result points to const _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Returns the address of the `__a_value` member of the wrapper `*__impl`
// (const volatile wrapper overload; the result points to const volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __impl) _NOEXCEPT
{
  return &((*__impl).__a_value);
}

// Reference-style wrapper: instead of holding a _Tp by value it stores a
// pointer (`__a_value`) to a _Tp that lives elsewhere, and carries the integer
// tag _Sco as the compile-time constant `__sco`.
template <typename _Tp, int _Sco>
struct __cxx_atomic_ref_base_impl {
  // Type of the object pointed to by __a_value.
  using __underlying_t = _Tp;
  // Compile-time copy of the _Sco template argument.
  static constexpr int __sco = _Sco;

  // Defaulted: __a_value is default-initialized.
  _LIBCUDACXX_CONSTEXPR
  __cxx_atomic_ref_base_impl() _NOEXCEPT = default;

  // Binds the wrapper to the caller's object by storing its address.
  // The argument is taken by reference because `__a_value` is a `_Tp*`: it
  // cannot be initialized from a `_Tp` passed by value, and the address of a
  // by-value parameter would not outlive the constructor call.
  _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit
  __cxx_atomic_ref_base_impl(_Tp& __value) _NOEXCEPT : __a_value(&__value) {}

  // Non-owning pointer to the referenced object.
  _Tp* __a_value;
};

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (unqualified wrapper overload).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
_Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (volatile wrapper overload; the result points to volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (const wrapper overload; the result points to const _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Returns the pointer stored in the `__a_value` member of the ref wrapper
// `*__ref` (const volatile wrapper overload; the result points to
// const volatile _Tp).
template <class _Tp, int _Sco>
_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __ref) _NOEXCEPT
{
  return (*__ref).__a_value;
}

// Forwards `__a` to __cxx_get_underlying_atomic and returns whatever that call
// returns; the return type is taken from the same call expression.
template <class _Wrapper>
_LIBCUDACXX_INLINE_VISIBILITY
auto __cxx_atomic_base_unwrap(_Wrapper* __a) _NOEXCEPT
    -> decltype(__cxx_get_underlying_atomic(__a))
{
  return __cxx_get_underlying_atomic(__a);
}

// Shorthand for the nested `__underlying_t` type of a wrapper type.
template <class _Wrapper>
using __cxx_atomic_underlying_t = typename _Wrapper::__underlying_t;

#endif //_LIBCUDACXX_CXX_ATOMIC_H

0 comments on commit 9575eff

Please sign in to comment.