Skip to content

Commit

Permalink
Add thrust::universal_host_pinned_vector
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Oct 30, 2024
1 parent 7ff1d7b commit 1ce4a2e
Show file tree
Hide file tree
Showing 16 changed files with 112 additions and 100 deletions.
24 changes: 5 additions & 19 deletions libcudacxx/examples/concurrent_hash_table.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,13 @@
#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/pair.h>
#include <thrust/system/cuda/vector.h>

#include <cassert>
#include <cstdio>
#include <iostream>
#include <random>

// TODO: This should be upstreamed and then removed.
namespace thrust
{

using universal_raw_memory_resource =
thrust::system::cuda::detail::cuda_memory_resource<thrust::system::cuda::detail::cudaMallocManaged, cudaFree, void*>;

template <typename T>
using universal_allocator = thrust::mr::stateless_resource_allocator<T, universal_raw_memory_resource>;

template <typename T>
using universal_vector = thrust::device_vector<T, universal_allocator<T>>;

} // namespace thrust

template <typename Key,
typename Value,
typename Hash = thrust::identity<Key>,
Expand Down Expand Up @@ -199,8 +185,8 @@ int main()

auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8);

thrust::universal_vector<int> input = [] {
thrust::universal_vector<int> v(2048);
thrust::cuda::universal_vector<int> input = [] {
thrust::cuda::universal_vector<int> v(2048);
std::mt19937 gen(1337);
std::uniform_int_distribution<long> dis(0, 7);
thrust::generate(v.begin(), v.end(), [&] {
Expand Down Expand Up @@ -230,8 +216,8 @@ int main()

auto freq = thrust::allocate_unique<table>(thrust::universal_allocator<table>{}, 8, identity_modulo<int>(4));

thrust::universal_vector<int> input = [] {
thrust::universal_vector<int> v(2048);
thrust::cuda::universal_vector<int> input = [] {
thrust::cuda::universal_vector<int> v(2048);
std::mt19937 gen(1337);
std::uniform_int_distribution<long> dis(0, 7);
thrust::generate(v.begin(), v.end(), [&] {
Expand Down
56 changes: 17 additions & 39 deletions thrust/testing/cuda/device_side_universal_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

#include <unittest/unittest.h>

template <class VecT>
_CCCL_HOST_DEVICE void universal_vector_access(VecT& in, thrust::universal_vector<bool>& out)
template <class VecInT, class VecOutT>
_CCCL_HOST_DEVICE void universal_vector_access(VecInT& in, VecOutT& out)
{
const int expected_front = 4;
const int expected_back = 2;
Expand All @@ -17,33 +17,34 @@ _CCCL_HOST_DEVICE void universal_vector_access(VecT& in, thrust::universal_vecto
}

#if defined(THRUST_TEST_DEVICE_SIDE)
template <class VecT>
__global__ void universal_vector_device_access_kernel(VecT& vec, thrust::universal_vector<bool>& out)
template <class VecInT, class VecOutT>
__global__ void universal_vector_device_access_kernel(VecInT& vec, VecOutT& out)
{
universal_vector_access(vec, out);
}

template <class VecT>
void test_universal_vector_access(VecT& vec, thrust::universal_vector<bool>& out)
template <class VecInT, class VecOutT>
void test_universal_vector_access(VecInT& vec, VecOutT& out)
{
universal_vector_device_access_kernel<<<1, 1>>>(vec, out);
cudaError_t const err = cudaDeviceSynchronize();
ASSERT_EQUAL(cudaSuccess, err);
ASSERT_EQUAL(out[0], true);
}
#else
template <class VecT>
void test_universal_vector_access(VecT& vec, thrust::universal_vector<bool>& out)
template <class VecInT, class VecOutT>
void test_universal_vector_access(VecInT& vec, VecOutT& out)
{
universal_vector_access(vec, out);
ASSERT_EQUAL(out[0], true);
}
#endif

void TestUniversalVectorDeviceAccess()
template <template <typename...> class UniversalVector>
void TestDeviceAccess()
{
using in_vector_t = thrust::universal_vector<int>;
using out_vector_t = thrust::universal_vector<bool>;
using in_vector_t = UniversalVector<int>;
using out_vector_t = UniversalVector<bool>;

in_vector_t* in_ptr{};
cudaMallocManaged(&in_ptr, sizeof(*in_ptr));
Expand All @@ -62,34 +63,11 @@ void TestUniversalVectorDeviceAccess()
out[0] = false;

test_universal_vector_access(in, out);
}
DECLARE_UNITTEST(TestUniversalVectorDeviceAccess);

void TestConstUniversalVectorDeviceAccess()
{
using in_vector_t = thrust::universal_vector<int>;
using out_vector_t = thrust::universal_vector<bool>;

in_vector_t* in_ptr{};
cudaMallocManaged(&in_ptr, sizeof(*in_ptr));
new (in_ptr) in_vector_t(1);

{
auto& in = *in_ptr;
in.resize(2);
in = {4, 2};
}

const auto& const_in = *in_ptr;

out_vector_t* out_ptr{};
cudaMallocManaged(&out_ptr, sizeof(*out_ptr));
new (out_ptr) out_vector_t(1);
auto& out = *out_ptr;

out.resize(1);
out[0] = false;

test_universal_vector_access(const_in, out);

cudaFree(in_ptr);
cudaFree(out_ptr);
}
DECLARE_UNITTEST(TestConstUniversalVectorDeviceAccess);
DECLARE_UNITTEST_WITH_NAME(TestDeviceAccess<thrust::universal_vector>, TestUniversalVectorDeviceAccess);
DECLARE_UNITTEST_WITH_NAME(TestDeviceAccess<thrust::universal_host_pinned_vector>, TestUniversalHPVectorDeviceAccess);
46 changes: 21 additions & 25 deletions thrust/testing/unittest/testframework.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,37 +388,33 @@ class UnitTestDriver
void VTEST##Universal() \
{ \
VTEST<thrust::universal_vector<int>>(); \
VTEST<thrust::device_vector< \
int, \
thrust::mr::stateless_resource_allocator<int, thrust::universal_host_pinned_memory_resource>>>(); \
VTEST<thrust::universal_host_pinned_vector<int>>(); \
} \
DECLARE_UNITTEST(VTEST##Host); \
DECLARE_UNITTEST(VTEST##Device); \
DECLARE_UNITTEST(VTEST##Universal);

// Same as above, but only for integral types
#define DECLARE_INTEGRAL_VECTOR_UNITTEST(VTEST) \
void VTEST##Host() \
{ \
VTEST<thrust::host_vector<signed char>>(); \
VTEST<thrust::host_vector<short>>(); \
VTEST<thrust::host_vector<int>>(); \
} \
void VTEST##Device() \
{ \
VTEST<thrust::device_vector<signed char>>(); \
VTEST<thrust::device_vector<short>>(); \
VTEST<thrust::device_vector<int>>(); \
} \
void VTEST##Universal() \
{ \
VTEST<thrust::universal_vector<int>>(); \
VTEST<thrust::device_vector< \
int, \
thrust::mr::stateless_resource_allocator<int, thrust::universal_host_pinned_memory_resource>>>(); \
} \
DECLARE_UNITTEST(VTEST##Host); \
DECLARE_UNITTEST(VTEST##Device); \
#define DECLARE_INTEGRAL_VECTOR_UNITTEST(VTEST) \
void VTEST##Host() \
{ \
VTEST<thrust::host_vector<signed char>>(); \
VTEST<thrust::host_vector<short>>(); \
VTEST<thrust::host_vector<int>>(); \
} \
void VTEST##Device() \
{ \
VTEST<thrust::device_vector<signed char>>(); \
VTEST<thrust::device_vector<short>>(); \
VTEST<thrust::device_vector<int>>(); \
} \
void VTEST##Universal() \
{ \
VTEST<thrust::universal_vector<int>>(); \
VTEST<thrust::universal_host_pinned_vector<int>>(); \
} \
DECLARE_UNITTEST(VTEST##Host); \
DECLARE_UNITTEST(VTEST##Device); \
DECLARE_UNITTEST(VTEST##Universal);

// Macro to create instances of a test for several data types.
Expand Down
19 changes: 19 additions & 0 deletions thrust/testing/universal_memory.cu
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,25 @@ void TestUniversalThrustVector(std::size_t const n)
}
DECLARE_VARIABLE_UNITTEST(TestUniversalThrustVector);

// TODO(bgruber): merge test into previous when we have Catch2
template <typename T>
void TestUniversalHostPinnedThrustVector(std::size_t const n)
{
thrust::host_vector<T> host(n);
thrust::universal_host_pinned_vector<T> universal(n);

static_assert(std::is_same<typename std::decay<decltype(universal)>::type::pointer, thrust::universal_ptr<T>>::value,
"Unexpected thrust::universal_vector pointer type.");

thrust::sequence(host.begin(), host.end(), 0);
thrust::sequence(universal.begin(), universal.end(), 0);

ASSERT_EQUAL(host.size(), n);
ASSERT_EQUAL(universal.size(), n);
ASSERT_EQUAL(host, universal);
}
DECLARE_VARIABLE_UNITTEST(TestUniversalHostPinnedThrustVector);

// Verify that a std::vector using the universal allocator will work with
// Standard Library algorithms.
template <typename T>
Expand Down
8 changes: 5 additions & 3 deletions thrust/thrust/system/cpp/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ inline void free(pointer<void> ptr);
template <typename T>
using allocator = thrust::mr::stateless_resource_allocator<T, thrust::system::cpp::memory_resource>;

/*! \p cpp::universal_allocator allocates memory that can be used by the \p cpp
* system and host systems.
*/
//! \p cpp::universal_allocator allocates memory that can be used by the \p cpp system and host systems.
template <typename T>
using universal_allocator = thrust::mr::stateless_resource_allocator<T, thrust::system::cpp::universal_memory_resource>;

//! \p cpp::universal_host_pinned_allocator allocates memory that can be used by the \p cpp system and host systems.
template <typename T>
using universal_host_pinned_allocator =
thrust::mr::stateless_resource_allocator<T, thrust::system::cpp::universal_host_pinned_memory_resource>;
} // namespace cpp
} // namespace system

Expand Down
2 changes: 1 addition & 1 deletion thrust/thrust/system/cpp/memory_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ using memory_resource = detail::native_resource;
*/
using universal_memory_resource = detail::universal_native_resource;
/*! An alias for \p cpp::universal_memory_resource. */
using universal_host_pinned_memory_resource = detail::native_resource;
using universal_host_pinned_memory_resource = universal_memory_resource;

/*! \} // memory_resources
*/
Expand Down
2 changes: 2 additions & 0 deletions thrust/thrust/system/cpp/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ using vector = thrust::detail::vector_base<T, Allocator>;
template <typename T, typename Allocator = thrust::system::cpp::universal_allocator<T>>
using universal_vector = thrust::detail::vector_base<T, Allocator>;

template <typename T>
using universal_host_pinned_vector = thrust::detail::vector_base<T, universal_host_pinned_allocator<T>>;
} // namespace cpp
} // namespace system

Expand Down
11 changes: 8 additions & 3 deletions thrust/thrust/system/cuda/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,16 @@ inline _CCCL_HOST_DEVICE void free(pointer<void> ptr);
template <typename T>
using allocator = thrust::mr::stateless_resource_allocator<T, thrust::system::cuda::memory_resource>;

/*! \p cuda::universal_allocator allocates memory that can be used by the \p cuda
* system and host systems.
*/
//! \p cuda::universal_allocator allocates managed memory that can be used by the \p cuda system and host systems.
template <typename T>
using universal_allocator =
thrust::mr::stateless_resource_allocator<T, thrust::system::cuda::universal_memory_resource>;

//! \p cuda::universal_host_pinned_allocator allocates pinned host memory that can be used by the \p cuda system and
//! host systems.
template <typename T>
using universal_host_pinned_allocator =
thrust::mr::stateless_resource_allocator<T, thrust::system::cuda::universal_host_pinned_memory_resource>;
} // namespace cuda_cub

namespace system
Expand All @@ -98,6 +101,7 @@ using thrust::cuda_cub::allocator;
using thrust::cuda_cub::free;
using thrust::cuda_cub::malloc;
using thrust::cuda_cub::universal_allocator;
using thrust::cuda_cub::universal_host_pinned_allocator;
} // namespace cuda
} // namespace system

Expand All @@ -110,6 +114,7 @@ using thrust::cuda_cub::allocator;
using thrust::cuda_cub::free;
using thrust::cuda_cub::malloc;
using thrust::cuda_cub::universal_allocator;
using thrust::cuda_cub::universal_host_pinned_allocator;
} // namespace cuda

THRUST_NAMESPACE_END
Expand Down
12 changes: 10 additions & 2 deletions thrust/thrust/system/cuda/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace cuda_cub
* shared by \p cuda::vector
* \see device_vector
* \see universal_vector
* \see universal_host_pinned_vector
*/
template <typename T, typename Allocator = thrust::system::cuda::allocator<T>>
using vector = thrust::detail::vector_base<T, Allocator>;
Expand All @@ -64,7 +65,7 @@ using vector = thrust::detail::vector_base<T, Allocator>;
* insertion and removal of elements at the beginning or in the middle. The
* number of elements in a \p cuda::universal_vector may vary dynamically;
* memory management is automatic. The elements contained in a
* \p cuda::universal_vector reside in memory accessible by the \p cuda system
* \p cuda::universal_vector reside in managed memory accessible by the \p cuda system
* and host systems.
*
* \tparam T The element type of the \p cuda::universal_vector.
Expand All @@ -75,24 +76,31 @@ using vector = thrust::detail::vector_base<T, Allocator>;
* \see host_vector For the documentation of the complete interface which is
* shared by \p cuda::universal_vector
* \see device_vector
* \see universal_vector
* \see universal_host_pinned_vector
*/
template <typename T, typename Allocator = thrust::system::cuda::universal_allocator<T>>
using universal_vector = thrust::detail::vector_base<T, Allocator>;

//! Like \ref cuda::universal_vector but uses pinned host memory (cudaMallocHost).
//! \see device_vector
//! \see universal_vector
template <typename T>
using universal_host_pinned_vector = thrust::detail::vector_base<T, universal_host_pinned_allocator<T>>;
} // namespace cuda_cub

namespace system
{
namespace cuda
{
using thrust::cuda_cub::universal_host_pinned_vector;
using thrust::cuda_cub::universal_vector;
using thrust::cuda_cub::vector;
} // namespace cuda
} // namespace system

namespace cuda
{
using thrust::cuda_cub::universal_host_pinned_vector;
using thrust::cuda_cub::universal_vector;
using thrust::cuda_cub::vector;
} // namespace cuda
Expand Down
9 changes: 6 additions & 3 deletions thrust/thrust/system/omp/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ inline void free(pointer<void> ptr);
template <typename T>
using allocator = thrust::mr::stateless_resource_allocator<T, thrust::system::omp::memory_resource>;

/*! \p omp::universal_allocator allocates memory that can be used by the \p omp
* system and host systems.
*/
//! \p omp::universal_allocator allocates memory that can be used by the \p omp system and host systems.
template <typename T>
using universal_allocator = thrust::mr::stateless_resource_allocator<T, thrust::system::omp::universal_memory_resource>;

//! \p omp::universal_host_pinned_allocator allocates memory that can be used by the \p omp system and host systems.
template <typename T>
using universal_host_pinned_allocator =
thrust::mr::stateless_resource_allocator<T, thrust::system::omp::universal_host_pinned_memory_resource>;
} // namespace omp
} // namespace system

Expand All @@ -101,6 +103,7 @@ using thrust::system::omp::allocator;
using thrust::system::omp::free;
using thrust::system::omp::malloc;
using thrust::system::omp::universal_allocator;
using thrust::system::omp::universal_host_pinned_allocator;
} // namespace omp

THRUST_NAMESPACE_END
Expand Down
2 changes: 1 addition & 1 deletion thrust/thrust/system/omp/memory_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ using memory_resource = detail::native_resource;
*/
using universal_memory_resource = detail::universal_native_resource;
/*! An alias for \p omp::universal_memory_resource. */
using universal_host_pinned_memory_resource = detail::native_resource;
using universal_host_pinned_memory_resource = universal_memory_resource;

/*! \}
*/
Expand Down
2 changes: 2 additions & 0 deletions thrust/thrust/system/omp/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ using vector = thrust::detail::vector_base<T, Allocator>;
template <typename T, typename Allocator = thrust::system::omp::universal_allocator<T>>
using universal_vector = thrust::detail::vector_base<T, Allocator>;

template <typename T>
using universal_host_pinned_vector = thrust::detail::vector_base<T, universal_host_pinned_allocator<T>>;
} // namespace omp
} // namespace system

Expand Down
Loading

0 comments on commit 1ce4a2e

Please sign in to comment.