From 9a3ef24e53bb51e749f1c1914bab5aa01dec9e4a Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 24 Oct 2019 15:36:02 -0500 Subject: [PATCH 01/15] add value setter to device_scalar --- include/rmm/device_scalar.hpp | 17 +++++++++++++++++ tests/device_scalar_tests.cpp | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 50a8dc42f..c16c8d333 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -83,6 +83,23 @@ class device_scalar { *---------------------------------------------------------------------------**/ T const *get() const noexcept { return static_cast(buff.data()); } + /**---------------------------------------------------------------------------* + * @brief Copies the value from host to device. + * + * @return T The value of the scalar after synchronizing its stream + *---------------------------------------------------------------------------**/ + void value(T value) { + auto status = cudaMemcpyAsync(buff.data(), &value, sizeof(T), + cudaMemcpyDefault, buff.stream()); + if (cudaSuccess != status) { + throw std::runtime_error{"Device memcpy failed."}; + } + status = cudaStreamSynchronize(buff.stream()); + if (cudaSuccess != status) { + throw std::runtime_error{"Stream sync failed."}; + } + } + device_scalar() = default; ~device_scalar() = default; device_scalar(device_scalar const &) = default; diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index 2f24c34a2..cdc04ed67 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -87,3 +87,13 @@ TYPED_TEST(DeviceScalarTest, MoveCtor) { EXPECT_EQ(moved_to.value(), original_value); EXPECT_EQ(nullptr, scalar.get()); } + +TYPED_TEST(DeviceScalarTest, SetValue) { + rmm::device_scalar scalar{this->value, this->stream, this->mr}; + EXPECT_NE(nullptr, scalar.get()); + + auto expected = this->distribution(this->generator); + + scalar.value(expected); + EXPECT_EQ(expected, scalar.value()); +} From e57f4d709dc5eb2992525141b4c5c8ebc707215f Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 24 Oct 2019 15:43:27 -0500 Subject: [PATCH 02/15] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60921f0e5..6be7f228c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## New Features + - PR #167 Added value setter to `device_scalar` + ## Improvements - PR #161 Use `std::atexit` to finalize RMM after Python interpreter shutdown From 91b2d7ab198323cd83ee497cd123f1a3931014d0 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 24 Oct 2019 15:44:10 -0500 Subject: [PATCH 03/15] fix doc --- include/rmm/device_scalar.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index c16c8d333..f47781ca8 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -85,8 +85,6 @@ class device_scalar { /**---------------------------------------------------------------------------* * @brief Copies the value from host to device. - * - * @return T The value of the scalar after synchronizing its stream *---------------------------------------------------------------------------**/ void value(T value) { auto status = cudaMemcpyAsync(buff.data(), &value, sizeof(T), From da98387fe90a806d199eb0d5bd9f3e7a45caa792 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 24 Oct 2019 16:20:54 -0500 Subject: [PATCH 04/15] for device_scalar, rename `T get()` to `T data()`, rename `void value(T)` to `void set_value(T)`, adjust docs --- include/rmm/device_scalar.hpp | 32 +++++++++++++++++--------------- tests/device_scalar_tests.cpp | 24 ++++++++++++------------ 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f47781ca8..3c14667dd 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -62,7 +62,7 @@ class device_scalar { T value() const { T host_value{}; auto status = cudaMemcpyAsync(&host_value, buff.data(), sizeof(T), - cudaMemcpyDefault, buff.stream()); + cudaMemcpyDefault, buff.stream()); if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } @@ -74,21 +74,13 @@ class device_scalar { } /**---------------------------------------------------------------------------* - * @brief Returns pointer to object in device memory. - *---------------------------------------------------------------------------**/ - T *get() noexcept { return static_cast(buff.data()); } - - /**---------------------------------------------------------------------------* - * @brief Returns pointer to object in device memory. - *---------------------------------------------------------------------------**/ - T const *get() const noexcept { return static_cast(buff.data()); } - - /**---------------------------------------------------------------------------* - * @brief Copies the value from host to device. + * @brief Copies the value from hostto device. + * + * @param host_value The value of the scalar after synchronizing its stream *---------------------------------------------------------------------------**/ - void value(T value) { - auto status = cudaMemcpyAsync(buff.data(), &value, sizeof(T), - cudaMemcpyDefault, buff.stream()); + void set_value(T host_value) { + auto status = cudaMemcpyAsync(buff.data(), &host_value, sizeof(T), + cudaMemcpyDefault, buff.stream()); if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } @@ -98,6 +90,16 @@ class device_scalar { } } + /**---------------------------------------------------------------------------* + * @brief Returns pointer to object in device memory. + *---------------------------------------------------------------------------**/ + T *data() noexcept { return static_cast(buff.data()); } + + /**---------------------------------------------------------------------------* + * @brief Returns pointer to object in device memory. + *---------------------------------------------------------------------------**/ + T const *data() const noexcept { return static_cast(buff.data()); } + device_scalar() = default; ~device_scalar() = default; device_scalar(device_scalar const &) = default; diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index cdc04ed67..4f2256159 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -53,47 +53,47 @@ TYPED_TEST_CASE(DeviceScalarTest, Types); TYPED_TEST(DeviceScalarTest, DefaultUninitialized) { rmm::device_scalar scalar{}; - EXPECT_NE(nullptr, scalar.get()); + EXPECT_NE(nullptr, scalar.data()); } TYPED_TEST(DeviceScalarTest, InitialValue) { rmm::device_scalar scalar{this->value, this->stream, this->mr}; - EXPECT_NE(nullptr, scalar.get()); + EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value()); } TYPED_TEST(DeviceScalarTest, CopyCtor) { rmm::device_scalar scalar{this->value, this->stream, this->mr}; - EXPECT_NE(nullptr, scalar.get()); + EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value()); rmm::device_scalar copy{scalar}; - EXPECT_NE(nullptr, copy.get()); - EXPECT_NE(copy.get(), scalar.get()); + EXPECT_NE(nullptr, copy.data()); + EXPECT_NE(copy.data(), scalar.data()); EXPECT_EQ(copy.value(), scalar.value()); } TYPED_TEST(DeviceScalarTest, MoveCtor) { rmm::device_scalar scalar{this->value, this->stream, this->mr}; - EXPECT_NE(nullptr, scalar.get()); + EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value()); - auto original_pointer = scalar.get(); + auto original_pointer = scalar.data(); auto original_value = scalar.value(); rmm::device_scalar moved_to{std::move(scalar)}; - EXPECT_NE(nullptr, moved_to.get()); - EXPECT_EQ(moved_to.get(), original_pointer); + EXPECT_NE(nullptr, moved_to.data()); + EXPECT_EQ(moved_to.data(), original_pointer); EXPECT_EQ(moved_to.value(), original_value); - EXPECT_EQ(nullptr, scalar.get()); + EXPECT_EQ(nullptr, scalar.data()); } TYPED_TEST(DeviceScalarTest, SetValue) { rmm::device_scalar scalar{this->value, this->stream, this->mr}; - EXPECT_NE(nullptr, scalar.get()); + EXPECT_NE(nullptr, scalar.data()); auto expected = this->distribution(this->generator); - scalar.value(expected); + scalar.set_value(expected); EXPECT_EQ(expected, scalar.value()); } From c27e29050c961d4a40210e49b92cb15e74fc7b83 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 24 Oct 2019 17:33:18 -0500 Subject: [PATCH 05/15] Update include/rmm/device_scalar.hpp Co-Authored-By: Mark Harris --- include/rmm/device_scalar.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 3c14667dd..936450456 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -74,7 +74,7 @@ class device_scalar { } /**---------------------------------------------------------------------------* - * @brief Copies the value from hostto device. + * @brief Copies the value from host to device. * * @param host_value The value of the scalar after synchronizing its stream *---------------------------------------------------------------------------**/ From aff0a4a7a7c4879043d8d553fb43a25f08ecd01d Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 25 Oct 2019 13:16:56 -0500 Subject: [PATCH 06/15] add value_async and set_value_async to device_scalar --- include/rmm/device_scalar.hpp | 116 +++++++++++++++++++++++++++------- 1 file changed, 94 insertions(+), 22 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 3c14667dd..953ba503c 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -60,34 +60,75 @@ class device_scalar { * @return T The value of the scalar after synchronizing its stream *---------------------------------------------------------------------------**/ T value() const { - T host_value{}; - auto status = cudaMemcpyAsync(&host_value, buff.data(), sizeof(T), - cudaMemcpyDefault, buff.stream()); - if (cudaSuccess != status) { - throw std::runtime_error{"Device memcpy failed."}; - } - status = cudaStreamSynchronize(buff.stream()); - if (cudaSuccess != status) { - throw std::runtime_error{"Stream sync failed."}; - } - return host_value; + return _value(buff.stream()); } /**---------------------------------------------------------------------------* - * @brief Copies the value from hostto device. + * @brief Copies the value from device to host and returns the value. * - * @param host_value The value of the scalar after synchronizing its stream + * @return T The value of the scalar after synchronizing its stream + * @param stream CUDA stream on which to perform the copy + *---------------------------------------------------------------------------**/ + T value(cudaStream_t stream) const { + return _value(stream); + } + + /**---------------------------------------------------------------------------* + * @brief Copies the value from device to host and returns the value. + * + * @return T The value of the scalar + *---------------------------------------------------------------------------**/ + T value_async() const { + return _value(buff.stream()); + } + + /**---------------------------------------------------------------------------* + * @brief Copies the value from device to host and returns the value. + * + * @return T The value of the scalar + * @param stream CUDA stream on which to perform the copy + *---------------------------------------------------------------------------**/ + T value_async(cudaStream_t stream) const { + return _value(stream); + } + + /**---------------------------------------------------------------------------* + * @brief Copies the value from host to device and synchronizes. + * + * @param host_value The host value which will be copied to device *---------------------------------------------------------------------------**/ void set_value(T host_value) { - auto status = cudaMemcpyAsync(buff.data(), &host_value, sizeof(T), - cudaMemcpyDefault, buff.stream()); - if (cudaSuccess != status) { - throw std::runtime_error{"Device memcpy failed."}; - } - status = cudaStreamSynchronize(buff.stream()); - if (cudaSuccess != status) { - throw std::runtime_error{"Stream sync failed."}; - } + _set_value(host_value, buff.stream()); + } + + + /**---------------------------------------------------------------------------* + * @brief Copies the value from host to device and synchronizes. + * + * @param host_value The host value which will be copied to device + * @param stream CUDA stream on which to perform the copy + *---------------------------------------------------------------------------**/ + void set_value(T host_value, cudaStream_t stream) { + _set_value(host_value, stream); + } + + /**---------------------------------------------------------------------------* + * @brief Copies the value from host to device. + * + * @param host_value The host value which will be copied to device + *---------------------------------------------------------------------------**/ + void set_value_async(T host_value) { + _set_value(host_value, buff.stream()); + } + + /**---------------------------------------------------------------------------* + * @brief Copies the value from host to device. + * + * @param host_value The host value which will be copied to device + * @param stream CUDA stream on which to perform the copy + *---------------------------------------------------------------------------**/ + void set_value_async(T host_value, cudaStream_t stream) { + _set_value(host_value, stream); } /**---------------------------------------------------------------------------* @@ -109,6 +150,37 @@ class device_scalar { private: rmm::device_buffer buff{sizeof(T)}; + + template + inline T _value(cudaStream_t stream) const { + T host_value{}; + _memcpy(&host_value, buff.data(), sizeof(T), stream); + return host_value; + } + + template + inline void _set_value(T host_value, cudaStream_t stream) { + _memcpy(buff.data(), &host_value, sizeof(T), stream); + } + + template + inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const { + auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); + + if (cudaSuccess != status) { + throw std::runtime_error{"Device memcpy failed."}; + } + + if (false == synchronize) { + return; + } + + status = cudaStreamSynchronize(stream); + + if (cudaSuccess != status) { + throw std::runtime_error{"Stream sync failed."}; + } + } }; } // namespace rmm From 772fb67628e5a9132623b1129a5b7e70095e2781 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 25 Oct 2019 13:25:01 -0500 Subject: [PATCH 07/15] make device_scalar constructor use _set_value. --- include/rmm/device_scalar.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 953ba503c..f2596ae30 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -46,12 +46,8 @@ class device_scalar { T const &initial_value, cudaStream_t stream_ = 0, rmm::mr::device_memory_resource *mr_ = rmm::mr::get_default_resource()) : buff{sizeof(T), stream_, mr_} { - auto status = cudaMemcpyAsync(buff.data(), &initial_value, sizeof(T), - cudaMemcpyDefault, buff.stream()); - - if (cudaSuccess != status) { - throw std::runtime_error{"Device memcpy failed."}; - } + + _set_value(initial_value, buff.stream()); } /**---------------------------------------------------------------------------* From b03e7f17defb68b18b087b2c19d5105e7e831aee Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Sun, 27 Oct 2019 13:41:40 -0500 Subject: [PATCH 08/15] use RMM_CHECK_CUDA in rmm::device_scalar --- include/rmm/device_scalar.hpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f2596ae30..d90e8e7b0 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -161,9 +162,9 @@ class device_scalar { template inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const { - auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); + auto status = _memcpy_copy(dst, src, count, stream); - if (cudaSuccess != status) { + if (RMM_SUCCESS != status) { throw std::runtime_error{"Device memcpy failed."}; } @@ -171,12 +172,22 @@ class device_scalar { return; } - status = cudaStreamSynchronize(stream); + status = _memcpy_sync(stream); - if (cudaSuccess != status) { + if (RMM_SUCCESS != status) { throw std::runtime_error{"Stream sync failed."}; } } + + inline rmmError_t _memcpy_copy(void *dst, const void *src, size_t count, cudaStream_t stream) const { + RMM_CHECK_CUDA(cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream)); + return RMM_SUCCESS; + } + + inline rmmError_t _memcpy_sync(cudaStream_t stream) const { + RMM_CHECK_CUDA(cudaStreamSynchronize(stream)); + return RMM_SUCCESS; + } }; } // namespace rmm From a16f5a636aef41490bcf065a7f58ee26e20f825b Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Mon, 28 Oct 2019 14:26:42 -0500 Subject: [PATCH 09/15] revert calls to RMM_CHECK_CUDA --- include/rmm/device_scalar.hpp | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index d90e8e7b0..f2596ae30 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include @@ -162,9 +161,9 @@ class device_scalar { template inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const { - auto status = _memcpy_copy(dst, src, count, stream); + auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); - if (RMM_SUCCESS != status) { + if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } @@ -172,22 +171,12 @@ class device_scalar { return; } - status = _memcpy_sync(stream); + status = cudaStreamSynchronize(stream); - if (RMM_SUCCESS != status) { + if (cudaSuccess != status) { throw std::runtime_error{"Stream sync failed."}; } } - - inline rmmError_t _memcpy_copy(void *dst, const void *src, size_t count, cudaStream_t stream) const { - RMM_CHECK_CUDA(cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream)); - return RMM_SUCCESS; - } - - inline rmmError_t _memcpy_sync(cudaStream_t stream) const { - RMM_CHECK_CUDA(cudaStreamSynchronize(stream)); - return RMM_SUCCESS; - } }; } // namespace rmm From c80baa400b9387bee2c0b44fa6175231d5ca71c7 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Tue, 29 Oct 2019 10:42:30 -0500 Subject: [PATCH 10/15] removing stored-stream overloads of rmm:device_scalar value/set_value --- include/rmm/device_scalar.hpp | 57 +++++------------------------------ 1 file changed, 7 insertions(+), 50 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f2596ae30..821474d25 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -50,53 +50,26 @@ class device_scalar { _set_value(initial_value, buff.stream()); } - /**---------------------------------------------------------------------------* - * @brief Copies the value from device to host and returns the value. - * - * @return T The value of the scalar after synchronizing its stream - *---------------------------------------------------------------------------**/ - T value() const { - return _value(buff.stream()); - } - /**---------------------------------------------------------------------------* * @brief Copies the value from device to host and returns the value. * * @return T The value of the scalar after synchronizing its stream * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ - T value(cudaStream_t stream) const { + T value(cudaStream_t stream = 0) const { return _value(stream); } - /**---------------------------------------------------------------------------* - * @brief Copies the value from device to host and returns the value. - * - * @return T The value of the scalar - *---------------------------------------------------------------------------**/ - T value_async() const { - return _value(buff.stream()); - } - /**---------------------------------------------------------------------------* * @brief Copies the value from device to host and returns the value. * * @return T The value of the scalar * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ - T value_async(cudaStream_t stream) const { + T value_async(cudaStream_t stream = 0) const { return _value(stream); } - /**---------------------------------------------------------------------------* - * @brief Copies the value from host to device and synchronizes. - * - * @param host_value The host value which will be copied to device - *---------------------------------------------------------------------------**/ - void set_value(T host_value) { - _set_value(host_value, buff.stream()); - } - /**---------------------------------------------------------------------------* * @brief Copies the value from host to device and synchronizes. @@ -104,26 +77,17 @@ class device_scalar { * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ - void set_value(T host_value, cudaStream_t stream) { + void set_value(T host_value, cudaStream_t stream = 0) { _set_value(host_value, stream); } - /**---------------------------------------------------------------------------* - * @brief Copies the value from host to device. - * - * @param host_value The host value which will be copied to device - *---------------------------------------------------------------------------**/ - void set_value_async(T host_value) { - _set_value(host_value, buff.stream()); - } - /**---------------------------------------------------------------------------* * @brief Copies the value from host to device. * * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ - void set_value_async(T host_value, cudaStream_t stream) { + void set_value_async(T host_value, cudaStream_t stream = 0) { _set_value(host_value, stream); } @@ -160,20 +124,13 @@ class device_scalar { } template - inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const { + inline void _memcpy(void *dst, const void *src, size_t count, + cudaStream_t stream) const{ auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); - if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } - - if (false == synchronize) { - return; - } - - status = cudaStreamSynchronize(stream); - - if (cudaSuccess != status) { + if (cudaSuccess != cudaStreamSynchronize(stream)) { throw std::runtime_error{"Stream sync failed."}; } } From 5356a9c9f8a0dc29493cb83cfb723ca6bf375b9f Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Tue, 29 Oct 2019 18:54:58 -0500 Subject: [PATCH 11/15] remove trailing underscores on device_scalar constructor args --- include/rmm/device_scalar.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 821474d25..485f93e79 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -39,13 +39,13 @@ class device_scalar { * @brief Construct a new `device_scalar` * * @param initial_value The initial value of the object in device memory - * @param stream_ Optional, stream on which to perform allocation and copy - * @param mr_ Optional, resource with which to allocate + * @param stream Optional, stream on which to perform allocation and copy + * @param mr Optional, resource with which to allocate *---------------------------------------------------------------------------**/ explicit device_scalar( - T const &initial_value, cudaStream_t stream_ = 0, - rmm::mr::device_memory_resource *mr_ = rmm::mr::get_default_resource()) - : buff{sizeof(T), stream_, mr_} { + T const &initial_value, cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) + : buff{sizeof(T), stream, mr} { _set_value(initial_value, buff.stream()); } From 525b4cadc00baf096a46655eca42bb61fce3b375 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Wed, 30 Oct 2019 13:32:02 -0500 Subject: [PATCH 12/15] remove async versions of device_scalar value/set_value --- include/rmm/device_scalar.hpp | 45 ++++------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 485f93e79..44182a02c 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -47,7 +47,7 @@ class device_scalar { rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) : buff{sizeof(T), stream, mr} { - _set_value(initial_value, buff.stream()); + _memcpy(buff.data(), &initial_value, sizeof(T), stream); } /**---------------------------------------------------------------------------* @@ -57,20 +57,11 @@ class device_scalar { * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ T value(cudaStream_t stream = 0) const { - return _value(stream); - } - - /**---------------------------------------------------------------------------* - * @brief Copies the value from device to host and returns the value. - * - * @return T The value of the scalar - * @param stream CUDA stream on which to perform the copy - *---------------------------------------------------------------------------**/ - T value_async(cudaStream_t stream = 0) const { - return _value(stream); + T host_value{}; + _memcpy(&host_value, buff.data(), sizeof(T), stream); + return host_value; } - /**---------------------------------------------------------------------------* * @brief Copies the value from host to device and synchronizes. * @@ -78,17 +69,7 @@ class device_scalar { * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ void set_value(T host_value, cudaStream_t stream = 0) { - _set_value(host_value, stream); - } - - /**---------------------------------------------------------------------------* - * @brief Copies the value from host to device. - * - * @param host_value The host value which will be copied to device - * @param stream CUDA stream on which to perform the copy - *---------------------------------------------------------------------------**/ - void set_value_async(T host_value, cudaStream_t stream = 0) { - _set_value(host_value, stream); + _memcpy(buff.data(), &host_value, sizeof(T), stream); } /**---------------------------------------------------------------------------* @@ -111,28 +92,12 @@ class device_scalar { private: rmm::device_buffer buff{sizeof(T)}; - template - inline T _value(cudaStream_t stream) const { - T host_value{}; - _memcpy(&host_value, buff.data(), sizeof(T), stream); - return host_value; - } - - template - inline void _set_value(T host_value, cudaStream_t stream) { - _memcpy(buff.data(), &host_value, sizeof(T), stream); - } - - template inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const{ auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } - if (cudaSuccess != cudaStreamSynchronize(stream)) { - throw std::runtime_error{"Stream sync failed."}; - } } }; From 53ebb7edb03bb964736fd36b06b838ec45114624 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Wed, 30 Oct 2019 15:56:01 -0500 Subject: [PATCH 13/15] device_scalar doc adjustments --- include/rmm/device_scalar.hpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 44182a02c..2130f5c1a 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -47,29 +47,30 @@ class device_scalar { rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) : buff{sizeof(T), stream, mr} { - _memcpy(buff.data(), &initial_value, sizeof(T), stream); + _memcpy(buff.data(), &initial_value, stream); } /**---------------------------------------------------------------------------* - * @brief Copies the value from device to host and returns the value. + * @brief Copies the value from device to host synchronously and returns the + * value. * - * @return T The value of the scalar after synchronizing its stream + * @return T The value of the scalar * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ T value(cudaStream_t stream = 0) const { T host_value{}; - _memcpy(&host_value, buff.data(), sizeof(T), stream); + _memcpy(&host_value, buff.data(), stream); return host_value; } /**---------------------------------------------------------------------------* - * @brief Copies the value from host to device and synchronizes. + * @brief Copies the value from host to device synchronously. * * @param host_value The host value which will be copied to device * @param stream CUDA stream on which to perform the copy *---------------------------------------------------------------------------**/ void set_value(T host_value, cudaStream_t stream = 0) { - _memcpy(buff.data(), &host_value, sizeof(T), stream); + _memcpy(buff.data(), &host_value, stream); } /**---------------------------------------------------------------------------* @@ -92,9 +93,9 @@ class device_scalar { private: rmm::device_buffer buff{sizeof(T)}; - inline void _memcpy(void *dst, const void *src, size_t count, - cudaStream_t stream) const{ - auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); + inline void _memcpy(void *dst, const void *src, cudaStream_t stream) const { + auto status = cudaMemcpyAsync(dst, src, sizeof(T), cudaMemcpyDefault, + stream); if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } From 3268c07b11187121e5dac758fcaa09a6ea4c3d9c Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Wed, 30 Oct 2019 19:45:45 -0500 Subject: [PATCH 14/15] device_scalar explicit synchronization in value/set_value --- include/rmm/device_scalar.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 44182a02c..6b1ba5665 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -95,9 +95,14 @@ class device_scalar { inline void _memcpy(void *dst, const void *src, size_t count, cudaStream_t stream) const{ auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); + if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."}; } + + if (cudaSuccess != cudaStreamSynchronize(stream)) { + throw std::runtime_error{"Stream sync failed."}; + } } }; From c5b8a99850cb2bde4e8ebea1d7bc66d5f597f437 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Wed, 30 Oct 2019 19:53:49 -0500 Subject: [PATCH 15/15] fix build error --- include/rmm/device_scalar.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 0795a09e4..ceae5e16e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -93,9 +93,9 @@ class device_scalar { private: rmm::device_buffer buff{sizeof(T)}; - inline void _memcpy(void *dst, const void *src, size_t count, + inline void _memcpy(void *dst, const void *src, cudaStream_t stream) const{ - auto status = cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream); + auto status = cudaMemcpyAsync(dst, src, sizeof(T), cudaMemcpyDefault, stream); if (cudaSuccess != status) { throw std::runtime_error{"Device memcpy failed."};