From cce2b94df86b9df3a9bfed46ad07fdcd5fa8c894 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Thu, 2 Mar 2023 14:08:28 +0800 Subject: [PATCH] [GetCurrentCUDAStream] Add C++ API GetCurrentCUDAStream (#51027) * polish codes according #50813 * [getCurrentCUDAStream] Add C++ API getCurrentCUDAStream * change get->Get * wrap with macro * use Get instead of get --- paddle/phi/api/include/context_pool.h | 19 +++++++++++---- paddle/phi/api/lib/context_pool.cc | 27 +++++++++++++++++---- paddle/phi/core/allocator.cc | 3 --- paddle/phi/core/allocator.h | 1 - paddle/phi/tests/api/CMakeLists.txt | 8 +++++++ paddle/phi/tests/api/test_allocator.cu | 30 +++++++++++------------- paddle/phi/tests/api/test_cuda_stream.cu | 26 ++++++++++++++++++++ 7 files changed, 86 insertions(+), 28 deletions(-) create mode 100644 paddle/phi/tests/api/test_cuda_stream.cu diff --git a/paddle/phi/api/include/context_pool.h b/paddle/phi/api/include/context_pool.h index f548696bf9999..7afe17ba8419d 100644 --- a/paddle/phi/api/include/context_pool.h +++ b/paddle/phi/api/include/context_pool.h @@ -25,6 +25,8 @@ namespace phi { class DeviceContext; class CPUContext; class GPUContext; +class Allocator; +class CUDAStream; } // namespace phi namespace paddle { @@ -88,9 +90,18 @@ class PADDLE_API DeviceContextPool { } // namespace experimental } // namespace paddle -namespace phi { -class Allocator; +namespace paddle { -PADDLE_API Allocator* GetAllocator(const Place& place); +/** + * Get the Allocator for the passed place. + */ +PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place); -} // namespace phi +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +/** + * Get the current CUDA stream for the passed CUDA device. 
+ */ +PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place); +#endif + +} // namespace paddle diff --git a/paddle/phi/api/lib/context_pool.cc b/paddle/phi/api/lib/context_pool.cc index a17df04183e82..460622698b963 100644 --- a/paddle/phi/api/lib/context_pool.cc +++ b/paddle/phi/api/lib/context_pool.cc @@ -18,6 +18,10 @@ limitations under the License. */ #include "paddle/phi/core/allocator.h" #include "paddle/phi/core/enforce.h" +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/phi/core/cuda_stream.h" +#endif + #include "paddle/fluid/platform/init.h" namespace paddle { @@ -52,12 +56,27 @@ phi::DeviceContext* DeviceContextPool::GetMutable(const Place& place) { } // namespace experimental } // namespace paddle -namespace phi { +namespace paddle { -PADDLE_API Allocator* GetAllocator(const Place& place) { - const DeviceContext* dev_ctx = +PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) { + const phi::DeviceContext* dev_ctx = paddle::experimental::DeviceContextPool::Instance().Get(place); return const_cast<phi::Allocator*>(&dev_ctx->GetAllocator()); } -} // namespace phi +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) { + PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU, + phi::errors::InvalidArgument( + "GetCurrentCUDAStream only supports GPUPlace input. " + "However, your input is place=%s", + place)); + + auto& pool = paddle::experimental::DeviceContextPool::Instance(); + const phi::GPUContext* dev_ctx = + static_cast<const phi::GPUContext*>(pool.Get(place)); + return dev_ctx->cuda_stream(); +} +#endif + +} // namespace paddle diff --git a/paddle/phi/core/allocator.cc b/paddle/phi/core/allocator.cc index 76e5c38c51ae1..4d766d7003f6b 100644 --- a/paddle/phi/core/allocator.cc +++ b/paddle/phi/core/allocator.cc @@ -14,7 +14,4 @@ limitations under the License. 
*/ #include "paddle/phi/core/allocator.h" -#include "paddle/phi/api/include/context_pool.h" -#include "paddle/phi/core/device_context.h" - namespace phi {} // namespace phi diff --git a/paddle/phi/core/allocator.h b/paddle/phi/core/allocator.h index 9595a51ec0316..849fc1548c7ec 100644 --- a/paddle/phi/core/allocator.h +++ b/paddle/phi/core/allocator.h @@ -18,7 +18,6 @@ limitations under the License. */ #include #include -#include "paddle/phi/api/include/dll_decl.h" #include "paddle/phi/common/place.h" namespace phi { diff --git a/paddle/phi/tests/api/CMakeLists.txt b/paddle/phi/tests/api/CMakeLists.txt index 7ee6bd692b7e5..d4f1ecaab66fd 100644 --- a/paddle/phi/tests/api/CMakeLists.txt +++ b/paddle/phi/tests/api/CMakeLists.txt @@ -9,6 +9,10 @@ if(WITH_GPU) test_allocator SRCS test_allocator.cu DEPS memory place device_context context_pool) + nv_test( + test_cuda_stream + SRCS test_cuda_stream.cu + DEPS context_pool) elseif(WITH_ROCM) hip_test( test_phi_tensor @@ -18,6 +22,10 @@ elseif(WITH_ROCM) test_allocator SRCS test_allocator.cu DEPS memory place device_context context_pool) + hip_test( + test_cuda_stream + SRCS test_cuda_stream.cu + DEPS context_pool) else() cc_test( test_phi_tensor diff --git a/paddle/phi/tests/api/test_allocator.cu b/paddle/phi/tests/api/test_allocator.cu index 23738d9a5ff42..eafbe84019b5f 100644 --- a/paddle/phi/tests/api/test_allocator.cu +++ b/paddle/phi/tests/api/test_allocator.cu @@ -22,8 +22,20 @@ limitations under the License. 
*/ #include "paddle/phi/core/allocator.h" #include "paddle/phi/core/device_context.h" +using paddle::memory::Copy; + +template <typename T> +class Scale { + public: + explicit Scale(const T& scale) : scale_(scale) {} + HOSTDEVICE T operator()(const T& a) const { return a * scale_; } + + private: + T scale_; +}; + TEST(Allocator, CPU) { - phi::Allocator* allocator = phi::GetAllocator(phi::CPUPlace()); + phi::Allocator* allocator = paddle::GetAllocator(phi::CPUPlace()); auto cpu_allocation = allocator->Allocate(sizeof(float) * 4); float* cpu_buf = static_cast<float*>(cpu_allocation->ptr()); ASSERT_NE(cpu_buf, nullptr); @@ -39,23 +51,10 @@ TEST(Allocator, CPU) { } } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -using paddle::memory::Copy; - -template <typename T> -class Scale { - public: - explicit Scale(const T& scale) : scale_(scale) {} - HOSTDEVICE T operator()(const T& a) const { return a * scale_; } - - private: - T scale_; -}; - TEST(Allocator, GPU) { phi::GPUPlace gpu0(0); float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; - phi::Allocator* allocator = phi::GetAllocator(gpu0); + phi::Allocator* allocator = paddle::GetAllocator(gpu0); auto gpu_allocation = allocator->Allocate(sizeof(cpu_buf)); float* gpu_buf = static_cast<float*>(gpu_allocation->ptr()); @@ -70,4 +69,3 @@ TEST(Allocator, GPU) { ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5); } } -#endif diff --git a/paddle/phi/tests/api/test_cuda_stream.cu b/paddle/phi/tests/api/test_cuda_stream.cu new file mode 100644 index 0000000000000..698f161c356f1 --- /dev/null +++ b/paddle/phi/tests/api/test_cuda_stream.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <gtest/gtest.h> + +#include "paddle/phi/api/include/context_pool.h" +#include "paddle/phi/core/cuda_stream.h" + +TEST(CUDAStream, GPU) { + phi::GPUPlace gpu0(0); + phi::CUDAStream* stream = paddle::GetCurrentCUDAStream(gpu0); + EXPECT_TRUE(stream != nullptr); + gpuStream_t raw_stream = stream->raw_stream(); + EXPECT_TRUE(raw_stream != nullptr); +}