diff --git a/paddle/fluid/framework/var_type_traits.cc b/paddle/fluid/framework/var_type_traits.cc index 132da0d177178..0b289b8a6ddff 100644 --- a/paddle/fluid/framework/var_type_traits.cc +++ b/paddle/fluid/framework/var_type_traits.cc @@ -42,7 +42,6 @@ #include "paddle/fluid/operators/nccl/nccl_gpu_common.h" // NOLINT #include "paddle/fluid/platform/device/gpu/nccl_helper.h" // NOLINT #endif -#include "paddle/fluid/operators/mudnn_rnn_cache.h" #endif #if defined(PADDLE_WITH_XPU_BKCL) diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index 286ee379d82dd..fe0eed9719c5e 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -33,6 +33,12 @@ #include #endif #endif +#ifdef PADDLE_WITH_MUSA +#include +#if defined(PADDLE_WITH_MCCL) +#include +#endif +#endif #ifdef PADDLE_WITH_HIP #include #ifdef PADDLE_WITH_RCCL @@ -190,13 +196,15 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl< FeedList, operators::reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA) -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) +#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || defined(PADDLE_WITH_MCCL) ncclUniqueId, platform::Communicator, platform::NCCLCommunicator, #endif +#ifndef PADDLE_WITH_MUSA operators::CudnnRNNCache, #endif +#endif #if defined(PADDLE_WITH_XPU_BKCL) BKCLUniqueId, platform::BKCLCommunicator, diff --git a/paddle/fluid/operators/mudnn_rnn_cache.h b/paddle/fluid/operators/mudnn_rnn_cache.h deleted file mode 100644 index af9ebd800fa3c..0000000000000 --- a/paddle/fluid/operators/mudnn_rnn_cache.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - -namespace paddle { -namespace operators { - -struct CudnnRNNCache { - CudnnRNNCache() { - } - ~CudnnRNNCache() {} -}; - -} // namespace operators -} // namespace paddle - diff --git a/paddle/fluid/platform/device/gpu/gpu_helper.h b/paddle/fluid/platform/device/gpu/gpu_helper.h index f94f5d55b7eee..df6fda56f2b13 100644 --- a/paddle/fluid/platform/device/gpu/gpu_helper.h +++ b/paddle/fluid/platform/device/gpu/gpu_helper.h @@ -17,9 +17,7 @@ #ifdef PADDLE_WITH_HIP #include "paddle/fluid/platform/device/gpu/rocm/rocm_helper.h" -#elif defined(PADDLE_WITH_MUSA) -#include "paddle/fluid/platform/device/gpu/musa/musa_helper.h" -#else +#elif defined(PADDLE_WITH_CUDA) #include "paddle/fluid/platform/device/gpu/cuda/cuda_helper.h" #include "paddle/fluid/platform/device/gpu/cuda/cusparse_helper.h" #endif diff --git a/paddle/fluid/platform/device/gpu/musa/musa_helper.h b/paddle/fluid/platform/device/gpu/musa/musa_helper.h deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 6cd13cb4e4f0a..453d9e9f1e18d 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -45,7 +45,6 @@ limitations under the License. */ #ifdef PADDLE_WITH_MUSA #include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/dynload/mublas.h" -#include "paddle/fluid/platform/dynload/mudnn.h" #include "paddle/fluid/platform/dynload/musparse.h" #include "paddle/phi/backends/gpu/gpu_context.h" #if !defined(__APPLE__) && defined(PADDLE_WITH_MCCL) diff --git a/paddle/fluid/platform/dynload/mudnn.h b/paddle/fluid/platform/dynload/mudnn.h deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/paddle/fluid/platform/dynload/musartc.cc b/paddle/fluid/platform/dynload/musartc.cc index 5bc7b6737b3fb..4e15dab9c1359 100644 --- a/paddle/fluid/platform/dynload/musartc.cc +++ b/paddle/fluid/platform/dynload/musartc.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,13 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/dynload/musartc.h" + +#include "paddle/phi/backends/dynload/musartc.h" + namespace paddle { namespace platform { namespace dynload { -bool HasNVRTC() { return false; } +#define DEFINE_WRAP(__name) DynLoad__##__name __name + +MUSARTC_ROUTINE_EACH(DEFINE_WRAP); + +bool HasNVRTC() { return phi::dynload::HasNVRTC(); } } // namespace dynload } // namespace platform } // namespace paddle - diff --git a/paddle/fluid/platform/dynload/musartc.h b/paddle/fluid/platform/dynload/musartc.h index a81254119de57..c383c85d7ab04 100644 --- a/paddle/fluid/platform/dynload/musartc.h +++ b/paddle/fluid/platform/dynload/musartc.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,13 +14,40 @@ limitations under the License. */ #pragma once +#include + +#include // NOLINT + +#include "paddle/phi/backends/dynload/musartc.h" + namespace paddle { namespace platform { namespace dynload { extern bool HasNVRTC(); +#define PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \ + using DynLoad__##__name = phi::dynload::DynLoad__##__name; \ + extern DynLoad__##__name __name + +/** + * include all needed musartc functions + **/ +#define MUSARTC_ROUTINE_EACH(__macro) \ + __macro(mtrtcVersion); \ + __macro(mtrtcGetErrorString); \ + __macro(mtrtcCompileProgram); \ + __macro(mtrtcCreateProgram); \ + __macro(mtrtcDestroyProgram); \ + __macro(mtrtcGetMUSA); \ + __macro(mtrtcGetMUSASize); \ + __macro(mtrtcGetProgramLog); \ + __macro(mtrtcGetProgramLogSize) + +MUSARTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP); + +#undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP + } // namespace dynload } // namespace platform } // namespace paddle - diff --git a/paddle/phi/backends/dynload/mudnn.cc b/paddle/phi/backends/dynload/mudnn.cc index 19ada8408ed17..4e127b8cc001c 100644 --- a/paddle/phi/backends/dynload/mudnn.cc +++ b/paddle/phi/backends/dynload/mudnn.cc @@ -12,13 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/phi/backends/dynload/mudnn.h" + +#include "paddle/phi/core/enforce.h" + namespace phi { namespace dynload { +std::once_flag mudnn_dso_flag; +void* mudnn_dso_handle = nullptr; + +#define DEFINE_WRAP(__name) DynLoad__##__name __name + bool HasCUDNN() { - return false; + std::call_once(mudnn_dso_flag, + []() { mudnn_dso_handle = GetCUDNNDsoHandle(); }); + return mudnn_dso_handle != nullptr; +} + +void EnforceCUDNNLoaded(const char* fn_name) { + PADDLE_ENFORCE_NOT_NULL( + mudnn_dso_handle, + phi::errors::PreconditionNotMet( + "Cannot load mudnn shared library. Cannot invoke method %s.", + fn_name)); } } // namespace dynload } // namespace phi - diff --git a/paddle/phi/backends/dynload/mudnn.h b/paddle/phi/backends/dynload/mudnn.h index c96a2570210d2..ed4142d61dea4 100644 --- a/paddle/phi/backends/dynload/mudnn.h +++ b/paddle/phi/backends/dynload/mudnn.h @@ -14,15 +14,36 @@ limitations under the License. */ #pragma once #ifdef PADDLE_WITH_MUSA +#include + +#include // NOLINT + +#include "paddle/phi/backends/dynload/dynamic_loader.h" +#include "paddle/phi/backends/dynload/port.h" namespace phi { namespace dynload { +extern std::once_flag mudnn_dso_flag; +extern void* mudnn_dso_handle; extern bool HasCUDNN(); +extern void EnforceCUDNNLoaded(const char* fn_name); +#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ + using mudnn_func = decltype(&::__name); \ + std::call_once(mudnn_dso_flag, []() { \ + mudnn_dso_handle = phi::dynload::GetCUDNNDsoHandle(); \ + }); \ + EnforceCUDNNLoaded(#__name); \ + static void* p_##__name = dlsym(mudnn_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ + extern struct DynLoad__##__name __name } // namespace dynload } // namespace phi - #endif - diff --git a/paddle/phi/backends/gpu/musa/musa_device_function.h b/paddle/phi/backends/gpu/musa/musa_device_function.h index 3f0c6b6726849..5745af0212e3a 100644 --- a/paddle/phi/backends/gpu/musa/musa_device_function.h +++ b/paddle/phi/backends/gpu/musa/musa_device_function.h @@ -100,7 +100,8 @@ __forceinline__ __device__ phi::dtype::complex CudaShuffleDownSync( width)); return phi::dtype::complex(real, imag); } -#if 0 + +// TODO(@MTAI): there is compiling error when compiling the following code //template <> //__forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync( // unsigned mask, phi::dtype::float16 val, int width) { @@ -112,7 +113,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync( unsigned mask, phi::dtype::bfloat16 val, int width) { #if defined(PADDLE_MUSA_BF16) return phi::dtype::bfloat16( - __shfl_xor_sync(mask, val.to_nv_bfloat16(), width)); + __shfl_xor_sync(mask, val.to_mt_bfloat16(), width)); #else PADDLE_ENFORCE( false, "__shfl_xor_sync with bfloat16 is not supported on cuda <= 11."); @@ -149,7 +150,6 @@ template HOSTDEVICE T Infinity() { return INFINITY; } -#endif template __device__ T reduceSum(T val, int tid, int len) { diff --git a/paddle/phi/backends/gpu/musa/musa_info.cc b/paddle/phi/backends/gpu/musa/musa_info.cc index ced106d6c6b3d..a7f2f8dbb166d 100644 --- a/paddle/phi/backends/gpu/musa/musa_info.cc +++ b/paddle/phi/backends/gpu/musa/musa_info.cc @@ -31,12 +31,13 @@ namespace backends { namespace gpu { int DnnVersion() { - return 0; - //if (!dynload::HasCUDNN()) return -1; - //size_t version_major, version_minor, version_patch; - //PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenGetVersion( - // &version_major, &version_minor, &version_patch)); - //return version_major * 100 + version_minor * 10 + version_patch; + if (!dynload::HasCUDNN()) return -1; + // TODO(@caizhi): mudnnGetVersion is not supported now. + // version info will be returned from mudnnGetVersion later. + const int version_major = 1; + const int version_minor = 1; + const int version_patch = 0; + return version_major * 1000 + version_minor * 100 + version_patch; } static int GetGPUDeviceCountImpl() { @@ -49,22 +50,22 @@ static int GetGPUDeviceCountImpl() { return 0; } - const auto *cuda_visible_devices = std::getenv("MUSA_VISIBLE_DEVICES"); - - if (cuda_visible_devices != nullptr) { - std::string cuda_visible_devices_str(cuda_visible_devices); - if (!cuda_visible_devices_str.empty()) { - cuda_visible_devices_str.erase( - 0, cuda_visible_devices_str.find_first_not_of('\'')); - cuda_visible_devices_str.erase( - cuda_visible_devices_str.find_last_not_of('\'') + 1); - cuda_visible_devices_str.erase( - 0, cuda_visible_devices_str.find_first_not_of('\"')); - cuda_visible_devices_str.erase( - cuda_visible_devices_str.find_last_not_of('\"') + 1); + const auto *musa_visible_devices = std::getenv("MUSA_VISIBLE_DEVICES"); + + if (musa_visible_devices != nullptr) { + std::string musa_visible_devices_str(musa_visible_devices); + if (!musa_visible_devices_str.empty()) { + musa_visible_devices_str.erase( + 0, musa_visible_devices_str.find_first_not_of('\'')); + musa_visible_devices_str.erase( + musa_visible_devices_str.find_last_not_of('\'') + 1); + musa_visible_devices_str.erase( + 0, musa_visible_devices_str.find_first_not_of('\"')); + musa_visible_devices_str.erase( + musa_visible_devices_str.find_last_not_of('\"') + 1); } - if (std::all_of(cuda_visible_devices_str.begin(), - cuda_visible_devices_str.end(), + if (std::all_of(musa_visible_devices_str.begin(), + musa_visible_devices_str.end(), [](char ch) { return ch == ' '; })) { VLOG(2) << "MUSA_VISIBLE_DEVICES is set to be " "empty. No GPU detected."; diff --git a/paddle/phi/backends/musartc.h b/paddle/phi/backends/musartc.h deleted file mode 100644 index dc9ebc3faf0d7..0000000000000 --- a/paddle/phi/backends/musartc.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace phi { -namespace dynload { - -extern bool HasNVRTC(); - -} // namespace dynload -} // namespace phi - diff --git a/paddle/phi/kernels/funcs/mufft_util.h b/paddle/phi/kernels/funcs/mufft_util.h deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas.h b/paddle/phi/kernels/funcs/sparse/sparse_blas.h index 9a6534c32a1c6..f6d67488d1f48 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas.h @@ -100,6 +100,3 @@ inline SparseBlasT GetSparseBlas( #if defined(PADDLE_WITH_HIP) && HIP_VERSION >= 402 #include "paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h" #endif -#if defined(PADDLE_WITH_MUSA) -#include "paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h" -#endif diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h deleted file mode 100644 index 1f4ffb82624a4..0000000000000 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include "glog/logging.h" diff --git a/paddle/phi/kernels/gpu/mudnn_lstm_cache.h b/paddle/phi/kernels/gpu/mudnn_lstm_cache.h deleted file mode 100644 index e69de29bb2d1d..0000000000000