Merge pull request PaddlePaddle#44 from mthreads/new_files
[MTAI-484] fix(build): optimize new files for MUSA
caizhi-mt authored and mt-robot committed Aug 15, 2023
2 parents 1ee924d + c15d408 commit db90713
Showing 18 changed files with 117 additions and 102 deletions.
1 change: 0 additions & 1 deletion paddle/fluid/framework/var_type_traits.cc
@@ -42,7 +42,6 @@
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" // NOLINT
#include "paddle/fluid/platform/device/gpu/nccl_helper.h" // NOLINT
#endif
#include "paddle/fluid/operators/mudnn_rnn_cache.h"
#endif

#if defined(PADDLE_WITH_XPU_BKCL)
10 changes: 9 additions & 1 deletion paddle/fluid/framework/var_type_traits.h
@@ -33,6 +33,12 @@
#include <nccl.h>
#endif
#endif
#ifdef PADDLE_WITH_MUSA
#include <mudnn.h>
#if defined(PADDLE_WITH_MCCL)
#include <mccl.h>
#endif
#endif
#ifdef PADDLE_WITH_HIP
#include <miopen/miopen.h>
#ifdef PADDLE_WITH_RCCL
@@ -190,13 +196,15 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl<
FeedList,
operators::reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder,
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || defined(PADDLE_WITH_MCCL)
ncclUniqueId,
platform::Communicator,
platform::NCCLCommunicator,
#endif
#ifndef PADDLE_WITH_MUSA
operators::CudnnRNNCache,
#endif
#endif
#if defined(PADDLE_WITH_XPU_BKCL)
BKCLUniqueId,
platform::BKCLCommunicator,
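Note: the hunk above extends Paddle's compile-time VarTypeRegistry, where build macros decide which backend types get registered at all. A minimal, self-contained sketch of that pattern (illustrative names, not Paddle's real implementation):

#include <cstddef>
#include <iostream>
#include <type_traits>

// Compile-time type list whose membership depends on build flags,
// mirroring how var_type_traits.h registers ncclUniqueId and
// CudnnRNNCache only under the matching PADDLE_WITH_* macros.
template <typename... Ts>
struct RegistryImpl {
  static constexpr std::size_t kSize = sizeof...(Ts);
  template <typename T>
  static constexpr bool Contains() {
    return (std::is_same_v<T, Ts> || ...);
  }
};

struct DenseTensorStub {};   // stand-in for an always-present type
#if defined(DEMO_WITH_MCCL)
struct McclUniqueIdStub {};  // stand-in for a communicator id type
#endif

using Registry = RegistryImpl<DenseTensorStub
#if defined(DEMO_WITH_MCCL)
                              ,
                              McclUniqueIdStub
#endif
                              >;

int main() {
  std::cout << Registry::kSize << " type(s) registered, contains stub: "
            << std::boolalpha << Registry::Contains<DenseTensorStub>() << "\n";
}

Compiling with -DDEMO_WITH_MCCL grows the registry, just as defining PADDLE_WITH_MCCL pulls ncclUniqueId and the communicator types into the real one.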
33 changes: 0 additions & 33 deletions paddle/fluid/operators/mudnn_rnn_cache.h

This file was deleted.

4 changes: 1 addition & 3 deletions paddle/fluid/platform/device/gpu/gpu_helper.h
@@ -17,9 +17,7 @@

#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/rocm_helper.h"
#elif defined(PADDLE_WITH_MUSA)
#include "paddle/fluid/platform/device/gpu/musa/musa_helper.h"
#else
#elif defined(PADDLE_WITH_CUDA)
#include "paddle/fluid/platform/device/gpu/cuda/cuda_helper.h"
#include "paddle/fluid/platform/device/gpu/cuda/cusparse_helper.h"
#endif
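Note: the key change above replaces a bare #else (which silently selected the CUDA headers) with an explicit #elif defined(PADDLE_WITH_CUDA), so a backend-less build includes nothing. A runnable sketch of that dispatch shape (the DEMO_* macros are stand-ins):

#include <cstdio>

// Exactly one branch is compiled in; with no DEMO_WITH_* flag the
// fallback is "none" rather than an implicit CUDA default.
#if defined(DEMO_WITH_HIP)
static const char* Backend() { return "ROCm"; }
#elif defined(DEMO_WITH_MUSA)
static const char* Backend() { return "MUSA"; }
#elif defined(DEMO_WITH_CUDA)
static const char* Backend() { return "CUDA"; }
#else
static const char* Backend() { return "none"; }  // demo-only fallback
#endif

int main() { std::printf("backend: %s\n", Backend()); }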
Empty file.
1 change: 0 additions & 1 deletion paddle/fluid/platform/device_context.h
@@ -45,7 +45,6 @@ limitations under the License. */
#ifdef PADDLE_WITH_MUSA
#include "paddle/fluid/platform/device/gpu/gpu_helper.h"
#include "paddle/fluid/platform/dynload/mublas.h"
#include "paddle/fluid/platform/dynload/mudnn.h"
#include "paddle/fluid/platform/dynload/musparse.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#if !defined(__APPLE__) && defined(PADDLE_WITH_MCCL)
Empty file.
13 changes: 10 additions & 3 deletions paddle/fluid/platform/dynload/musartc.cc
@@ -1,4 +1,4 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,13 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/dynload/musartc.h"

#include "paddle/phi/backends/dynload/musartc.h"

namespace paddle {
namespace platform {
namespace dynload {

bool HasNVRTC() { return false; }
#define DEFINE_WRAP(__name) DynLoad__##__name __name

MUSARTC_ROUTINE_EACH(DEFINE_WRAP);

bool HasNVRTC() { return phi::dynload::HasNVRTC(); }

} // namespace dynload
} // namespace platform
} // namespace paddle
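
Note: DEFINE_WRAP pairs with the declaration macro in the header; together they give each listed symbol a global callable wrapper, so call sites keep the raw API spelling. A self-contained sketch with a stand-in function (not the real MUSA RTC API):

#include <cstdio>

// Stand-in for a C API entry point such as mtrtcVersion.
int fake_mtrtcVersion(int* major, int* minor) {
  *major = 4;
  *minor = 0;
  return 0;
}

// Header side: wrapper type plus extern object, one per symbol.
#define DECLARE_WRAP(__name)         \
  struct DynLoad__##__name {         \
    template <typename... Args>      \
    auto operator()(Args... args) {  \
      return fake_##__name(args...); \
    }                                \
  };                                 \
  extern DynLoad__##__name __name

// .cc side: the single global instance, as in the diff above.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

DECLARE_WRAP(mtrtcVersion);
DEFINE_WRAP(mtrtcVersion);

int main() {
  int major = 0, minor = 0;
  mtrtcVersion(&major, &minor);  // reads like a direct C API call
  std::printf("stub mtrtc v%d.%d\n", major, minor);
}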

31 changes: 29 additions & 2 deletions paddle/fluid/platform/dynload/musartc.h
@@ -1,4 +1,4 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,13 +14,40 @@ limitations under the License. */

#pragma once

#include <mtrtc.h>

#include <mutex> // NOLINT

#include "paddle/phi/backends/dynload/musartc.h"

namespace paddle {
namespace platform {
namespace dynload {

extern bool HasNVRTC();

#define PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name

/**
* include all needed musartc functions
**/
#define MUSARTC_ROUTINE_EACH(__macro) \
__macro(mtrtcVersion); \
__macro(mtrtcGetErrorString); \
__macro(mtrtcCompileProgram); \
__macro(mtrtcCreateProgram); \
__macro(mtrtcDestroyProgram); \
__macro(mtrtcGetMUSA); \
__macro(mtrtcGetMUSASize); \
__macro(mtrtcGetProgramLog); \
__macro(mtrtcGetProgramLogSize)

MUSARTC_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP);

#undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVRTC_WRAP

} // namespace dynload
} // namespace platform
} // namespace paddle
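
Note: with this change HasNVRTC() reports whether the runtime-compiler library actually loaded, instead of a hard-coded false, so callers can gate JIT paths on it. A hedged usage sketch (the stub below stands in for the real paddle::platform::dynload::HasNVRTC):

#include <cstdio>

static bool HasNVRTC() { return false; }  // stand-in, not Paddle's

int main() {
  if (!HasNVRTC()) {
    std::puts("musartc unavailable: fall back to precompiled kernels");
    return 0;
  }
  // A real caller would now drive mtrtcCreateProgram /
  // mtrtcCompileProgram / mtrtcGetMUSA to JIT-compile a kernel string.
  return 0;
}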

22 changes: 20 additions & 2 deletions paddle/phi/backends/dynload/mudnn.cc
@@ -12,13 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/backends/dynload/mudnn.h"

#include "paddle/phi/core/enforce.h"

namespace phi {
namespace dynload {

std::once_flag mudnn_dso_flag;
void* mudnn_dso_handle = nullptr;

#define DEFINE_WRAP(__name) DynLoad__##__name __name

bool HasCUDNN() {
return false;
std::call_once(mudnn_dso_flag,
[]() { mudnn_dso_handle = GetCUDNNDsoHandle(); });
return mudnn_dso_handle != nullptr;
}

void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL(
mudnn_dso_handle,
phi::errors::PreconditionNotMet(
"Cannot load mudnn shared library. Cannot invoke method %s.",
fn_name));
}

} // namespace dynload
} // namespace phi
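
Note: the std::call_once pattern above makes loading lazy and thread-safe; the first caller pays for the dlopen, and every later caller reuses the cached handle. A standalone sketch (the library name is illustrative; the real path comes from phi::dynload::GetCUDNNDsoHandle()):

#include <dlfcn.h>

#include <cstdio>
#include <mutex>

static std::once_flag g_dso_flag;
static void* g_dso_handle = nullptr;

// Probe the shared library exactly once, even under concurrent calls.
bool HasMUDNN() {
  std::call_once(g_dso_flag, [] {
    g_dso_handle = dlopen("libmudnn.so", RTLD_NOW | RTLD_LOCAL);
  });
  return g_dso_handle != nullptr;
}

int main() {
  std::printf("mudnn loaded: %s\n", HasMUDNN() ? "yes" : "no");
}

(Build with -ldl on Linux.)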

25 changes: 23 additions & 2 deletions paddle/phi/backends/dynload/mudnn.h
@@ -14,15 +14,36 @@ limitations under the License. */

#pragma once
#ifdef PADDLE_WITH_MUSA
#include <mudnn.h>

#include <mutex> // NOLINT

#include "paddle/phi/backends/dynload/dynamic_loader.h"
#include "paddle/phi/backends/dynload/port.h"

namespace phi {
namespace dynload {

extern std::once_flag mudnn_dso_flag;
extern void* mudnn_dso_handle;
extern bool HasCUDNN();

extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mudnn_func = decltype(&::__name); \
std::call_once(mudnn_dso_flag, []() { \
mudnn_dso_handle = phi::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
static void* p_##__name = dlsym(mudnn_dso_handle, #__name); \
return reinterpret_cast<mudnn_func>(p_##__name)(args...); \
} \
}; \
extern struct DynLoad__##__name __name

} // namespace dynload
} // namespace phi

#endif
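
Note: each DECLARE_DYNAMIC_LOAD_CUDNN_WRAP expansion forces the library load, then resolves its symbol once via a function-local static. A runnable sketch of the same mechanics against libm's sin, since exercising real mudnn symbols needs MUSA hardware (all names here are illustrative):

#include <dlfcn.h>

#include <cstdio>
#include <mutex>
#include <stdexcept>

static std::once_flag g_flag;
static void* g_handle = nullptr;

// Shape of the expanded wrapper: load once, dlsym once, then forward.
struct DynLoad__sin {
  double operator()(double x) {
    using func_t = double (*)(double);
    std::call_once(g_flag, [] { g_handle = dlopen("libm.so.6", RTLD_NOW); });
    if (g_handle == nullptr) {
      throw std::runtime_error("cannot load libm; cannot invoke sin");
    }
    static void* p_sin = dlsym(g_handle, "sin");  // cached after first call
    return reinterpret_cast<func_t>(p_sin)(x);
  }
};

int main() {
  DynLoad__sin dyn_sin;
  std::printf("sin(0) = %f\n", dyn_sin(0.0));  // expect 0.000000
}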

6 changes: 3 additions & 3 deletions paddle/phi/backends/gpu/musa/musa_device_function.h
@@ -100,7 +100,8 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
width));
return phi::dtype::complex<double>(real, imag);
}
#if 0

// TODO(@MTAI): the following code does not compile yet
//template <>
//__forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
// unsigned mask, phi::dtype::float16 val, int width) {
@@ -112,7 +113,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
unsigned mask, phi::dtype::bfloat16 val, int width) {
#if defined(PADDLE_MUSA_BF16)
return phi::dtype::bfloat16(
__shfl_xor_sync(mask, val.to_nv_bfloat16(), width));
__shfl_xor_sync(mask, val.to_mt_bfloat16(), width));
#else
PADDLE_ENFORCE(
false, "__shfl_xor_sync with bfloat16 is not supported on cuda <= 11.");
@@ -149,7 +150,6 @@ template <typename T>
HOSTDEVICE T Infinity() {
return INFINITY;
}
#endif

template <typename T>
__device__ T reduceSum(T val, int tid, int len) {
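Note: the CudaShuffleXorSync specializations above exist to serve warp-level butterfly reductions. A minimal kernel showing that pattern in plain CUDA-style code (compile as a .cu file with nvcc, or with the MUSA toolchain under the usual porting assumptions):

#include <cuda_runtime.h>

#include <cstdio>

// XOR-based butterfly reduction: after log2(32) steps every lane in
// the warp holds the full sum.
__inline__ __device__ float WarpReduceSum(float val) {
  const unsigned kFullMask = 0xffffffffu;
  for (int offset = 16; offset > 0; offset >>= 1) {
    val += __shfl_xor_sync(kFullMask, val, offset);
  }
  return val;
}

__global__ void SumKernel(const float* in, float* out) {
  float v = WarpReduceSum(in[threadIdx.x]);
  if (threadIdx.x == 0) *out = v;
}

int main() {
  float h_in[32], h_out = 0.f, *d_in, *d_out;
  for (int i = 0; i < 32; ++i) h_in[i] = 1.f;
  cudaMalloc(&d_in, sizeof(h_in));
  cudaMalloc(&d_out, sizeof(float));
  cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);
  SumKernel<<<1, 32>>>(d_in, d_out);
  cudaMemcpy(&h_out, d_out, sizeof(float), cudaMemcpyDeviceToHost);
  std::printf("warp sum = %f\n", h_out);  // expect 32.000000
  cudaFree(d_in);
  cudaFree(d_out);
}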
43 changes: 22 additions & 21 deletions paddle/phi/backends/gpu/musa/musa_info.cc
@@ -31,12 +31,13 @@ namespace backends {
namespace gpu {

int DnnVersion() {
return 0;
//if (!dynload::HasCUDNN()) return -1;
//size_t version_major, version_minor, version_patch;
//PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenGetVersion(
// &version_major, &version_minor, &version_patch));
//return version_major * 100 + version_minor * 10 + version_patch;
if (!dynload::HasCUDNN()) return -1;
  // TODO(@caizhi): mudnnGetVersion is not available yet; return a
  // hard-coded version until it can be queried at runtime.
const int version_major = 1;
const int version_minor = 1;
const int version_patch = 0;
return version_major * 1000 + version_minor * 100 + version_patch;
}

static int GetGPUDeviceCountImpl() {
@@ -49,22 +50,22 @@ static int GetGPUDeviceCountImpl() {
return 0;
}

const auto *cuda_visible_devices = std::getenv("MUSA_VISIBLE_DEVICES");

if (cuda_visible_devices != nullptr) {
std::string cuda_visible_devices_str(cuda_visible_devices);
if (!cuda_visible_devices_str.empty()) {
cuda_visible_devices_str.erase(
0, cuda_visible_devices_str.find_first_not_of('\''));
cuda_visible_devices_str.erase(
cuda_visible_devices_str.find_last_not_of('\'') + 1);
cuda_visible_devices_str.erase(
0, cuda_visible_devices_str.find_first_not_of('\"'));
cuda_visible_devices_str.erase(
cuda_visible_devices_str.find_last_not_of('\"') + 1);
const auto *musa_visible_devices = std::getenv("MUSA_VISIBLE_DEVICES");

if (musa_visible_devices != nullptr) {
std::string musa_visible_devices_str(musa_visible_devices);
if (!musa_visible_devices_str.empty()) {
musa_visible_devices_str.erase(
0, musa_visible_devices_str.find_first_not_of('\''));
musa_visible_devices_str.erase(
musa_visible_devices_str.find_last_not_of('\'') + 1);
musa_visible_devices_str.erase(
0, musa_visible_devices_str.find_first_not_of('\"'));
musa_visible_devices_str.erase(
musa_visible_devices_str.find_last_not_of('\"') + 1);
}
if (std::all_of(cuda_visible_devices_str.begin(),
cuda_visible_devices_str.end(),
if (std::all_of(musa_visible_devices_str.begin(),
musa_visible_devices_str.end(),
[](char ch) { return ch == ' '; })) {
VLOG(2) << "MUSA_VISIBLE_DEVICES is set to be "
"empty. No GPU detected.";
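Note: the renamed musa_visible_devices_str logic strips one layer of single or double quotes from MUSA_VISIBLE_DEVICES and treats an all-space value as "no devices visible". A standalone sketch of that parsing (StripQuotes is an illustrative helper, not Paddle code):

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>

// Remove one layer of leading/trailing ' or " characters, as the
// device-count code above does before inspecting the value.
static std::string StripQuotes(std::string s) {
  for (char q : {'\'', '"'}) {
    s.erase(0, s.find_first_not_of(q));
    s.erase(s.find_last_not_of(q) + 1);
  }
  return s;
}

int main() {
  const char* env = std::getenv("MUSA_VISIBLE_DEVICES");
  if (env == nullptr) {
    std::cout << "unset: all devices visible\n";
    return 0;
  }
  std::string v = StripQuotes(env);
  const bool blank = std::all_of(v.begin(), v.end(),
                                 [](char ch) { return ch == ' '; });
  std::cout << (blank ? "set but blank: no GPU visible"
                      : "visible devices: " + v)
            << "\n";
}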
24 changes: 0 additions & 24 deletions paddle/phi/backends/musartc.h

This file was deleted.

Empty file.
3 changes: 0 additions & 3 deletions paddle/phi/kernels/funcs/sparse/sparse_blas.h
@@ -100,6 +100,3 @@ inline SparseBlasT<DeviceContext, T> GetSparseBlas(
#if defined(PADDLE_WITH_HIP) && HIP_VERSION >= 402
#include "paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h"
#endif
#if defined(PADDLE_WITH_MUSA)
#include "paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h"
#endif
3 changes: 0 additions & 3 deletions paddle/phi/kernels/funcs/sparse/sparse_blas_impl.mu.h

This file was deleted.

Empty file.
