From bb923e562e9c1eb902ac92194e7471283597f5e8 Mon Sep 17 00:00:00 2001
From: ForFishes <2282912238@qq.com>
Date: Thu, 23 Dec 2021 03:50:40 +0000
Subject: [PATCH 1/3] fix bug in pfp16

---
 paddle/fluid/imperative/reducer.cc | 6 +++---
 paddle/fluid/imperative/reducer.cu | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index 9014871229b39b..c27d9c63b97aea 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -48,9 +48,9 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
   } else if (platform::is_cpu_place(tensor->place())) {
     VLOG(4) << "before div 2" << *tensor;
     VLOG(4) << "NDiv for cpu devices : rank = " << nranks;
-    framework::VisitDataTypeSmall(
-        dtype_, DivNRanksForAllReduce<platform::CPUDeviceContext>(
-                    tensor, nranks, context));
+    framework::VisitDataType(dtype_,
+                             DivNRanksForAllReduce<platform::CPUDeviceContext>(
+                                 tensor, nranks, context));
     VLOG(4) << "after div 2" << *tensor;
   } else if (platform::is_xpu_place(tensor->place())) {
 #ifdef PADDLE_WITH_XPU_BKCL
diff --git a/paddle/fluid/imperative/reducer.cu b/paddle/fluid/imperative/reducer.cu
index ca233292b34704..c3daab9581ab7c 100644
--- a/paddle/fluid/imperative/reducer.cu
+++ b/paddle/fluid/imperative/reducer.cu
@@ -20,7 +20,7 @@ namespace imperative {
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
 void Group::DivNRanks(framework::Tensor *tensor, int64_t nranks,
                       const platform::DeviceContext &context) {
-  framework::VisitDataTypeSmall(
+  framework::VisitDataType(
       dtype_, DivNRanksForAllReduce<platform::CUDADeviceContext>(tensor, nranks,
                                                                  context));
 }

From 26a0bb315bb303e74c922a3e109c065bc8f393d9 Mon Sep 17 00:00:00 2001
From: ForFishes <2282912238@qq.com>
Date: Thu, 23 Dec 2021 07:18:05 +0000
Subject: [PATCH 2/3] fix hip

---
 paddle/fluid/framework/data_type.h | 30 ++++++++++++++++++++++++++++++
 paddle/fluid/imperative/reducer.cc |  6 ++++++
 paddle/fluid/imperative/reducer.cu |  6 ++++++
 3 files changed, 42 insertions(+)

diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h
index 08749b6b7515b9..15d45d8386dad2 100644
--- a/paddle/fluid/framework/data_type.h
+++ b/paddle/fluid/framework/data_type.h
@@ -89,6 +89,22 @@ struct DataTypeTrait {
   _ForEachDataTypeHelper_(callback, int, INT32);                          \
   _ForEachDataTypeHelper_(callback, int64_t, INT64);
 
+// It's only for DataParallel in HIP
+#define _ForEachDataTypeForHIP_(callback)                                 \
+  _ForEachDataTypeHelper_(callback, float, FP32);                         \
+  _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16);   \
+  _ForEachDataTypeHelper_(callback, double, FP64);                        \
+  _ForEachDataTypeHelper_(callback, int, INT32);                          \
+  _ForEachDataTypeHelper_(callback, int64_t, INT64);                      \
+  _ForEachDataTypeHelper_(callback, bool, BOOL);                          \
+  _ForEachDataTypeHelper_(callback, uint8_t, UINT8);                      \
+  _ForEachDataTypeHelper_(callback, int16_t, INT16);                      \
+  _ForEachDataTypeHelper_(callback, int8_t, INT8);                        \
+  _ForEachDataTypeHelper_(callback, ::paddle::platform::complex<float>,   \
+                          COMPLEX64);                                     \
+  _ForEachDataTypeHelper_(callback, ::paddle::platform::complex<double>,  \
+                          COMPLEX128);
+
 #define DefineDataTypeTrait(cpp_type, proto_type) \
   template <>                                     \
   struct DataTypeTrait<cpp_type> {                \
@@ -147,6 +163,20 @@ inline void VisitDataTypeTiny(proto::VarType::Type type, Visitor visitor) {
 #undef VisitDataTypeCallbackTiny
 }
 
+template <typename Visitor>
+inline void VisitDataTypeForHIP(proto::VarType::Type type, Visitor visitor) {
+#define VisitDataTypeCallbackHIP(cpp_type, proto_type) \
+  do {                                                 \
+    if (type == proto_type) {                          \
+      visitor.template apply<cpp_type>();              \
+      return;                                          \
+    }                                                  \
+  } while (0)
+
+  _ForEachDataTypeForHIP_(VisitDataTypeCallbackHIP);
+#undef VisitDataTypeCallbackHIP
+}
+
 extern std::string DataTypeToString(const proto::VarType::Type type);
 extern size_t SizeOfType(proto::VarType::Type type);
 inline std::ostream& operator<<(std::ostream& out,
diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index c27d9c63b97aea..746df54a7dc11d 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -48,9 +48,15 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
   } else if (platform::is_cpu_place(tensor->place())) {
     VLOG(4) << "before div 2" << *tensor;
     VLOG(4) << "NDiv for cpu devices : rank = " << nranks;
+#ifdef PADDLE_WITH_HIP
+    framework::VisitDataTypeForHIP(
+        dtype_, DivNRanksForAllReduce<platform::CPUDeviceContext>(
+                    tensor, nranks, context));
+#else
     framework::VisitDataType(dtype_,
                              DivNRanksForAllReduce<platform::CPUDeviceContext>(
                                  tensor, nranks, context));
+#endif
     VLOG(4) << "after div 2" << *tensor;
   } else if (platform::is_xpu_place(tensor->place())) {
 #ifdef PADDLE_WITH_XPU_BKCL
diff --git a/paddle/fluid/imperative/reducer.cu b/paddle/fluid/imperative/reducer.cu
index c3daab9581ab7c..88326d66211cc5 100644
--- a/paddle/fluid/imperative/reducer.cu
+++ b/paddle/fluid/imperative/reducer.cu
@@ -20,9 +20,15 @@ namespace imperative {
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
 void Group::DivNRanks(framework::Tensor *tensor, int64_t nranks,
                       const platform::DeviceContext &context) {
+#ifdef PADDLE_WITH_HIP
+  framework::VisitDataTypeForHIP(
+      dtype_, DivNRanksForAllReduce<platform::CUDADeviceContext>(tensor, nranks,
+                                                                 context));
+#else
   framework::VisitDataType(
       dtype_, DivNRanksForAllReduce<platform::CUDADeviceContext>(tensor, nranks,
                                                                  context));
+#endif
 }
 #endif

From 581b1a2128402e8b12245f630daf0a1f98115446 Mon Sep 17 00:00:00 2001
From: ForFishes <2282912238@qq.com>
Date: Thu, 23 Dec 2021 07:28:38 +0000
Subject: [PATCH 3/3] fix hip

---
 paddle/fluid/framework/data_type.h | 2 +-
 paddle/fluid/imperative/reducer.cc | 4 ++++
 paddle/fluid/imperative/reducer.cu | 4 ++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h
index 15d45d8386dad2..ec8284b8255007 100644
--- a/paddle/fluid/framework/data_type.h
+++ b/paddle/fluid/framework/data_type.h
@@ -89,7 +89,7 @@ struct DataTypeTrait {
   _ForEachDataTypeHelper_(callback, int, INT32);                          \
   _ForEachDataTypeHelper_(callback, int64_t, INT64);
 
-// It's only for DataParallel in HIP
+// It's only for DataParallel in HIP; bf16 is not supported in HIP.
 #define _ForEachDataTypeForHIP_(callback)                                 \
   _ForEachDataTypeHelper_(callback, float, FP32);                         \
   _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16);   \
diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index 746df54a7dc11d..beddbd5d120080 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -49,6 +49,10 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
     VLOG(4) << "before div 2" << *tensor;
     VLOG(4) << "NDiv for cpu devices : rank = " << nranks;
 #ifdef PADDLE_WITH_HIP
+    if (dtype_ == paddle::framework::proto::VarType_Type_BF16) {
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "BF16 is not supported in DataParallel for now."));
+    }
     framework::VisitDataTypeForHIP(
         dtype_, DivNRanksForAllReduce<platform::CPUDeviceContext>(
                     tensor, nranks, context));
diff --git a/paddle/fluid/imperative/reducer.cu b/paddle/fluid/imperative/reducer.cu
index 88326d66211cc5..05453a61b7e393 100644
--- a/paddle/fluid/imperative/reducer.cu
+++ b/paddle/fluid/imperative/reducer.cu
@@ -21,6 +21,10 @@ namespace imperative {
 void Group::DivNRanks(framework::Tensor *tensor, int64_t nranks,
                       const platform::DeviceContext &context) {
 #ifdef PADDLE_WITH_HIP
+  if (dtype_ == paddle::framework::proto::VarType_Type_BF16) {
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "BF16 is not supported in DataParallel for now."));
+  }
   framework::VisitDataTypeForHIP(
       dtype_, DivNRanksForAllReduce<platform::CUDADeviceContext>(tensor, nranks,
                                                                  context));
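
Editor's note (illustrative, not part of the patches): on HIP builds the fix routes gradient scaling through VisitDataTypeForHIP, an X-macro visitor that maps the runtime dtype enum to a compile-time type and invokes the visitor's templated apply<T>(). The standalone sketch below mirrors that dispatch pattern under assumed, hypothetical names (DType, VisitDType, DivByNRanks); it does not use Paddle's actual headers or types.

// Standalone sketch, NOT Paddle code: DType, VisitDType, and DivByNRanks are
// hypothetical stand-ins for proto::VarType::Type, VisitDataTypeForHIP, and
// the DivNRanksForAllReduce functor.
#include <cstddef>
#include <cstdint>
#include <iostream>

// Stand-in dtype enum; BF16 is deliberately NOT in the dispatch list below.
enum class DType { FP32, FP64, INT32, INT64, BF16 };

// X-macro listing (cpp_type, enum_value) pairs, like _ForEachDataTypeForHIP_.
#define FOR_EACH_DTYPE(callback)   \
  callback(float, DType::FP32);    \
  callback(double, DType::FP64);   \
  callback(int32_t, DType::INT32); \
  callback(int64_t, DType::INT64);

// Maps the runtime enum to a compile-time type and calls the visitor's
// templated apply<T>(), as VisitDataTypeForHIP does in the patch.
template <typename Visitor>
void VisitDType(DType type, Visitor visitor) {
#define VISIT_CALLBACK(cpp_type, enum_value) \
  do {                                       \
    if (type == enum_value) {                \
      visitor.template apply<cpp_type>();    \
      return;                                \
    }                                        \
  } while (0)

  FOR_EACH_DTYPE(VISIT_CALLBACK);
#undef VISIT_CALLBACK
  // Unlisted dtypes fall through silently, mirroring VisitDataTypeForHIP.
}

// Visitor in the spirit of DivNRanksForAllReduce: divide a buffer by nranks.
struct DivByNRanks {
  void* data;
  std::size_t n;
  int64_t nranks;

  template <typename T>
  void apply() const {
    T* p = static_cast<T*>(data);
    for (std::size_t i = 0; i < n; ++i) p[i] /= static_cast<T>(nranks);
  }
};

int main() {
  float grads[4] = {2.f, 4.f, 6.f, 8.f};
  VisitDType(DType::FP32, DivByNRanks{grads, 4, 2});  // selects apply<float>()
  for (float g : grads) std::cout << g << ' ';        // prints: 1 2 3 4
  std::cout << '\n';
}

Note that VisitDataTypeForHIP, as added in patch 2, has no trailing error for an unmatched dtype: a type missing from _ForEachDataTypeForHIP_ (such as BF16) would fall through as a silent no-op. That is why patch 3 adds the explicit BF16 guard with PADDLE_THROW before dispatching, turning a silent wrong result into a descriptive failure.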