PaddlePaddle · chengduoZH · Apr 7, 2018 · Apr 8, 2018
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
@@ -4,13 +4,18 @@ cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_h
 cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
 nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
         dynload_cuda)
+cc_library(collect_parameters SRCS collect_parameters.cc)
+nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
+        dynload_cuda collect_parameters)
+nv_library(all_gather_op_handle SRCS all_gather_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
+        dynload_cuda collect_parameters)
 cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
 
 cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base)
 cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
 
 if(WITH_GPU)
-    set(multi_devices_graph_builder_deps nccl_all_reduce_op_handle)
+    set(multi_devices_graph_builder_deps nccl_all_reduce_op_handle all_reduce_op_handle all_gather_op_handle)
 else()
     set(multi_devices_graph_builder_deps)
 endif()

diff --git a/paddle/fluid/framework/details/all_gather_op_handle.cc b/paddle/fluid/framework/details/all_gather_op_handle.cc
@@ -0,0 +1,41 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/details/all_gather_op_handle.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+AllGatherOpHandle::AllGatherOpHandle(const Scope &local_scope,
+                                     const platform::Place &place)
+    : local_scope_(local_scope), place_(place) {}
+
+void AllGatherOpHandle::RunImpl() {
+  PADDLE_ENFORCE_EQ(this->inputs_.size(), 1);
+  auto &var_name = static_cast<VarHandle *>(this->inputs_[0])->name_;
+
+  // Wait input done, this Wait is asynchronous operation
+  auto &p = static_cast<VarHandle *>(this->inputs_[0])->place_;
+  this->inputs_[0]->generated_op_->Wait(dev_ctxes_[p]);
+  auto var = local_scope_.FindVar(var_name);
+  PADDLE_ENFORCE(var);
+
+  ParameterCollection::Instance().Get(var_name)->Send<Variable *>(&var);
+}
+
+std::string AllGatherOpHandle::Name() const { return "all_gather"; }
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/details/all_gather_op_handle.h b/paddle/fluid/framework/details/all_gather_op_handle.h
@@ -0,0 +1,45 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/details/collect_parameters.h"
+#include "paddle/fluid/framework/details/op_handle_base.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct AllGatherOpHandle : public OpHandleBase {
+  const Scope& local_scope_;
+  const platform::Place& place_;
+
+  AllGatherOpHandle(const Scope& local_scope, const platform::Place& place);
+
+  std::string Name() const override;
+
+  bool IsMultiDeviceTransfer() override { return false; };
+
+ protected:
+  void RunImpl() override;
+};
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -0,0 +1,122 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+AllReduceOpHandle::AllReduceOpHandle(const std::vector<Scope *> &local_scopes,
+                                     const std::vector<platform::Place> &places,
+                                     const platform::NCCLContextMap &ctxs,
+                                     const int device_count)
+    : local_scopes_(local_scopes),
+      places_(places),
+      nccl_ctxs_(ctxs),
+      device_count_(device_count) {
+  for (auto &p : places_) {
+    this->dev_ctxes_[p] = nccl_ctxs_.DevCtx(p);
+  }
+}
+
+void AllReduceOpHandle::RunImpl() {
+  PADDLE_ENFORCE_EQ(this->inputs_.size(), device_count_);
+  auto in_var0 = static_cast<VarHandle *>(this->inputs_[0]);
+  auto &var_name = in_var0->name_;
+
+  // Wait input done, this Wait is asynchronous operation
+  for (auto *in : inputs_) {
+    auto &p = static_cast<VarHandle *>(in)->place_;
+    in->generated_op_->Wait(dev_ctxes_[p]);
+  }
+
+  platform::Place cuda_pinned_place = platform::CUDAPinnedPlace();
+  // sum
+  Variable *var;
+  ParameterCollection::Instance().Get(var_name)->Receive<Variable *>(&var);
+
+  Tensor reducer;
+  Tensor temp;
+  if (var->IsType<framework::SelectedRows>()) {
+    // reduce sparse gradient
+
+  } else if (var->IsType<framework::LoDTensor>()) {
+    auto param = var->Get<LoDTensor>();
+
+    PADDLE_ENFORCE(platform::is_gpu_place(param.place()));
+
+    auto dev_id = boost::get<platform::CUDAPlace>(param.place()).device;
+    auto dev_ctx = nccl_ctxs_.DevCtx(dev_id);
+
+    reducer.Resize(param.dims());
+    temp.Resize(param.dims());
+    reducer.mutable_data(cuda_pinned_place, param.type());
+    temp.mutable_data(cuda_pinned_place, param.type());
+
+    framework::TensorCopy(param, cuda_pinned_place, *dev_ctx, &reducer);
+    dev_ctx->Wait();
+  } else {
+    PADDLE_THROW("Gradient should be LoDTensor or SelectedRows");
+  }
+
+  // TODO(zcd): float should be T
+  float *reducer_ptr = reducer.data<float>();
+  for (int j = 0; j < device_count_ - 1; ++j) {
+    Variable *other_var;
+    ParameterCollection::Instance().Get(var_name)->Receive<Variable *>(
+        &other_var);
+    PADDLE_ENFORCE(other_var->Type() == var->Type());
+
+    if (var->IsType<framework::SelectedRows>()) {
+      // reduce sparse gradient
+
+    } else if (var->IsType<framework::LoDTensor>()) {
+      auto param = other_var->Get<LoDTensor>();
+      PADDLE_ENFORCE_EQ(reducer.numel(), param.numel());
+
+      auto dev_id = boost::get<platform::CUDAPlace>(param.place()).device;
+      auto dev_ctx = nccl_ctxs_.DevCtx(dev_id);
+
+      framework::TensorCopy(param, cuda_pinned_place, *dev_ctx, &temp);
+
+      dev_ctx->Wait();
+      float *temp_ptr = temp.data<float>();
+      for (int k = 0; k < reducer.numel(); ++k) {
+        reducer_ptr[k] += temp_ptr[k];
+      }
+    }
+  }
+
+  // broadcast
+  for (size_t i = 0; i < local_scopes_.size(); ++i) {
+    auto &p = places_[i];
+    auto *s = local_scopes_[i];
+    int dev_id = boost::get<platform::CUDAPlace>(p).device;
+    auto var = s->FindVar(var_name);
+    if (var->IsType<framework::SelectedRows>()) {
+      // reduce sparse gradient
+
+    } else if (var->IsType<framework::LoDTensor>()) {
+      auto lod_tensor = var->GetMutable<LoDTensor>();
+      auto dev_ctx = nccl_ctxs_.DevCtx(dev_id);
+      framework::TensorCopy(reducer, p, *dev_ctx, lod_tensor);
+    }
+  }
+}
+
+std::string AllReduceOpHandle::Name() const { return "all_reduce"; }
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.h b/paddle/fluid/framework/details/all_reduce_op_handle.h
@@ -0,0 +1,53 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/details/collect_parameters.h"
+#include "paddle/fluid/framework/details/op_handle_base.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/platform/nccl_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct AllReduceOpHandle : public OpHandleBase {
+  const std::vector<Scope *> &local_scopes_;
+  const std::vector<platform::Place> &places_;
+  const platform::NCCLContextMap &nccl_ctxs_;
+  const int device_count_;
+
+  AllReduceOpHandle(const std::vector<Scope *> &local_scopes,
+                    const std::vector<platform::Place> &places,
+                    const platform::NCCLContextMap &ctxs,
+                    const int device_count);
+
+  std::string Name() const override;
+
+  bool IsMultiDeviceTransfer() override { return false; };
+
+ protected:
+  void RunImpl() override;
+};
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/details/collect_parameters.cc b/paddle/fluid/framework/details/collect_parameters.cc
@@ -0,0 +1,47 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/details/collect_parameters.h"
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+ParameterCollection *ParameterCollection::param_collect = nullptr;
+
+ParameterCollection::ParameterCollection(
+    const std::unordered_set<std::string> &para_names, const int device_count)
+    : device_count_(device_count) {
+  PADDLE_ENFORCE_GT(para_names.size(), 0);
+
+  using PtrType = std::unique_ptr<ChannelHolder>;
+  for (auto pn : para_names) {
+    param_channels_.emplace(pn, PtrType(new ChannelHolder()));
+    param_channels_[pn]->Reset<Variable *>(device_count);
+  }
+}
+
+ChannelHolder *ParameterCollection::Get(const std::string &param_name) {
+  PADDLE_ENFORCE_NOT_NULL(param_collect,
+                          "Need to Create ParameterCollection first!");
+  auto it = param_channels_.find(param_name);
+  if (it == param_channels_.end()) {
+    PADDLE_THROW("%s is in the ParameterCollection.", param_name);
+  }
+  return it->second.get();
+}
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/details/collect_parameters.h b/paddle/fluid/framework/details/collect_parameters.h
@@ -0,0 +1,63 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "paddle/fluid/framework/channel.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+class ParameterCollection {
+ public:
+  explicit ParameterCollection(
+      const std::unordered_set<std::string> &para_names,
+      const int device_count);
+
+  static ParameterCollection &Instance() {
+    PADDLE_ENFORCE_NOT_NULL(param_collect,
+                            "Need to Create ParameterCollection first!");
+    return *param_collect;
+  }
+
+  /*! \brief  Create should only called by Init function */
+  static ParameterCollection &Init(
+      const std::unordered_set<std::string> &para_names,
+      const int device_count) {
+    if (param_collect == nullptr) {
+      param_collect = new ParameterCollection(para_names, device_count);
+    }
+    return *param_collect;
+  }
+
+  ChannelHolder *Get(const std::string &para_name);
+
+  size_t size() const { return param_channels_.size(); }
+
+ private:
+  static ParameterCollection *param_collect;
+  const int device_count_;
+  std::unordered_map<std::string, std::unique_ptr<ChannelHolder>>
+      param_channels_;
+  DISABLE_COPY_AND_ASSIGN(ParameterCollection);
+};
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle