From ec35b6dbdc9eeb90c6a5ec15709a3970f7967df6 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sat, 18 May 2024 15:12:39 +0800 Subject: [PATCH 1/3] Fix --- paddle/fluid/operators/CMakeLists.txt | 3 +- paddle/fluid/operators/dgc_op.cc | 143 ------------------ paddle/fluid/pir/dialect/operator/ir/ops.yaml | 8 - paddle/phi/api/yaml/op_compat.yaml | 6 + paddle/phi/api/yaml/ops.yaml | 13 ++ paddle/phi/infermeta/multiary.cc | 13 ++ paddle/phi/infermeta/multiary.h | 13 ++ 7 files changed, 46 insertions(+), 153 deletions(-) delete mode 100644 paddle/fluid/operators/dgc_op.cc diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 18e5d90e6d9df0..6fbc11df5cd3ce 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -82,7 +82,7 @@ endif() set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi common phi_utils static_prim_api get_expected_kernel_func) -register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op +register_operators(EXCLUDES py_func_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op recurrent_op save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils) op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS}) @@ -119,7 +119,6 @@ endif() set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) if (WITH_DGC) - op_library(dgc_op DEPS dgc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dgc) endif() diff --git a/paddle/fluid/operators/dgc_op.cc b/paddle/fluid/operators/dgc_op.cc deleted file mode 100644 index 7325c4271f9c4f..00000000000000 --- a/paddle/fluid/operators/dgc_op.cc +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class DGCOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("U"), "Input", "U", "DGCOp"); - OP_INOUT_CHECK(ctx->HasInput("V"), "Input", "V", "DGCOp"); - OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "DGCOp"); - OP_INOUT_CHECK(ctx->HasInput("Param"), "Input", "Param", "DGCOp"); - OP_INOUT_CHECK( - ctx->HasInput("current_step"), "Input", "current_step", "DGCOp"); - OP_INOUT_CHECK(ctx->HasInput("nranks"), "Input", "nranks", "DGCOp"); - - OP_INOUT_CHECK(ctx->HasOutput("U_out"), "Output", "U_out", "DGCOp"); - OP_INOUT_CHECK(ctx->HasOutput("V_out"), "Output", "V_out", "DGCOp"); - OP_INOUT_CHECK(ctx->HasOutput("k"), "Output", "k", "DGCOp"); - OP_INOUT_CHECK( - ctx->HasOutput("EncodeGrad"), "Output", "EncodeGrad", "DGCOp"); - OP_INOUT_CHECK( - ctx->HasOutput("GatherBuff"), "Output", "GatherBuff", "DGCOp"); - } - - protected: - phi::KernelKey GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const override { - if (var_name == "current_step" || var_name == "k" || var_name == "nranks") { - VLOG(10) << "var_name:" << var_name << " need not to transform"; - return phi::KernelKey(phi::Backend::ALL_BACKEND, - expected_kernel_type.layout(), - expected_kernel_type.dtype()); - } - - return framework::OperatorWithKernel::GetKernelTypeForVar( - var_name, tensor, expected_kernel_type); - } -}; - -class DGCOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("U", "(Tensor) U velocity tensor of DGC"); - AddInput("V", "(Tensor) V velocity tensor of DGC"); - AddInput("Grad", "(Tensor) Input gradient"); - AddInput("Param", "(Tensor) Input parameter"); - AddInput("current_step", "(Tensor) Current step."); - AddInput("nranks", "(Tensor) nranks."); - - AddOutput("U_out", "(Tensor) Output U velocity of DGC"); - AddOutput("V_out", "(Tensor) Output V velocity of DGC"); - AddOutput("EncodeGrad", "(Tensor) Output encoded gradient"); - AddOutput("Grad_out", "(Tensor) Output grad gradient"); - AddOutput("k", "(Tensor) Output top-k value"); - AddOutput("GatherBuff", "(Tensor) Gather buffer"); - - AddAttr("m", - "(float, 0.9) " - "The momentum of learning rate.") - .SetDefault(0.9); - - AddAttr("use_nesterov", - "(bool, true)" - "The momentum of learning rate.") - .SetDefault(true); - - AddAttr>("sparsity", - "(vector, float)" - "The period sparsity of k_select."); - - AddAttr("rampup_begin_step", - "(float, 0.0)" - "The period when begin k_select.") - .SetDefault(0.0); - - AddAttr("rampup_step", - "(float, 0.0)" - "The period when begin k_select."); - - AddAttr("regular_coeff", - "(float, 0.0)" - "The coeff of regularization, weight decay parameter") - .SetDefault(0.0); - - AddAttr("regular_type", - "(int, 0)" - "The type of regularization, {0:None, 1:L1Decay, 2:L2Decay") - .SetDefault(0); - - AddComment(R"DOC( - Original paper is https://arxiv.org/abs/1712.01887 - - DGC reduce the communication bandwidth by sending only the important gradients (sparse update):\ - only gradients larger than a threshold are transmitted. - - To avoid losing information, DGC accumulate the rest of the gradients locally. - - Eventually, these gradients become large enough to be transmitted. - - Thus, DGC send the large gradients immediately but eventually send all of the gradients over time. - - To ensure no loss of accuracy, DGC employs momentum correc-tionandlocal gradient clipping on top of the gradient sparsification to maintain model performance. - - DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication. - - This optimizer will do two things: - - 1. Compress the gradient by get TopK import value from tensor \ - and use it for allreduce to reduce network bandwidth. - - 2. Call momentum to optimize on the cost. - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(dgc, ops::DGCOp, ops::DGCOpMaker); diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml index bc735a2d0871ce..645bfb6ee513fe 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml @@ -476,14 +476,6 @@ func: dequantize_log data_type: x -- op : dgc - args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float[] sparsity, float m=0.9, bool use_nesterov=true, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) - output : Tensor(u_out), Tensor(v_out), Tensor(encode_grad), Tensor(grad_out), Tensor(k), Tensor(gather_buff) - kernel : - func : dgc - param : [u, v, grad, param, current_step, nranks, m, use_nesterov, sparsity, rampup_begin_step, rampup_step, regular_coeff, regular_type] - optional: param - - op : dgc_momentum args : (Tensor param, Tensor grad, Tensor velocity, Tensor learning_rate, Tensor master_param, Tensor current_step_tensor, Tensor nranks_tensor, float mu, bool use_nesterov=false, str regularization_method="", float regularization_coeff=0.0f, bool multi_precision=false, float rescale_grad=1.0f, float rampup_begin_step=-1.0f) output : Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out), Tensor(grad_out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index f4a87f4b1b8d08..e1ee247b45cbad 100755 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -3921,6 +3921,12 @@ outputs: {u_out: U_out, v_out: V_out, encode_grad: EncodeGrad, grad_out: Grad_out, gather_buff: GatherBuff} +- op: dgc + inputs: + {u : U, v : V, grad : Grad, param : Param} + outputs: + {u_out : U_out, v_out : V_out, encode_grad : EncodeGrad, grad_out : Grad_out, gather_buff : GatherBuff} + - op: distribute_fpn_proposals inputs : {fpn_rois: FpnRois, rois_num: RoisNum} diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index c8fa2957cdf665..80fca8440c6b46 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -881,6 +881,19 @@ data_type: detect_res optional: has_state, pos_count, true_pos, false_pos +- op : dgc + args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float[] sparsity, float m=0.9, bool use_nesterov=true, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) + output : Tensor(u_out), Tensor(v_out), Tensor(encode_grad), Tensor(grad_out), Tensor(k), Tensor(gather_buff) + infer_meta: + func: DgcInferMeta + param : [u, v, grad, param, current_step, nranks] + kernel : + func : dgc + param : [u, v, grad, param, current_step, nranks, m, use_nesterov, sparsity, rampup_begin_step, rampup_step, regular_coeff, regular_type] + optional: param + data_transform : + skip_transform : current_step, nranks + - op : diag args : (Tensor x, int offset = 0, float padding_value = 0.0) output : Tensor diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 8bb3861144717a..e806c925c80371 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1661,6 +1661,19 @@ void DetectionMapInferMeta(const MetaTensor& detect_res, m_ap->set_dims(common::make_ddim({1})); } +void DgcInferMeta(const MetaTensor& u, + const MetaTensor& v, + const MetaTensor& grad, + const MetaTensor& param, + const MetaTensor& current_step_tensor, + const MetaTensor& nranks_tensor, + MetaTensor* u_out, + MetaTensor* v_out, + MetaTensor* encode_grad_out, + MetaTensor* grad_out, + MetaTensor* k_out, + MetaTensor* gather_buff) {} + void DGCMomentumInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& velocity, diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index be50b08fe56e2f..ec0da11ec109b3 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -331,6 +331,19 @@ void DetectionMapInferMeta(const MetaTensor& detect_res, MetaTensor* m_ap, MetaConfig config = MetaConfig()); +void DgcInferMeta(const MetaTensor& u, + const MetaTensor& v, + const MetaTensor& grad, + const MetaTensor& param, + const MetaTensor& current_step_tensor, + const MetaTensor& nranks_tensor, + MetaTensor* u_out, + MetaTensor* v_out, + MetaTensor* encode_grad_out, + MetaTensor* grad_out, + MetaTensor* k_out, + MetaTensor* gather_buff); + void DGCMomentumInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& velocity, From acec0540ff18a0ef713c862617498814215f9b30 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sun, 19 May 2024 20:16:10 +0800 Subject: [PATCH 2/3] Fix --- paddle/phi/api/yaml/ops.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 80fca8440c6b46..23f41c10b912b3 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -882,7 +882,7 @@ optional: has_state, pos_count, true_pos, false_pos - op : dgc - args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float[] sparsity, float m=0.9, bool use_nesterov=true, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) + args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float m=0.9, bool use_nesterov=true, float[] sparsity={}, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) output : Tensor(u_out), Tensor(v_out), Tensor(encode_grad), Tensor(grad_out), Tensor(k), Tensor(gather_buff) infer_meta: func: DgcInferMeta From 4c9889d0e7d985c605e6dc56a77d40bcfc73418d Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 20 May 2024 06:01:10 +0800 Subject: [PATCH 3/3] ci