From 6b03886350dd558c4988fef723b49c6b31f0c498 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Fri, 17 May 2024 18:07:40 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=206th=20Fundable=20Projects?=
 =?UTF-8?q?=203=20No.2=E3=80=91fluid=20operator=20adadelta=20=20(#64343)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix

* ci
---
 .../operators/ops_signature/adadelta_sig.cc   |  40 -------
 .../fluid/operators/optimizers/adadelta_op.cc | 102 ------------------
 paddle/fluid/pir/dialect/operator/ir/ops.yaml |  11 --
 paddle/phi/api/yaml/legacy_ops.yaml           |  11 --
 paddle/phi/api/yaml/ops.yaml                  |  13 +++
 5 files changed, 13 insertions(+), 164 deletions(-)
 delete mode 100644 paddle/fluid/operators/ops_signature/adadelta_sig.cc
 delete mode 100644 paddle/fluid/operators/optimizers/adadelta_op.cc

diff --git a/paddle/fluid/operators/ops_signature/adadelta_sig.cc b/paddle/fluid/operators/ops_signature/adadelta_sig.cc
deleted file mode 100644
index da7e4229a0d22f..00000000000000
--- a/paddle/fluid/operators/ops_signature/adadelta_sig.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature AdadeltaOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  if (ctx.IsDenseTensorInput("Grad")) {
-    return KernelSignature("adadelta",
-                           {"Param",
-                            "Grad",
-                            "AvgSquaredGrad",
-                            "AvgSquaredUpdate",
-                            "LearningRate",
-                            "MasterParam"},
-                           {"rho", "epsilon", "multi_precision"},
-                           {"ParamOut",
-                            "AvgSquaredGradOut",
-                            "AvgSquaredUpdateOut",
-                            "MasterParamOut"});
-  }
-
-  return KernelSignature("unregistered", {}, {}, {});
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(adadelta, phi::AdadeltaOpArgumentMapping);
diff --git a/paddle/fluid/operators/optimizers/adadelta_op.cc b/paddle/fluid/operators/optimizers/adadelta_op.cc
deleted file mode 100644
index f2faa3fc7c2ead..00000000000000
--- a/paddle/fluid/operators/optimizers/adadelta_op.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/multiary.h"
-
-namespace paddle {
-namespace operators {
-
-class AdadeltaOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext &ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Param"),
-                          ctx.GetPlace());
-  }
-};
-
-class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Param", "(Tensor) Input parameter");
-    AddInput("Grad", "(Tensor) Input gradient");
-    AddInput("AvgSquaredGrad", "(Tensor) Input average of squared gradient");
-    AddInput("AvgSquaredUpdate",
-             "(Tensor) Input average of squared parameter updates");
-    AddInput("LearningRate", "(Tensor) Learning rate");
-    AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable();
-
-    AddOutput("ParamOut", "(Tensor) Output parameter");
-    AddOutput("AvgSquaredGradOut",
-              "(Tensor) Output average of squared gradient");
-    AddOutput("AvgSquaredUpdateOut",
-              "(Tensor) Output average of squared parameter updates");
-    AddOutput("MasterParamOut",
-              "The updated FP32 master weight for AMP. "
-              "It shared memory with Input(MasterParam).")
-        .AsDispensable();
-
-    AddAttr<float>("rho",
-                   "(float, default 0.95) Exponential decay rate "
-                   "for squared gradients.")
-        .SetDefault(0.95f);
-    AddAttr<float>("epsilon",
-                   "(float, default 1.0e-6) Constant for "
-                   "numerical stability")
-        .SetDefault(1.0e-6f);
-    AddAttr<bool>("multi_precision",
-                  "(bool, default false) "
-                  "Whether to use multi-precision during weight updating.")
-        .SetDefault(false);
-    AddComment(R"DOC(
-Adadelta Optimizer.
-
-Adadelta optimizer is implemented as explained in:
-https://arxiv.org/abs/1212.5701
-Adadelta is a per-dimension adaptive learning rate method used
-for gradient descent.
-
-Adadelta updates are as follows:
-
-$$
-avg\_squared\_grad\_out = \rho * avg\_squared\_grad + (1 - \rho) * grad * grad \\
-param\_update =  - \sqrt{\frac{avg\_squared\_update + \epsilon}{avg\_squared\_grad\_out + \epsilon}} * grad \\
-avg\_squared\_update\_out = \rho * avg\_squared\_update + (1 - \rho) * {param\_update}^2 \\
-param\_out = param + param\_update
-$$
-
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-DECLARE_INFER_SHAPE_FUNCTOR(adadelta,
-                            AdadeltaInferMetaFunctor,
-                            PD_INFER_META(phi::AdadeltaInferMeta));
-REGISTER_OPERATOR(
-    adadelta,
-    ops::AdadeltaOp,
-    ops::AdadeltaOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
-    AdadeltaInferMetaFunctor);
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
index 2bcb901174f0d3..bc735a2d0871ce 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
@@ -3,17 +3,6 @@
 # 2) The definitions of static graphs and dynamic graphs are inconsistent, but the final definition plan has not yet been clarified.
 # After the definition is clearly defined, migrate to paddle/fluid/pir/dialect/operator/ir/update_ops.yaml or paddle/phi/api/yaml/ops.yaml
 
-- op : adadelta_
-  args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision)
-  output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out)
-  infer_meta :
-    func : AdadeltaInferMeta
-  kernel :
-    func : adadelta
-    data_type : param
-  optional : master_param, master_param_out
-  inplace : (param -> param_out), (avg_squared_grad -> moment_out), (avg_squared_update -> inf_norm_out), (master_param -> master_param_out)
-
 - op : add
   args : (Tensor x, Tensor y)
   output : Tensor(out)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index ca66ebac014ab6..f27c19984c5993 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -1,17 +1,6 @@
 # The apis in this file are unstandardized that may caused by a variety of reasons,
 # we are trying to fix these apis and will move standardized apis into ops.yaml.
 
-- op : adadelta_
-  args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision)
-  output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out)
-  infer_meta :
-    func : AdadeltaInferMeta
-  kernel :
-    func : adadelta
-    data_type : param
-  optional : master_param, master_param_out
-  inplace : (param -> param_out), (avg_squared_grad -> moment_out), (avg_squared_update -> inf_norm_out), (master_param -> master_param_out)
-
 - op : add
   args : (Tensor x, Tensor y)
   output : Tensor(out)
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index 25c2198285c0e1..c8fa2957cdf665 100755
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -58,6 +58,19 @@
   backward : acosh_grad
   interfaces : paddle::dialect::InferSymbolicShapeInterface
 
+- op : adadelta_
+  args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update,
+    Tensor learning_rate, Tensor master_param, float rho = 0.95f, float epsilon =
+    1.0e-6f, bool multi_precision = false)
+  output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out)
+  infer_meta :
+    func : AdadeltaInferMeta
+  kernel :
+    func : adadelta
+    data_type : param
+  optional : master_param, master_param_out
+  inplace : (param -> param_out), (avg_squared_grad -> moment_out), (avg_squared_update -> inf_norm_out), (master_param -> master_param_out)
+
 - op : adagrad_
   args : (Tensor param, Tensor grad, Tensor moment, Tensor learning_rate, Tensor master_param, float epsilon = 1.0e-6f, bool multi_precision = false)
   output : Tensor(param_out), Tensor(moment_out), Tensor(master_param_out)
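
Reviewer note: for reference, the update rule quoted in the deleted DOC block
above can be sketched in plain NumPy. This is an illustrative sketch only, not
the phi kernel: the helper name adadelta_step is made up here, and the sketch
follows exactly the four DOC formulas, so it ignores the op's LearningRate
input and the multi_precision / MasterParam path, which those formulas do not
cover.

    import numpy as np

    def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                      rho=0.95, epsilon=1.0e-6):
        # E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2
        avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * grad * grad
        # delta_t = -sqrt((E[dx^2]_{t-1} + eps) / (E[g^2]_t + eps)) * g_t
        param_update = -np.sqrt(
            (avg_squared_update + epsilon) /
            (avg_squared_grad_out + epsilon)) * grad
        # E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * delta_t^2
        avg_squared_update_out = (rho * avg_squared_update +
                                  (1 - rho) * param_update ** 2)
        # x_{t+1} = x_t + delta_t
        param_out = param + param_update
        return param_out, avg_squared_grad_out, avg_squared_update_out

The Python-level paddle.optimizer.Adadelta API is expected to be unchanged by
this migration; only the op definition moves from the fluid operator layer to
paddle/phi/api/yaml/ops.yaml.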