From d99c0ef748182a0c28646a78292143ed332567c2 Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 6 May 2024 16:58:20 +0800 Subject: [PATCH 1/5] Fix --- paddle/phi/api/yaml/ops.yaml | 48 ++++++ .../legacy/cpu/fused_elementwise_kernel.cc | 153 ++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 98da34dd2d442..08713e4bfa432 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1227,6 +1227,54 @@ backend : place interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : fused_elementwise_add + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_add + data_type : x + +- op : fused_elementwise_div + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_div + data_type : x + +- op : fused_elementwise_mul + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_mul + data_type : x + +- op : fused_elementwise_sub + args: (Tensor x, 
Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_sub + data_type : x + - op : gammaincc args : (Tensor x, Tensor y) output : Tensor(out) diff --git a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc new file mode 100644 index 0000000000000..f8c19b3a3f9ad --- /dev/null +++ b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc @@ -0,0 +1,153 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" +#include "paddle/phi/kernels/legacy/elementwise_add_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_subtract_kernel.h" + +namespace phi { + +template <typename T, typename Context> +void FusedElementwiseAddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + const std::string& fuse_activation UNUSED, + float fuse_alpha UNUSED, + float fuse_beta UNUSED, + float fused_output_scale UNUSED, + const std::vector<int>& fused_unsqueeze2_axes + UNUSED, + float scale_x UNUSED, + float scale_y UNUSED, + float scale_out UNUSED, + DenseTensor* out) { + AddRawKernel<T>(dev_ctx, x, y, axis, out); +} + +template <typename T, typename Context> +void FusedElementwiseDivKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + const std::string& fuse_activation UNUSED, + float fuse_alpha UNUSED, + float fuse_beta UNUSED, + float fused_output_scale UNUSED, + const std::vector<int>& fused_unsqueeze2_axes + UNUSED, + float scale_x UNUSED, + float scale_y UNUSED, + float scale_out UNUSED, + DenseTensor* out) { + DivideRawKernel<T>(dev_ctx, x, y, axis, out); +} + +template <typename T, typename Context> +void FusedElementwiseMulKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + const std::string& fuse_activation UNUSED, + float fuse_alpha UNUSED, + float fuse_beta UNUSED, + float fused_output_scale UNUSED, + const std::vector<int>& fused_unsqueeze2_axes + UNUSED, + float scale_x UNUSED, + float scale_y UNUSED, + float scale_out UNUSED, + DenseTensor* out) { + MultiplyRawKernel<T>(dev_ctx, x, y, axis, out); +} + +template <typename T, typename Context>
+void FusedElementwiseSubKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + const std::string& fuse_activation UNUSED, + float fuse_alpha UNUSED, + float fuse_beta UNUSED, + float fused_output_scale UNUSED, + const std::vector<int>& fused_unsqueeze2_axes + UNUSED, + float scale_x UNUSED, + float scale_y UNUSED, + float scale_out UNUSED, + DenseTensor* out) { + SubtractRawKernel<T>(dev_ctx, x, y, axis, out); +} +} // namespace phi + +using complex64 = ::phi::dtype::complex<float>; +using complex128 = ::phi::dtype::complex<double>; + +PD_REGISTER_KERNEL(fused_elementwise_add, + CPU, + ALL_LAYOUT, + phi::FusedElementwiseAddKernel, + float, + double, + int, + bool, + int64_t, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(fused_elementwise_div, + CPU, + ALL_LAYOUT, + phi::FusedElementwiseDivKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(fused_elementwise_mul, + CPU, + ALL_LAYOUT, + phi::FusedElementwiseMulKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(fused_elementwise_sub, + CPU, + ALL_LAYOUT, + phi::FusedElementwiseSubKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} From 4e0a13fdfa3a18d713cd5787f5acc35eb010ae29 Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 6 May 2024 18:51:51 +0800 Subject: [PATCH 2/5] Fix --- .../operators/fused/fused_elementwise_op.cc | 95 ------------------- .../ops_signature/fused_elementwise_sig.cc | 92 ------------------ paddle/phi/api/yaml/legacy_ops.yaml | 48 ++++++++++ paddle/phi/api/yaml/ops.yaml | 48 ---------- 4 files changed, 48 insertions(+), 235 deletions(-) delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_op.cc delete mode 100644 paddle/fluid/operators/ops_signature/fused_elementwise_sig.cc diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.cc
b/paddle/fluid/operators/fused/fused_elementwise_op.cc deleted file mode 100644 index e6c2743e9385d..0000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_op.cc +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() final { - AddInput("X", "The first input tensor of elementwise op."); - AddInput("Y", "The second input tensor of elementwise op."); - AddOutput("Out", "A location into which the result is stored."); - AddAttr<int>( - "axis", - "If X.dimension != Y.dimension, Y.dimension must be a " - "subsequence of X.dimension.
And axis is the start dimension index " - "for broadcasting Y onto X.") - .SetDefault(-1); - AddAttr<std::string>( - "fuse_activation", - "Activation type from elementwise_act_onednn_fuse_pass") - .SetDefault(""); - AddAttr<float>("fuse_alpha", - "Activation alpha from elementwise_act_onednn_fuse_pass") - .SetDefault(0.0f); - AddAttr<float>("fuse_beta", - "Activation beta from elementwise_act_onednn_fuse_pass") - .SetDefault(0.0f); - AddAttr<float>("fused_output_scale", - "Obtained from operator_scale_onednn_fuse_pass") - .SetDefault(1.0f); - AddAttr<std::vector<int>>( - "fused_unsqueeze2_axes", - "Obtained from operator_unsqueeze2_onednn_fuse_pass") - .SetDefault({}); - AddAttr<float>("scale_x", "Elementwise X input quantization scale") - .SetDefault(1.0f); - AddAttr<float>("scale_y", "Elementwise Y input quantization scale") - .SetDefault(1.0f); - AddAttr<float>("scale_out", "Elementwise Out output quantization scale") - .SetDefault(1.0f); - AddComment( - R"DOC(Elementwise operator extended with oneDNN-specific fusion logic.)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - fused_elementwise_add, - ops::ElementwiseOp, - ops::FusedElementwiseOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, - paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); - -REGISTER_OPERATOR( - fused_elementwise_sub, - ops::ElementwiseOp, - ops::FusedElementwiseOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, - paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); - -REGISTER_OPERATOR( - fused_elementwise_mul, - ops::ElementwiseOp, - ops::FusedElementwiseOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, - paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); - -REGISTER_OPERATOR( - fused_elementwise_div, - ops::ElementwiseOp, - ops::FusedElementwiseOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, - paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); diff --git
a/paddle/fluid/operators/ops_signature/fused_elementwise_sig.cc b/paddle/fluid/operators/ops_signature/fused_elementwise_sig.cc deleted file mode 100644 index 34e0bfd314fd6..0000000000000 --- a/paddle/fluid/operators/ops_signature/fused_elementwise_sig.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature FusedElementwiseAddOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("fused_elementwise_add", - {"X", "Y"}, - {"axis", - "fuse_activation", - "fuse_alpha", - "fuse_beta", - "fused_output_scale", - "fused_unsqueeze2_axes", - "scale_x", - "scale_y", - "scale_out"}, - {"Out"}); -} - -KernelSignature FusedElementwiseSubOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("fused_elementwise_sub", - {"X", "Y"}, - {"axis", - "fuse_activation", - "fuse_alpha", - "fuse_beta", - "fused_output_scale", - "fused_unsqueeze2_axes", - "scale_x", - "scale_y", - "scale_out"}, - {"Out"}); -} - -KernelSignature FusedElementwiseMulOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("fused_elementwise_mul", - {"X", "Y"}, - {"axis", - "fuse_activation", - "fuse_alpha", - "fuse_beta", - "fused_output_scale", - "fused_unsqueeze2_axes", - "scale_x", - "scale_y", - "scale_out"}, - {"Out"}); -} - 
-KernelSignature FusedElementwiseDivOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("fused_elementwise_div", - {"X", "Y"}, - {"axis", - "fuse_activation", - "fuse_alpha", - "fuse_beta", - "fused_output_scale", - "fused_unsqueeze2_axes", - "scale_x", - "scale_y", - "scale_out"}, - {"Out"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_add, - phi::FusedElementwiseAddOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_sub, - phi::FusedElementwiseSubOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_mul, - phi::FusedElementwiseMulOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_div, - phi::FusedElementwiseDivOpArgumentMapping); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 6260945b48971..dd73030642fbe 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -603,6 +603,54 @@ view : (mean -> mean_out), (variance -> variance_out) backward : fused_bn_add_activation_grad +- op : fused_elementwise_add + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_add + data_type : x + +- op : fused_elementwise_div + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_div + data_type : x + +- op : fused_elementwise_mul + args: (Tensor x, Tensor y, 
int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_mul + data_type : x + +- op : fused_elementwise_sub + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_sub + data_type : x + - op : fused_gemm_epilogue args : (Tensor x, Tensor y, Tensor bias, bool trans_x, bool trans_y, str activation) output : Tensor(out), Tensor(reserve_space) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 08713e4bfa432..98da34dd2d442 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1227,54 +1227,6 @@ backend : place interfaces : paddle::dialect::InferSymbolicShapeInterface -- op : fused_elementwise_add - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_add - data_type : x - -- op : fused_elementwise_div - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - 
func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_div - data_type : x - -- op : fused_elementwise_mul - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_mul - data_type : x - -- op : fused_elementwise_sub - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_sub - data_type : x - - op : gammaincc args : (Tensor x, Tensor y) output : Tensor(out) From 5998e2b8879f08040569fc81733e54a8e8165a9e Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 6 May 2024 18:56:17 +0800 Subject: [PATCH 3/5] Fix --- paddle/phi/api/yaml/legacy_ops.yaml | 48 ----------------------------- paddle/phi/api/yaml/ops.yaml | 48 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index dd73030642fbe..6260945b48971 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -603,54 +603,6 @@ view : (mean -> mean_out), (variance -> variance_out) backward : fused_bn_add_activation_grad -- op : fused_elementwise_add - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: 
Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_add - data_type : x - -- op : fused_elementwise_div - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_div - data_type : x - -- op : fused_elementwise_mul - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_mul - data_type : x - -- op : fused_elementwise_sub - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_sub - data_type : x - - op : fused_gemm_epilogue args : (Tensor x, Tensor y, Tensor bias, bool trans_x, bool trans_y, str activation) output : Tensor(out), Tensor(reserve_space) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 98da34dd2d442..08713e4bfa432 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1227,6 +1227,54 @@ backend : place interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : fused_elementwise_add + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float 
fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_add + data_type : x + +- op : fused_elementwise_div + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_div + data_type : x + +- op : fused_elementwise_mul + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_mul + data_type : x + +- op : fused_elementwise_sub + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_sub + data_type : x + - op : gammaincc args : (Tensor x, Tensor y) output : Tensor(out) From 6cf1b06286d2903c6f86facdad97acbd30ff0a4e Mon Sep 17 00:00:00 2001 From: co63oc Date: Tue, 7 May 2024 19:04:57 +0800 Subject: [PATCH 4/5] Fix --- paddle/phi/api/yaml/fused_ops.yaml | 48 +++++++++++++++++++ paddle/phi/api/yaml/ops.yaml | 48 ------------------- .../legacy/cpu/fused_elementwise_kernel.cc | 3 +- 3 files changed, 49 
insertions(+), 50 deletions(-) diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index 304c543d1a463..597b007b107d0 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -232,6 +232,54 @@ backward : fused_dropout_add_grad support_dygraph_mode : true +- op : fused_elementwise_add + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_add + data_type : x + +- op : fused_elementwise_div + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_div + data_type : x + +- op : fused_elementwise_mul + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + func : fused_elementwise_mul + data_type : x + +- op : fused_elementwise_sub + args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha + = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes + = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) + output: Tensor (out) + infer_meta: + func: ElementwiseInferMeta + param : [x, y] + kernel : + 
func : fused_elementwise_sub + data_type : x + - op : fused_embedding_eltwise_layernorm args : (Tensor[] ids, Tensor[] embs, Tensor bias, Tensor scale, float epsilon = 0.00001f) output : Tensor(out) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 08713e4bfa432..98da34dd2d442 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1227,54 +1227,6 @@ backend : place interfaces : paddle::dialect::InferSymbolicShapeInterface -- op : fused_elementwise_add - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_add - data_type : x - -- op : fused_elementwise_div - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_div - data_type : x - -- op : fused_elementwise_mul - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_mul - data_type : x - -- op : fused_elementwise_sub - args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha - = 0.0f, float fuse_beta = 0.0f, float fused_output_scale = 1.0f, int[] fused_unsqueeze2_axes - = {}, float 
scale_x = 1.0f, float scale_y = 1.0f, float scale_out = 1.0f) - output: Tensor (out) - infer_meta: - func: ElementwiseInferMeta - param : [x, y] - kernel : - func : fused_elementwise_sub - data_type : x - - op : gammaincc args : (Tensor x, Tensor y) output : Tensor(out) diff --git a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc index f8c19b3a3f9ad..ec640c2257c3f 100644 --- a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" From dc0acac734ed1f9ae985ac3524ff8dfeaa02a810 Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 9 May 2024 13:18:23 +0800 Subject: [PATCH 5/5] Fix --- paddle/fluid/pir/dialect/op_generator/ops_api_gen.py | 4 ++++ paddle/phi/api/yaml/fused_ops.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py index 2647b579f2bc7..ef33da14eb6ab 100644 --- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py @@ -161,6 +161,10 @@ 'fused_token_prune', 'fused_dconv_drelu_dbn', 'fused_dot_product_attention', + 'fused_elementwise_add', + 'fused_elementwise_div', + 'fused_elementwise_mul', + 'fused_elementwise_sub', 'nce', 'lars_momentum', 'lars_momentum_', diff --git 
a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index 597b007b107d0..df4ca1ce124d3 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -243,6 +243,7 @@ kernel : func : fused_elementwise_add data_type : x + support_dygraph_mode : true - op : fused_elementwise_div args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha @@ -255,6 +256,7 @@ kernel : func : fused_elementwise_div data_type : x + support_dygraph_mode : true - op : fused_elementwise_mul args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha @@ -267,6 +269,7 @@ kernel : func : fused_elementwise_mul data_type : x + support_dygraph_mode : true - op : fused_elementwise_sub args: (Tensor x, Tensor y, int axis = -1, str fuse_activation = "", float fuse_alpha @@ -279,6 +282,7 @@ kernel : func : fused_elementwise_sub data_type : x + support_dygraph_mode : true - op : fused_embedding_eltwise_layernorm args : (Tensor[] ids, Tensor[] embs, Tensor bias, Tensor scale, float epsilon = 0.00001f)