diff --git a/paddle/fluid/framework/type_info.cc b/paddle/fluid/framework/type_info.cc index bd7f8a8e3b1314..deaf8d30af3b9e 100644 --- a/paddle/fluid/framework/type_info.cc +++ b/paddle/fluid/framework/type_info.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/raw_tensor.h" #include "paddle/fluid/framework/string_array.h" +#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/primitive/type/lazy_tensor.h" @@ -46,5 +47,6 @@ template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; +template class TypeInfoTraits; } // namespace phi diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index f7889e065885d5..cd3cfa558a294e 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -978,9 +978,9 @@ struct EmbeddingGradOpTranscriber : public OpTranscriber { bool is_sparse = paddle::get(op_desc.GetAttr("is_sparse")); if (is_sparse) { - target_op_name = "pd_op.embedding_grad_sparse"; + target_op_name = "pd_op.embedding_sparse_grad"; } else { - target_op_name = "pd_op.embedding_grad_dense"; + target_op_name = "pd_op.embedding_grad"; } VLOG(6) << "[op name normalizing: " << op_desc.Type() << " to " << target_op_name; diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt index 582351fdefbd99..7cac45069a9e1d 100644 --- a/paddle/fluid/pir/dialect/CMakeLists.txt +++ b/paddle/fluid/pir/dialect/CMakeLists.txt @@ -11,15 +11,9 @@ set(op_compat_yaml_file ${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/op_compat.yaml) set(op_forward_yaml_file1 ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/ops.parsed.yaml ) 
-set(op_forward_yaml_file2 - ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_ops.parsed.yaml -) set(op_backward_yaml_file1 ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/backward_ops.parsed.yaml ) -set(op_backward_yaml_file2 - ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_backward_ops.parsed.yaml -) set(fused_op_forward_yaml_file ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/fused_ops.parsed.yaml ) @@ -44,7 +38,7 @@ set(op_yaml_file4 ${parsed_op_dir}/ops_backward.parsed.yaml) set(op_yaml_file5 ${parsed_op_dir}/update_ops.parsed.yaml) set(op_yaml_files - ${op_forward_yaml_file1},${op_forward_yaml_file2},${op_backward_yaml_file1},${op_backward_yaml_file2},${fused_op_forward_yaml_file},${fused_op_backward_yaml_file},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} + ${op_forward_yaml_file1},${op_backward_yaml_file1},${fused_op_forward_yaml_file},${fused_op_backward_yaml_file},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} ) set(op_namespace paddle,dialect) set(dialect_name pd_op) @@ -78,7 +72,7 @@ set(generated_files_pd_op "${op_header_file}" "${op_source_file}" "${op_vjp_source_file}") set(api_gen_yaml_files - ${op_forward_yaml_file1},${op_forward_yaml_file2},${op_backward_yaml_file1},${op_backward_yaml_file2},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} + ${op_forward_yaml_file1},${op_backward_yaml_file1},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} ) set(api_gen_file ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/op_generator/api_gen.py) diff --git a/paddle/fluid/pir/dialect/op_generator/api_gen.py b/paddle/fluid/pir/dialect/op_generator/api_gen.py index 355aa79a48a898..70abfb8d4ab314 100644 --- a/paddle/fluid/pir/dialect/op_generator/api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/api_gen.py @@ -26,6 +26,10 @@ update_ops, ) +PD_MANUAL_API_LIST = { + 'embedding_grad', +} + H_FILE_TEMPLATE = """ #pragma once @@ -204,7 +208,7 @@ def _parse_yaml(self, 
op_yaml_files, op_compat_yaml_file): def _need_skip(self, op_info, op_name): return ( op_info.infer_meta_func is None and op_name not in PD_MANUAL_OP_LIST - ) + ) or op_name in PD_MANUAL_API_LIST def _is_optional_input(self, op_info, input_name): name_list = op_info.input_name_list diff --git a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py index 346b5203691a52..e617b1f342d789 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py @@ -326,7 +326,7 @@ def GenBuildOutputs( paddle::dialect::IrMetaTensor meta_{name}; paddle::dialect::IrTensor ir_tensor_{name}; if ({name}_.impl() != nullptr) {{ - paddle::dialect::DenseTensorType {name} = {name}_.type().dyn_cast(); + {type} {name} = {name}_.type().dyn_cast<{type}>(); VLOG(4) << "Builder construction dense_{name}"; ir_tensor_{name} = paddle::dialect::IrTensor(paddle::dialect::TransToPhiDataType({name}.dtype()), {name}.dims(), @@ -432,6 +432,9 @@ def GenBuildOutputs( CREATE_OUTPUT_METATENSOR_TEMPLATE = """ paddle::dialect::IrTensor dense_{name}; paddle::dialect::IrMetaTensor meta_{name}(&dense_{name}); +""" + CREATE_OUTPUT_METASELETEROWS_TEMPLATE = """ paddle::dialect::IrSelectedRows dense_{name}; + paddle::dialect::IrMetaTensor meta_{name}(&dense_{name}); """ CREATE_OUTPUT_VEC_METATENSOR_TEMPLATE = """ std::vector vec_dense_{name}(({output_size}), paddle::dialect::IrTensor()); std::vector vec_meta_{name}; @@ -454,8 +457,8 @@ def GenBuildOutputs( # is a Tensor else: if op_input_optional_list[idx] == 'false': - build_output_str += " paddle::dialect::DenseTensorType {name} = {name}_.type().dyn_cast(); (void){name};\n".format( - name=op_input_name_list[idx] + build_output_str += " {type} {name} = {name}_.type().dyn_cast<{type}>(); (void){name};\n".format( + type=op_input_type_list[idx], name=op_input_name_list[idx] ) # Prepare mutable attributes @@ -526,7 +529,8 @@ def GenBuildOutputs( if 
op_input_optional_list[input_index] == 'true': build_output_str += ( CREATE_OPTIONAL_INPUT_METATENSOR_TEMPLATE.format( - name=op_infer_meta_map['param'][idx] + name=op_infer_meta_map['param'][idx], + type=op_input_type_list[idx], ) ) else: @@ -552,10 +556,18 @@ def GenBuildOutputs( infer_meta_args.append(f"meta_{op_output_name_list[idx]}") # is a Tensor else: - build_output_str += CREATE_OUTPUT_METATENSOR_TEMPLATE.format( - name=op_output_name_list[idx] - ) - infer_meta_args.append(f"&meta_{op_output_name_list[idx]}") + if op_output_type_list[idx] == "paddle::dialect::DenseTensorType": + build_output_str += CREATE_OUTPUT_METATENSOR_TEMPLATE.format( + name=op_output_name_list[idx] + ) + infer_meta_args.append(f"&meta_{op_output_name_list[idx]}") + else: + build_output_str += ( + CREATE_OUTPUT_METASELETEROWS_TEMPLATE.format( + name=op_output_name_list[idx] + ) + ) + infer_meta_args.append(f"&meta_{op_output_name_list[idx]}") # Execute infer meta function CREATE_INFER_META_FUNC_TEMPLATE = """ @@ -579,13 +591,13 @@ def GenBuildOutputs( build_output_str += "\n std::vector argument_outputs;" CREATE_OUTPUT_DENSE_TENSOR_TEMPLATE = """ - pir::Type {name}_dense_tensor_type = paddle::dialect::DenseTensorType::get(pir::IrContext::Instance(), paddle::dialect::TransToIrDataType(dense_{name}.dtype()), dense_{name}.dims(), dense_{name}.layout(), dense_{name}.lod(), dense_{name}.offset()); + pir::Type {name}_dense_tensor_type = {type}::get(pir::IrContext::Instance(), paddle::dialect::TransToIrDataType(dense_{name}.dtype()), dense_{name}.dims(), dense_{name}.layout(), dense_{name}.lod(), dense_{name}.offset()); argument_outputs.push_back({name}_dense_tensor_type); """ CREATE_OUTPUT_INPLACE_OPTIONAL_DENSE_TENSOR_TEMPLATE = """ if ({input_name}_.impl() != nullptr) {{ - pir::Type {output_name}_dense_tensor_type = paddle::dialect::DenseTensorType::get(pir::IrContext::Instance(), paddle::dialect::TransToIrDataType(dense_{output_name}.dtype()), dense_{output_name}.dims(), 
dense_{output_name}.layout(), dense_{output_name}.lod(), dense_{output_name}.offset()); + pir::Type {output_name}_dense_tensor_type = {type}::get(pir::IrContext::Instance(), paddle::dialect::TransToIrDataType(dense_{output_name}.dtype()), dense_{output_name}.dims(), dense_{output_name}.layout(), dense_{output_name}.lod(), dense_{output_name}.offset()); argument_outputs.push_back({output_name}_dense_tensor_type); }} else {{ pir::Type {output_name}_type; @@ -622,11 +634,12 @@ def GenBuildOutputs( CREATE_OUTPUT_INPLACE_OPTIONAL_DENSE_TENSOR_TEMPLATE.format( input_name=op_inplace_map[output_name], output_name=output_name, + type=op_output_type_list[idx], ) ) else: build_output_str += CREATE_OUTPUT_DENSE_TENSOR_TEMPLATE.format( - name=output_name + type=op_output_type_list[idx], name=output_name ) build_output_str += " argument.AddOutputs(argument_outputs.begin(), argument_outputs.end());\n" diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py index d01ceeb829afbc..10d79bc8b27918 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py @@ -135,6 +135,7 @@ class {op_name} : public pir::Op<{op_name}{interfaces}{traits}> {{ #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" +#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_type.h" diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.cc b/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.cc index c033e0946e3deb..d8ad58ce1511b7 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.cc +++ b/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h" +#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" namespace paddle { @@ -82,7 +83,9 @@ void IrMetaTensor::share_meta(const MetaTensor& meta_tensor) { bool IrMetaTensor::initialized() const { return tensor_ != nullptr; } -bool IrMetaTensor::is_selected_rows() const { return false; } +bool IrMetaTensor::is_selected_rows() const { + return IrSelectedRows::classof(tensor_); +} bool IrMetaTensor::is_tensor_array() const { return false; } diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc new file mode 100644 index 00000000000000..d08dfccc25250d --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" + +#include "paddle/pir/core/enforce.h" + +namespace paddle { +namespace dialect { +IrSelectedRows::IrSelectedRows(phi::DataType dtype, + const phi::DDim& dims, + phi::DataLayout layout, + const LoD& lod, + size_t offset) + : dims_(dims), dtype_(dtype), layout_(layout), lod_(lod), offset_(offset) {} + +IrSelectedRows::IrSelectedRows(const IrSelectedRows& other) { + dims_ = other.dims(); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = other.lod(); + offset_ = other.offset(); +} + +IrSelectedRows& IrSelectedRows::operator=(const IrSelectedRows& other) { + dims_ = other.dims(); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = other.lod(); + offset_ = other.offset(); + return *this; +} + +IrSelectedRows& IrSelectedRows::operator=(IrSelectedRows&& other) noexcept { + dims_ = std::move(other.dims()); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = std::move(other.lod()); + offset_ = other.offset(); + return *this; +} + +int64_t IrSelectedRows::numel() const { return phi::product(dims_); } + +const phi::Place& IrSelectedRows::place() const { + IR_THROW("Don't use IrSelectedRows::place method."); +} + +void* IrSelectedRows::AllocateFrom(phi::Allocator* allocator, + phi::DataType dtype, + size_t requested_size, + bool fake_alloc) { + IR_THROW("Don't use IrSelectedRows::AllocateFrom method."); +} + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h new file mode 100644 index 00000000000000..37000c86b5b653 --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h @@ -0,0 +1,91 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/allocator.h" +#include "paddle/phi/core/tensor_base.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace paddle { +namespace dialect { + +using LoD = std::vector>; + +class IrSelectedRows + : public phi::TensorBase, + public phi::TypeInfoTraits { + public: + IrSelectedRows() = default; + + IrSelectedRows(phi::DataType dtype, + const phi::DDim& dims, + phi::DataLayout layout, + const LoD& lod, + size_t offset = 0); + + IrSelectedRows(IrSelectedRows&& other) = default; + + IrSelectedRows(const IrSelectedRows& other); + + IrSelectedRows& operator=(const IrSelectedRows& other); + + IrSelectedRows& operator=(IrSelectedRows&& other) noexcept; + + virtual ~IrSelectedRows() = default; + + public: + static const char* name() { return "IrSelectedRows"; } + + int64_t numel() const override; + + const phi::DDim& dims() const noexcept override { return dims_; } + + void SetDims(const phi::DDim& dims) { dims_ = dims; } + + const phi::Place& place() const override; + + phi::DataType dtype() const noexcept override { return dtype_; } + + void SetDtype(phi::DataType dtype) { dtype_ = dtype; } + + phi::DataLayout layout() const noexcept override { return layout_; } + + void SetLayout(phi::DataLayout layout) { layout_ = layout; } + + const LoD& lod() const noexcept { return lod_; } + + void SetLod(LoD lod) { lod_ = lod; } + + size_t offset() const noexcept { return offset_; } + + bool valid() const noexcept override { return true; } + + bool initialized() const override { return true; } + + void* 
AllocateFrom(phi::Allocator* allocator, + phi::DataType dtype, + size_t requested_size = 0, + bool fake_alloc = false) override; + + private: + phi::DDim dims_; + phi::DataType dtype_{phi::DataType::FLOAT32}; + phi::DataLayout layout_{phi::DataLayout::ANY}; + LoD lod_; + size_t offset_{0}; +}; + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_api.cc b/paddle/fluid/pir/dialect/operator/ir/manual_api.cc index 871136b17dbd6e..916b3fd770e4d6 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_api.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_api.cc @@ -69,11 +69,18 @@ pir::OpResult embedding_grad(const pir::Value& x, bool sparse) { if (weight.type().isa()) { if (sparse) { - return paddle::dialect::embedding_grad_sparse( - x, weight, out_grad, padding_idx, sparse); + auto embedding_grad_op = + ApiBuilder::Instance() + .GetBuilder() + ->Build( + x, weight, out_grad, padding_idx); + return embedding_grad_op.weight_grad(); } else { - return paddle::dialect::embedding_grad_dense( - x, weight, out_grad, padding_idx, sparse); + auto embedding_grad_op = ApiBuilder::Instance() + .GetBuilder() + ->Build( + x, weight, out_grad, padding_idx); + return embedding_grad_op.weight_grad(); } } else { PADDLE_THROW(phi::errors::Unimplemented( diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index 7d9b144b758918..53b66b169d7407 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h" +#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" diff --git 
a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml index 1b8cb61d572d69..dbb6ad62f10829 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml @@ -1,3 +1,34 @@ +# The apis in this file are unstandardized that may caused by a variety of reasons, +# we are trying to fix these apis and will move standardized apis into ops.yaml. + +- op : adadelta_ + args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision) + output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out) + infer_meta : + func : AdadeltaInferMeta + kernel : + func : adadelta + data_type : param + optional : master_param, master_param_out + inplace : (param -> param_out), (avg_squared_grad -> moment_out), (avg_squared_update -> inf_norm_out), (master_param -> master_param_out) + +- op : add + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : add + inplace : (x -> out) + backward : add_grad + +- op : add_n + args : (Tensor[] inputs) + output : Tensor + invoke : add_n_impl(inputs) + backward : add_n_grad + - op : add_n_ args : (Tensor[] inputs) output : Tensor(out) @@ -18,6 +49,53 @@ func: add_n param: [inputs] +- op : all + args : (Tensor x, int64_t[] axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceInferMeta + kernel : + func : all + +- op : amax + args : (Tensor x, int64_t[] axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceInferMeta + kernel : + func : amax + backward : amax_grad + +- op : amin + args : (Tensor x, int64_t[] axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceInferMeta + kernel : + func : amin + backward : amin_grad + +- op : any + args : 
(Tensor x, int64_t[] axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceInferMeta + kernel : + func : any + +- op : arange + args : (Scalar start, Scalar end, Scalar step, DataType dtype=DataType::FLOAT64, Place place=CPUPlace()) + output : Tensor(out) + infer_meta : + func : ArangeInferMeta + param : [start, end, step, dtype] + kernel : + func : arange + param : [start, end, step] + data_type : dtype + backend : place + support_tensor : [start, end, step] + - op : assert args : (Tensor cond, Tensor[] data, int64_t summarize = -1) output : @@ -26,6 +104,28 @@ param : [cond, data, summarize] data_type : cond +- op : assign + args : (Tensor x) + output : Tensor + infer_meta : + func : UnchangedInferMeta + kernel : + func : assign + backward : assign_grad + inplace : (x -> out) + +- op : assign_out_ + args : (Tensor x, Tensor output) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : assign + param : [x] + inplace : (output -> out) + backward : assign_out__grad + - op : assign_value args : (int[] shape, DataType dtype, Scalar[] values, Place place = {}) output : Tensor(out) @@ -38,6 +138,110 @@ backend: place> data_type : dtype +- op : assign_value_ + args : (Tensor output, int[] shape, DataType dtype, Scalar[] values, Place place = {}) + output : Tensor(out) + inplace: (output -> out) + infer_meta : + func : AssignValueInferMeta + param : [shape, dtype] + kernel : + func : assign_value + param : [shape, dtype, values] + data_type : dtype + backend : place > output + +- op : batch_norm + args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) + output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + infer_meta: + func : BatchNormInferMeta + kernel : + func : batch_norm + data_type : x + 
view : (mean -> mean_out), (variance -> variance_out) + backward : batch_norm_grad + optional : scale, bias, reserve_space + +- op : c_allgather + args : (Tensor x, int ring_id, int nranks, bool use_calc_stream) + output : Tensor(out) + infer_meta : + func : AllGatherInferMeta + param: [x, nranks] + kernel : + func : c_allgather + +- op : c_allreduce_max + args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel) + output : Tensor(out) + infer_meta : + func : AllReduceInferMeta + param : [x] + kernel : + func : c_allreduce_max + inplace : (x -> out) + +- op : c_allreduce_sum + args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel) + output : Tensor(out) + infer_meta : + func : AllReduceInferMeta + param : [x] + kernel : + func : c_allreduce_sum + inplace : (x -> out) + +- op : c_broadcast + args : (Tensor x, int ring_id=0, int root=0, bool use_calc_stream=false) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : c_broadcast + inplace : (x -> out) + +- op : c_concat + args : (Tensor x, int rank, int nranks, int ring_id, bool use_calc_stream, bool use_model_parallel) + output : Tensor(out) + infer_meta : + func : CConcatInferMeta + param : [x, nranks] + kernel : + func : c_concat + +- op : c_embedding + args : (Tensor weight, Tensor x, int64_t start_index=0) + output : Tensor(out) + infer_meta : + func : CEmbeddingInferMeta + param : [weight, x, start_index] + kernel : + func : c_embedding + param : [weight, x, start_index] + data_type : weight + backward : c_embedding_grad + +- op : c_identity + args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel) + output : Tensor(out) + infer_meta : + func : CIdentityInferMeta + kernel : + func : c_identity + inplace : (x -> out) + +- op : c_reduce_sum + args : (Tensor x, int ring_id, int root_id, bool use_calc_stream) + output : Tensor(out) + infer_meta : + func : DistReduceInferMeta + param : [x] + kernel : + func 
: c_reduce_sum + inplace : (x -> out) + - op : c_reducescatter args : (Tensor x, int ring_id = 0, int nranks = 1, bool use_calc_stream = false) output : Tensor(out) @@ -48,16 +252,238 @@ func : reduce_scatter param: [x, nranks] -- op : embedding_grad_sparse - args : (Tensor x, Tensor weight, Tensor out_grad, int64_t padding_idx = -1, bool sparse = false) - output : SelectedRows(weight_grad) +- op : c_sync_calc_stream + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : c_sync_calc_stream + inplace : (x -> out) + +- op : c_sync_comm_stream + args : (Tensor x, int ring_id) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : c_sync_comm_stream + inplace : (x -> out) + +- op : cast + args : (Tensor x, DataType dtype) + output : Tensor(out) + infer_meta : + func : CastInferMeta + spmd_rule : CastInferSpmd + kernel : + func : cast + param : [x, dtype] + data_type : x + inplace: (x -> out) + backward : cast_grad + +- op : channel_shuffle + args : (Tensor x, int groups, str data_format="NCHW") + output : Tensor(out) + infer_meta : + func : ChannelShuffleInferMeta + kernel : + func : channel_shuffle + backward : channel_shuffle_grad + +- op : conv2d_transpose + args : (Tensor x, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") + output : Tensor(out) + infer_meta : + func : Conv2dTransposeInferMeta + kernel : + func : conv2d_transpose + data_type : x + backward : conv2d_transpose_grad + +- op : copy_to + args : (Tensor x, Place place, bool blocking) + output : Tensor(out) + invoke : copy_to_impl(x, place, blocking) + +- op : decode_jpeg + args : (Tensor x, str mode, Place place) + output : Tensor(out) + infer_meta : + func : DecodeJpegInferMeta + param : [x, mode] + kernel : + func : decode_jpeg + param : 
[x, mode] + backend : place + +- op : deformable_conv + args : (Tensor x, Tensor offset, Tensor filter, Tensor mask, int[] strides, int[] paddings, int[] dilations, int deformable_groups, int groups, int im2col_step) + output : Tensor(out) + infer_meta : + func : DeformableConvInferMeta + kernel : + func : deformable_conv + data_type : x + optional : mask + backward : deformable_conv_grad + +- op : depthwise_conv2d_transpose + args : (Tensor x, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") + output : Tensor(out) + infer_meta : + func : Conv2dTransposeInferMeta + kernel : + func : depthwise_conv2d_transpose + data_type : x + backward : depthwise_conv2d_transpose_grad + +- op : disable_check_model_nan_inf + args: (Tensor x, int flag = 0) + output: Tensor(out) infer_meta: - func: EmbeddingGradSparseInferMeta - param: [weight] + func: UnchangedInferMeta + param : [x] kernel: - func: embedding_sparse_grad - param: [x, weight, out_grad, padding_idx, sparse] + func: check_model_nan_inf + data_type: x + backward: disable_check_model_nan_inf_grad + +- op : distribute_fpn_proposals + args : (Tensor fpn_rois, Tensor rois_num, int min_level, int max_level, int refer_level, int refer_scale, bool pixel_offset) + output : Tensor[](multi_fpn_rois){max_level - min_level + 1}, Tensor[](multi_level_rois_num){max_level - min_level + 1}, Tensor(restore_index) + infer_meta : + func : DistributeFpnProposalsInferMeta + kernel : + func : distribute_fpn_proposals + data_type : fpn_rois + optional : rois_num + +- op : divide + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : divide + inplace: (x -> out) + backward : divide_grad + +- op : dropout + args : (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, 
bool fix_seed) + output : Tensor(out), Tensor(mask) + infer_meta : + func : DropoutInferMeta + kernel : + func : dropout + data_type : x + optional : seed_tensor + backward : dropout_grad + +- op : einsum + args : (Tensor[] x, str equation) + output : Tensor(out), Tensor[](inner_cache){x.size()}, Tensor[](xshape){x.size()} + infer_meta : + func : EinsumRawInferMeta + param : [x, equation] + kernel : + func : einsum + backward : einsum_grad + +- op : elementwise_pow + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + spmd_rule: ElementwiseBinaryInferSpmd + kernel : + func : elementwise_pow + backward : elementwise_pow_grad + +- op : embedding + args : (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false) + output : Tensor + infer_meta : + func : EmbeddingInferMeta + param : [x, weight, padding_idx] + kernel : + func : embedding {dense, dense -> dense} + sparse_weight_embedding {dense, selected_rows -> dense} + param : [x, weight, padding_idx] data_type : weight + backward : embedding_grad + +- op : empty + args : (IntArray shape, DataType dtype=DataType::FLOAT32, Place place=CPUPlace()) + output: Tensor(out) + infer_meta : + func : CreateInferMeta + param : [shape, dtype] + kernel : + func : empty + param : [shape, dtype] + data_type : dtype + backend : place + +- op : empty_like + args : (Tensor x, DataType dtype = DataType::UNDEFINED, Place place = {}) + output: Tensor(out) + infer_meta : + func : CreateLikeInferMeta + param : [x, dtype] + kernel : + func : empty_like + param : [x, dtype] + data_type : dtype > x + backend : place > x + +- op : enable_check_model_nan_inf + args: (Tensor x, int flag = 1) + output: Tensor(out) + infer_meta: + func: UnchangedInferMeta + param : [x] + kernel: + func: check_model_nan_inf + data_type: x + backward: enable_check_model_nan_inf_grad + +- op : equal + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + spmd_rule: 
ElementwiseBinaryInferSpmd + kernel : + func : equal + inplace: (x -> out) + +- op : exponential_ + args : (Tensor x, float lam) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : exponential + inplace : (x -> out) + backward : exponential__grad + +- op : eye + args : (Scalar num_rows, Scalar num_columns, DataType dtype=DataType::FLOAT32, Place place={}) + output : Tensor(out) + infer_meta : + func : EyeInferMeta + param : [num_rows, num_columns, dtype] + kernel : + func : eye + param : [num_rows, num_columns, dtype] + data_type : dtype + backend : place - op : feed args : (str name, int col) @@ -74,6 +500,141 @@ param : [x] traits : pir::SideEffectTrait +- op : floor_divide + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + kernel : + func : floor_divide + inplace: (x -> out) + +- op : frobenius_norm + args : (Tensor x, IntArray axis, bool keep_dim, bool reduce_all) + output : Tensor(out) + infer_meta : + func : ReduceIntArrayAxisInferMetaBase + kernel : + func : frobenius_norm + backward : frobenius_norm_grad + +- op : full + args : (IntArray shape, Scalar value, DataType dtype=DataType::FLOAT32, Place place=CPUPlace()) + output: Tensor(out) + infer_meta : + func : CreateInferMeta + param : [shape, dtype] + kernel : + func : full + param : [shape, value, dtype] + data_type : dtype + backend : place + +- op : full_ + args : (Tensor output, IntArray shape, Scalar value, DataType dtype=DataType::FLOAT32, Place place=CPUPlace()) + output : Tensor(out) + inplace : (output -> out) + infer_meta : + func : CreateInferMeta + param : [shape, dtype] + kernel : + func : full + param : [shape, value, dtype] + data_type : dtype + backend : place + +- op : full_batch_size_like + args : (Tensor input, int[] shape, DataType dtype, Scalar value, int input_dim_idx, int output_dim_idx, Place place=CPUPlace()) + output: Tensor(out) + infer_meta : + func : FullBatchSizeLikeInferMeta + 
param : [input, shape, value, dtype, input_dim_idx, output_dim_idx] + kernel : + func : full_batch_size_like + param : [input, shape, value, dtype, input_dim_idx, output_dim_idx] + data_type : dtype + backend : place + +- op : full_like + args : (Tensor x, Scalar value, DataType dtype = DataType::UNDEFINED, Place place = {}) + output: Tensor(out) + infer_meta : + func : CreateLikeInferMeta + param : [x, dtype] + kernel : + func : full_like + param : [x, value, dtype] + data_type : dtype > x + backend : place > x + data_transform : + skip_transform : x + +- op : full_with_tensor + args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32) + output: Tensor(out) + infer_meta : + func : FullWithTensorInferMeta + param : [shape, dtype] + kernel : + func : full_with_tensor + data_type : dtype + +- op : fused_adam_ + args : (Tensor[] params, Tensor[] grads, Tensor learning_rate, Tensor[] moments1, Tensor[] moments2, Tensor[] beta1_pows, Tensor[] beta2_pows, Tensor[] master_params, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, int chunk_size, float weight_decay, bool use_adamw, bool multi_precision, bool use_global_beta_pow) + output : Tensor[](params_out){params.size()}, Tensor[](moments1_out){params.size()}, Tensor[](moments2_out){params.size()}, Tensor[](beta1_pows_out){params.size()}, Tensor[](beta2_pows_out){params.size()}, Tensor[](master_params_out){params.size()} + infer_meta : + func : FusedAdamInferMeta + kernel : + func : fused_adam + data_type : params + optional : skip_update, master_params + inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out) + +- op : fused_batch_norm_act + args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) + output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), 
Tensor(saved_variance), Tensor(reserve_space) + infer_meta: + func : FusedBatchNormActInferMeta + param : [x, scale, bias, mean, variance] + kernel : + func : fused_batch_norm_act + data_type : x + view : (mean -> mean_out), (variance -> variance_out) + backward : fused_batch_norm_act_grad + +- op : fused_bn_add_activation + args : (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) + output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + infer_meta: + func : FusedBatchNormActInferMeta + param : [x, scale, bias, mean, variance] + kernel : + func : fused_bn_add_activation + data_type : x + view : (mean -> mean_out), (variance -> variance_out) + backward : fused_bn_add_activation_grad + +- op : fused_softmax_mask_upper_triangle + args : (Tensor X) + output : Tensor(Out) + infer_meta : + func : UnchangedInferMeta + kernel: + func : fused_softmax_mask_upper_triangle + backward: fused_softmax_mask_upper_triangle_grad + +- op : gaussian + args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={}) + output: Tensor(out) + infer_meta : + func : GaussianInferMeta + param : [shape, mean, std, seed, dtype] + kernel : + func : gaussian + param : [shape, mean, std, seed, dtype] + data_type : dtype + backend : place + - op : get_tensor_from_selected_rows args : (Tensor x) output : Tensor(out) @@ -82,6 +643,84 @@ kernel: func: get_tensor_from_selected_rows {selected_rows -> dense} +- op : greater_equal + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + kernel : + func : greater_equal + inplace: (x -> out) + +- op : greater_than + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + kernel : + func : greater_than + inplace: (x -> out) + +- op : hardswish + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : 
UnchangedInferMeta + param : [x] + kernel : + func : hardswish + backward : hardswish_grad + +- op : hsigmoid_loss + args : (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool is_sparse) + output : Tensor(out), Tensor(pre_out), Tensor(w_out) + infer_meta : + func : HSigmoidLossInferMeta + optional: path, code, bias + kernel : + func : hsigmoid_loss + data_type : x + backward : hsigmoid_loss_grad + +- op : increment + args : (Tensor x, float value = 1.0) + output : Tensor(out) + infer_meta : + func : IncrementInferMeta + kernel : + func : increment + inplace : (x -> out) + +- op : less_equal + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + kernel : + func : less_equal + inplace: (x -> out) + +- op : less_than + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + kernel : + func : less_than + inplace: (x -> out) + +- op : linspace + args : (Tensor start, Tensor stop, Tensor number, DataType dtype, Place place) + output : Tensor(out) + infer_meta : + func : LinspaceInferMeta + param: [start, stop, number, dtype] + kernel : + func : linspace + param: [start, stop, number, dtype] + data_type : dtype + backend : place + - op : load_combine args : (str file_path, bool load_as_fp16, bool model_from_memory) output : Tensor[](Out) @@ -94,6 +733,84 @@ args : (Tensor[] x) output : Tensor(out) +- op : logspace + args : (Tensor start, Tensor stop, Tensor num, Tensor base, DataType dtype, Place place={}) + output : Tensor(out) + infer_meta: + func : LogspaceInferMeta + param : [start, stop, num, base, dtype] + kernel : + func : logspace + param : [start, stop, num, base, dtype] + data_type : dtype + backend : place + +- op : logsumexp + args : (Tensor x, int64_t[] axis, bool keepdim, bool reduce_all) + output : Tensor(out) + infer_meta : + func : LogsumexpInferMeta + kernel : + func : logsumexp + backward : logsumexp_grad + +- op : matmul + args : 
(Tensor x, Tensor y, bool transpose_x = false, bool transpose_y = false) + output : Tensor + infer_meta : + func : MatmulInferMeta + spmd_rule : MatmulInferSpmd + kernel : + func : matmul + backward : matmul_grad + +- op : matrix_rank + args : (Tensor x, float tol, bool use_default_tol=true, bool hermitian=false) + output : Tensor(out) + infer_meta : + func : MatrixRankInferMeta + param : [x, use_default_tol, hermitian] + kernel : + func : matrix_rank + +- op : matrix_rank_tol + args : (Tensor x, Tensor atol_tensor, bool use_default_tol=true, bool hermitian=false) + output : Tensor(out) + infer_meta : + func : MatrixRankTolInferMeta + kernel : + func : matrix_rank_tol + +- op : max + args : (Tensor x, IntArray axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceIntArrayAxisInferMeta + spmd_rule: ReductionMaxInferSpmdDynamic + kernel : + func : max + backward : max_grad + +- op : maximum + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : maximum + backward : maximum_grad + +- op : mean + args : (Tensor x, IntArray axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceIntArrayAxisInferMeta + spmd_rule : ReductionMeanInferSpmdDynamic + kernel : + func : mean + backward : mean_grad + - op : memcpy args : (Tensor x, int dst_place_type) output : Tensor(out) @@ -104,6 +821,132 @@ func : memcpy param: [x, dst_place_type] +- op : memcpy_d2h + args : (Tensor x, int dst_place_type) + output : Tensor + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : memcpy_d2h + +- op : memcpy_h2d + args : (Tensor x, int dst_place_type) + output : Tensor + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : memcpy_h2d + +- op : min + args : (Tensor x, IntArray axis={}, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : ReduceIntArrayAxisInferMeta + kernel : + func : 
min + backward : min_grad + +- op : minimum + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + kernel : + func : minimum + backward : minimum_grad + +- op : mish + args : (Tensor x, float lambda) + output : Tensor + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : mish + backward : mish_grad + +- op : multiply + args : (Tensor x, Tensor y) + output : Tensor + infer_meta : + func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : multiply {dense, dense -> dense}, + multiply_sr {selected_rows, dense -> selected_rows} + inplace : (x -> out) + backward : multiply_grad + +- op : norm + args : (Tensor x, int axis, float epsilon, bool is_test) + output : Tensor(out), Tensor(norm) + infer_meta : + func : NormInferMeta + kernel : + func : norm + backward : norm_grad + +- op : not_equal + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : CompareInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : not_equal + inplace: (x -> out) + +- op : one_hot + args : (Tensor x, Scalar(int) num_classes) + output : Tensor(out) + infer_meta : + func : OneHotInferMeta + kernel : + func : one_hot + +- op : ones + args : (IntArray shape, DataType dtype=DataType::FLOAT32, Place place=CPUPlace()) + output : Tensor(out) + invoke : full(shape, 1, dtype, place) + +- op : ones_like + args : (Tensor x, DataType dtype=DataType::UNDEFINED, Place place={}) + output : Tensor(out) + invoke : full_like(x, 1, dtype, place) + +- op : pad + args : (Tensor x, int[] paddings, Scalar pad_value) + output : Tensor + infer_meta : + func : PadInferMeta + kernel : + func : pad + backward : pad_grad + +- op : pool2d + args : (Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) + output : Tensor(out) + infer_meta : + func : 
Pool2DInferMeta + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + kernel : + func : pool2d + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + backward : pool2d_grad + +- op : pool3d + args : (Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) + output : Tensor(out) + infer_meta : + func : PoolInferMeta + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + kernel : + func : pool3d + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + backward : pool3d_grad + - op : print args : (Tensor in, int first_n, str message, int summarize, bool print_tensor_name = true, bool print_tensor_type = true, bool print_tensor_shape = true, bool print_tensor_layout = true, bool print_tensor_lod = true, str print_phase = "BOTH", bool is_forward = true) output : Tensor(out) @@ -114,6 +957,39 @@ func : print_kernel param: [in, first_n, message, summarize, print_tensor_name, print_tensor_type, print_tensor_shape, print_tensor_layout, print_tensor_lod, print_phase, is_forward] +- op : prod + args : (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) + output : Tensor + infer_meta : + func : ReduceIntArrayAxisInferMetaBase + kernel : + func : prod + backward : prod_grad + +- op : randint + args : (int low, int high, IntArray shape, DataType dtype=DataType::INT64, Place place={}) + output : Tensor(out) + infer_meta : + func : RandintInferMeta + param : [low, high, shape, dtype] + kernel : + func : randint + param : [low, high, shape, dtype] + data_type : dtype + backend : place + +- op : randperm + 
args : (int n, DataType dtype, Place place={}) + output : Tensor(out) + infer_meta : + func : RandpermInferMeta + param : [n, dtype] + kernel : + func : randperm + param : [n, dtype] + data_type : dtype + backend : place + - op : recv_v2 args : (int[] out_shape = {}, DataType dtype = DataType::FLOAT32, int peer = 0, int ring_id = 0, bool use_calc_stream = false, bool dynamic_shape = false) output : Tensor(out) @@ -125,6 +1001,73 @@ param : [ring_id, dynamic_shape, peer, out_shape, dtype, use_calc_stream] data_type : dtype +- op : remainder + args : (Tensor x, Tensor y) + output : Tensor (out) + infer_meta : + func : ElementwiseInferMeta + kernel : + func : remainder + inplace : (x -> out) + +- op : repeat_interleave + args : (Tensor x, int repeats, int axis) + output : Tensor(out) + infer_meta : + func : RepeatInterleaveInferMeta + kernel : + func : repeat_interleave + data_type : x + backward: repeat_interleave_grad + +- op : repeat_interleave_with_tensor_index + args : (Tensor x, Tensor repeats, int axis) + output : Tensor(out) + infer_meta : + func : RepeatInterleaveWithTensorIndexInferMeta + kernel : + func : repeat_interleave_with_tensor_index + data_type : x + backward: repeat_interleave_with_tensor_index_grad + +- op : reshape + args : (Tensor x, IntArray shape) + output : Tensor(out), Tensor(xshape) + infer_meta : + func : ReshapeWithXShapeInferMeta + kernel : + func : reshape + inplace : (x -> out) + view: (x -> out) + intermediate : xshape + backward: reshape_grad + +- op : rnn + args: (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor dropout_state_in, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false) + output: Tensor(out), Tensor(dropout_state_out), Tensor[](state){pre_state.size()}, Tensor(reserve) + infer_meta: + func: RnnInferMeta + param : [x, pre_state, weight_list, sequence_length, dropout_prob, is_bidirec, 
input_size, hidden_size, num_layers, mode, seed, is_test] + kernel: + func: rnn + param : [x, pre_state, weight_list, sequence_length, dropout_prob, is_bidirec, input_size, hidden_size, num_layers, mode, seed, is_test] + data_type: x + backward: rnn_grad + optional : sequence_length + intermediate : reserve + view : (dropout_state_in -> dropout_state_out) + +- op : rrelu + args : (Tensor x, float lower, float upper, bool is_test) + output : Tensor(out), Tensor(noise) + infer_meta : + func : RReluInferMeta + kernel : + func : rrelu + data_type : x + intermediate : noise + backward : rrelu_grad + - op : save_combine args : (Tensor[] x, str file_path, bool overwrite, bool save_as_fp16, bool save_to_memory) output : Tensor(out) @@ -152,6 +1095,28 @@ func : send_v2 param : [x, ring_id, dynamic_shape, peer, use_calc_stream] +- op : set_value + args : (Tensor x, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes, int64_t[] shape, Scalar[] values) + output : Tensor(out) + inplace: (x -> out) + infer_meta : + func : SetValueInferMeta + param : [x] + kernel : + func : set_value + backward: set_value_grad + +- op : set_value_with_tensor + args : (Tensor x, Tensor values, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes) + output : Tensor(out) + inplace: (x -> out) + infer_meta: + func: SetValueInferMeta + param: [x] + kernel: + func: set_value_with_tensor + backward: set_value_with_tensor_grad + - op : shadow_feed args : (Tensor x) output : Tensor(out) @@ -169,6 +1134,198 @@ func: share_data param: [x] +- op : slice + args : (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) + output : Tensor + infer_meta : + func : SliceRawInferMeta + spmd_rule : SliceInferSpmdDynamic + kernel : + func : slice + backward : slice_grad + +- op : softmax + args : (Tensor x, int axis) + output : Tensor(out) + infer_meta 
: + func : SoftmaxInferMeta + kernel : + func : softmax + inplace : (x -> out) + backward : softmax_grad + +- op : split + args : (Tensor x, IntArray sections, Scalar(int) axis) + output : Tensor[]{sections.size()} + infer_meta : + func : SplitInferMeta + kernel : + func : split + backward : split_grad + +- op : split_with_num + args : (Tensor x, int num, Scalar(int) axis) + output : Tensor[]{num} + infer_meta : + func : SplitWithNumInferMeta + spmd_rule : SplitWithNumInferSpmdDynamic + kernel : + func : split_with_num + backward : split_with_num_grad + +- op : strided_slice + args : (Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray strides) + output : Tensor + infer_meta : + func : StridedSliceInferMeta + spmd_rule : StridedSliceInferSpmdDynamic + kernel : + func : strided_slice + backward : strided_slice_grad + +- op : subtract + args : (Tensor x, Tensor y) + output : Tensor(out) + infer_meta : + func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd + kernel : + func : subtract + inplace : (x -> out) + backward : subtract_grad + +- op : sum + args : (Tensor x, IntArray axis={}, DataType dtype=DataType::UNDEFINED, bool keepdim=false) + output : Tensor(out) + infer_meta : + func : SumInferMeta + spmd_rule : ReductionSumInferSpmdDynamic + kernel : + func : sum + data_type : x + backward : sum_grad + +- op : swish + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : swish + backward : swish_grad + +- op : sync_batch_norm_ + args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) + output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + infer_meta : + func : BatchNormInferMeta + kernel : + func : sync_batch_norm + data_type : x + backward : sync_batch_norm_grad + inplace : 
(mean -> mean_out), (variance -> variance_out) + optional : reserve_space + +- op : tile + args : (Tensor x, IntArray repeat_times = {}) + output : Tensor(out) + infer_meta : + func : TileInferMeta + kernel : + func : tile + backward : tile_grad + +- op : trans_layout + args : (Tensor x, int[] perm) + output : Tensor + infer_meta : + func : TransposeInferMeta + kernel : + func : transpose + backward : trans_layout_grad + +- op : transpose + args : (Tensor x, int[] perm) + output : Tensor(out) + infer_meta : + func : TransposeInferMeta + spmd_rule: TransposeInferSpmd + kernel : + func : transpose + inplace : (x -> out) + backward : transpose_grad + +- op : tril + args : (Tensor x, int diagonal) + output : Tensor(out) + infer_meta : + func : TrilInferMeta + kernel : + func : tril + inplace: (x -> out) + backward : tril_grad + +- op : tril_indices + args : (int rows, int cols, int offset, DataType dtype, Place place={}) + output : Tensor(out) + infer_meta : + func : TrilIndicesInferMeta + param : [rows, cols, offset, dtype] + kernel : + func : tril_indices + param : [rows, cols, offset, dtype] + data_type : dtype + backend : place + +- op : triu + args : (Tensor x, int diagonal) + output : Tensor(out) + infer_meta : + func : TriuInferMeta + spmd_rule : TriuInferSpmd + kernel : + func : triu + inplace: (x -> out) + backward : triu_grad + +- op : triu_indices + args : (int row, int col, int offset, DataType dtype, Place place={}) + output : Tensor(out) + infer_meta : + func : TriuIndicesInferMeta + param : [row, col, offset, dtype] + kernel : + func : triu_indices + param : [row, col, offset, dtype] + data_type : dtype + backend : place + +# python API: paddle.nn.initializer.TruncatedNormal +- op : truncated_gaussian_random + args : (int[] shape, float mean, float std, int seed, DataType dtype=DataType::FLOAT32, Place place={}) + output : Tensor(out) + infer_meta : + func : TruncatedGaussianRandomInferMeta + param : [shape, mean, std, seed, dtype] + kernel : + func : 
truncated_gaussian_random + param : [shape, mean, std, seed, dtype] + backend : place + data_type : dtype + +- op : uniform + args : (IntArray shape, DataType dtype, Scalar min, Scalar max, int seed, Place place={}) + output : Tensor(out) + infer_meta : + func : UniformRandomInferMeta + param: [shape, dtype] + kernel : + func : uniform + param: [shape, dtype, min, max, seed] + data_type : dtype + backend : place + - op : uniform_random_batch_size_like args : (Tensor input, int[] shape, int input_dim_idx=0, int output_dim_idx=0, float min=-1.0f, float max=1.0f, int seed=0, int diag_num=0, int diag_step=0, float diag_val=1.0f, DataType dtype=DataType::FLOAT32) output : Tensor(out) @@ -179,10 +1336,41 @@ func : uniform_random_batch_size_like data_type : dtype +- op : unique + args : (Tensor x, bool return_index=false, bool return_inverse=false, bool return_counts=false, int[] axis={}, DataType dtype=DataType::INT64, bool is_sorted=false) + output : Tensor(out), Tensor(indices), Tensor(inverse), Tensor(counts) + optional : indices, counts + infer_meta : + func : UniqueRawInferMeta + kernel : + func : unique + data_type : x + interfaces : paddle::dialect::ParseKernelKeyInterface + +- op : unpool + args: (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format) + output: Tensor(out) + infer_meta: + func: UnpoolInferMeta + kernel: + func: unpool + data_type: x + backward: unpool_grad + - op : write_to_array args : (Tensor i, Tensor x) output : Tensor[](out) +- op : zeros + args : (IntArray shape, DataType dtype=DataType::FLOAT32, Place place=CPUPlace()) + output : Tensor(out) + invoke : full(shape, 0, dtype, place) + +- op : zeros_like + args : (Tensor x, DataType dtype=DataType::UNDEFINED, Place place = {}) + output : Tensor(out) + invoke : full_like(x, 0, dtype, place) + - op: dpsgd args: (Tensor param, Tensor grad, Tensor learning_rate, float clip = 10.0f, float batch_size = 16.0f, float sigma = 1.0f, int seed = 0) 
output: Tensor(param_out) diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml index c5178b21b4964c..1ae80dbc8b8fc0 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml @@ -1,3 +1,270 @@ +- backward_op : add_double_grad + forward : add_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) + args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + param : [grad_out] + kernel : + func : add_double_grad + optional : grad_x_grad, grad_y_grad + backward : add_triple_grad + inplace : (grad_x_grad -> grad_out_grad) + composite : add_double_grad(y, grad_out, grad_x_grad, grad_y_grad, axis, grad_out_grad) + +- backward_op : add_grad + forward : add (Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd + kernel : + func : add_grad + no_need_buffer : x, y + composite : add_grad(x, y, out_grad, axis, x_grad, y_grad) + backward : add_double_grad + inplace : (out_grad -> x_grad) + +- backward_op : add_triple_grad + forward : add_double_grad (Tensor y, Tensor grad_out, Tensor grad_grad_x, Tensor grad_grad_y, int axis = -1) -> Tensor(grad_grad_out) + args : (Tensor grad_grad_x, Tensor grad_grad_y, Tensor grad_grad_out_grad, int axis = -1) + output : Tensor(grad_grad_x_grad), Tensor(grad_grad_y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [grad_grad_x, grad_grad_y] + kernel : + func : add_triple_grad + inplace : (grad_grad_out_grad -> grad_grad_x_grad) + composite : add_triple_grad (grad_grad_x, grad_grad_y, grad_grad_out_grad, axis, grad_grad_x_grad, grad_grad_y_grad ) + 
+- backward_op : amax_grad + forward: amax (Tensor x, int64_t[] axis={}, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int64_t[] axis={}, bool keepdim=false, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : amax_grad + +- backward_op : amin_grad + forward: amin (Tensor x, int64_t[] axis={}, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int64_t[] axis={}, bool keepdim=false, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : amin_grad + +- backward_op : assign_grad + forward : assign (Tensor x) -> Tensor(out) + args : (Tensor out_grad) + output : Tensor(x_grad) + composite: assign_grad(out_grad, x_grad) + invoke : assign(out_grad) + +- backward_op : assign_out__grad + forward : assign_out_ (Tensor x, Tensor output) -> Tensor(out) + args : (Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + kernel : + func : assign + inplace : (out_grad -> x_grad) + +- backward_op : batch_norm_double_grad + forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias) + args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) + output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, scale, x] + kernel : + func : 
batch_norm_double_grad + data_type : x + optional : scale, out_mean, out_variance, grad_x_grad, grad_scale_grad, grad_bias_grad + inplace : (grad_out -> grad_out_grad) + +- backward_op : batch_norm_grad + forward : batch_norm (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) + output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, scale, bias] + kernel : + func : batch_norm_grad + data_type : out_grad + optional : scale, bias, mean_out, variance_out, reserve_space + composite: batch_norm_grad(x, scale, bias, mean_out, variance_out, saved_mean, saved_variance, reserve_space, out_grad, momentum, epsilon, data_layout, is_test, use_global_stats, trainable_statistics) + backward : batch_norm_double_grad + +- backward_op : c_embedding_grad + forward : c_embedding (Tensor weight, Tensor x, int64_t start_index=0) -> Tensor(out) + args : (Tensor weight, Tensor x, Tensor out_grad, int64_t start_index=0) + output : Tensor(weight_grad) + infer_meta : + func : EmbeddingGradInferMeta + param : [x, weight] + kernel : + func : c_embedding_grad + no_need_buffer : weight + +- backward_op : cast_grad + forward : cast (Tensor x, DataType dtype) -> Tensor(out) + args : (Tensor x, Tensor out_grad) + output : Tensor(x_grad) + invoke : cast (out_grad, x.dtype()) + composite: cast_grad(x, out_grad, x_grad) + no_need_buffer : x + +- backward_op : channel_shuffle_grad + forward : channel_shuffle 
(Tensor x, int groups, str data_format="NCHW") -> Tensor(out) + args : (Tensor out_grad, int groups, str data_format="NCHW") + output : Tensor(x_grad) + infer_meta : + func : ChannelShuffleGradInferMeta + kernel : + func : channel_shuffle_grad + +- backward_op : conv2d_transpose_double_grad + forward : conv2d_transpose_grad(Tensor x, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_x), Tensor(grad_filter) + args : (Tensor x, Tensor filter, Tensor grad_out, Tensor grad_x_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(x_grad), Tensor(filter_grad), Tensor(grad_out_grad) + infer_meta : + func : Conv2dTransposeDoubleGradInferMeta + kernel : + func : conv2d_transpose_double_grad + data_type : x + +- backward_op : conv2d_transpose_grad + forward : conv2d_transpose(Tensor x, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") -> Tensor(out) + args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(x_grad), Tensor(filter_grad) + infer_meta : + func : Conv2dTransposeGradInferMeta + kernel : + func : conv2d_transpose_grad + data_type : x + backward : conv2d_transpose_double_grad + +- backward_op : deformable_conv_grad + forward : deformable_conv(Tensor x, Tensor offset, Tensor filter, Tensor mask, int[] strides, int[] paddings, int[] dilations, int deformable_groups, int groups, int im2col_step) -> Tensor(out) + args : (Tensor x, Tensor offset, Tensor filter, Tensor mask, Tensor out_grad, 
int[] strides, int[] paddings, int[] dilations, int deformable_groups, int groups, int im2col_step) + output : Tensor(x_grad), Tensor(offset_grad), Tensor(filter_grad), Tensor(mask_grad) + infer_meta : + func : DeformableConvGradInferMeta + kernel : + func : deformable_conv_grad + data_type : x + optional : mask + +- backward_op : depthwise_conv2d_transpose_grad + forward : depthwise_conv2d_transpose(Tensor x, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") -> Tensor(out) + args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(x_grad), Tensor(filter_grad) + infer_meta : + func : Conv2dTransposeGradInferMeta + kernel : + func : depthwise_conv2d_transpose_grad + data_type : x + +- backward_op : divide_double_grad + forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) + args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [y, grad_x, grad_x] + kernel : + func : divide_double_grad + data_type : out + optional : grad_x_grad, grad_y_grad + inplace : (grad_x_grad -> grad_out_grad) + +- backward_op : divide_grad + forward : divide (Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd + kernel : + func : divide_grad + composite : divide_grad(x, y, out, out_grad, axis, x_grad, y_grad) + backward : divide_double_grad + 
+- backward_op : dropout_grad + forward : dropout (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask) + args : (Tensor mask, Tensor out_grad, Scalar p, bool is_test, str mode) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [out_grad] + kernel : + func : dropout_grad + composite : dropout_grad(mask, out_grad, p, is_test, mode, x_grad) + +- backward_op : einsum_grad + forward : einsum (Tensor[] x, str equation) -> Tensor(out), Tensor[](inner_cache), Tensor[](x_shape) + args : (Tensor[] x_shape, Tensor[] inner_cache, Tensor out_grad, str equation) + output : Tensor[](x_grad){x_shape.size()} + infer_meta : + func : UnchangedMultiInferMeta + param : [x_shape] + kernel : + func : einsum_grad + +- backward_op : elementwise_pow_grad + forward : elementwise_pow(Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param: [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd + composite : elementwise_pow_grad(x, y, out_grad, x_grad, y_grad) + kernel : + func : elementwise_pow_grad + +- backward_op : embedding_grad + forward : embedding (Tensor x, Tensor weight, int64_t padding_idx=-1) -> Tensor(out) + args : (Tensor x, Tensor weight, Tensor out_grad, int64_t padding_idx=-1) + output : Tensor(weight_grad) + infer_meta : + func : EmbeddingGradSparseInferMeta + param : [x,weight] + kernel : + func : embedding_grad {dense, dense, dense -> dense} + embedding_sparse_grad {dense, dense, dense -> selected_rows} + sparse_weight_embedding_grad {selected_rows, dense, dense -> dense} + sparse_weight_embedding_sparse_grad {selected_rows, dense, dense -> selected_rows} + data_type : out_grad + no_need_buffer : weight + +- backward_op : exponential__grad + forward : exponential_ (Tensor x, float lam) -> Tensor(out) + args : (Tensor out_grad) + output : Tensor(x_grad) + 
infer_meta : + func : UnchangedInferMeta + invoke : zeros_like(out_grad) + +- backward_op : frobenius_norm_grad + forward : frobenius_norm(Tensor x, IntArray axis, bool keep_dim, bool reduce_all) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray axis, bool keep_dim, bool reduce_all) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : frobenius_norm_grad + - backward_op : fused_attention_grad args : (Tensor out_grad, Tensor x, Tensor qkv_weight, Tensor qkv_bias, Tensor qkv_bias_out, Tensor src_mask, Tensor src_mask_out, Tensor out_linear_weight, Tensor out_linear_bias, Tensor ln_scale, Tensor ln_bias, Tensor ln_scale_2, Tensor ln_bias_2, Tensor ln_out, Tensor ln_mean, Tensor ln_var, Tensor ln_mean_2, Tensor ln_var_2, Tensor bias_dropout_residual_out, Tensor qkv_out, Tensor transpose_out_2, Tensor qk_out, Tensor qktv_out, Tensor softmax_out, Tensor attn_dropout_mask_out, Tensor attn_dropout_out, Tensor fmha_out, Tensor out_linear_out, Tensor dropout_mask_out, int num_heads, bool transpose_qkv_wb, bool pre_layer_norm, float epsilon, float attn_dropout_rate, bool is_test, bool attn_dropout_fix_seed, int attn_dropout_seed, str attn_dropout_implementation, float dropout_rate, bool dropout_fix_seed, int dropout_seed, str dropout_implementation, float ln_epsilon, bool add_residual, int ring_id) output : Tensor(qkv_bias_grad), Tensor(qkv_bias_out_grad), Tensor(src_mask_out_grad), Tensor(out_linear_bias_grad), Tensor(ln_scale_grad), Tensor(ln_bias_grad), Tensor(ln_scale_2_grad), Tensor(ln_bias_2_grad), Tensor(x_grad), Tensor(qkv_weight_grad), Tensor(out_linear_weight_grad), Tensor(ln_out_grad), Tensor(bias_dropout_residual_out_grad), Tensor(qkv_out_grad), Tensor(qktv_out_grad), Tensor(transpose_out_2_grad), Tensor(qk_out_grad), Tensor(softmax_out_grad), Tensor(attn_dropout_out_grad), Tensor(fmha_out_grad), Tensor(out_linear_out_grad) @@ -9,6 +276,30 @@ optional: ln_scale, ln_bias, qkv_bias, src_mask, 
out_linear_bias, ln_scale_2, ln_bias_2, qkv_bias_grad, qkv_bias_out_grad, src_mask_out_grad, out_linear_bias_grad, ln_scale_grad, ln_bias_grad, ln_scale_2_grad, ln_bias_2_grad, ln_out_grad, bias_dropout_residual_out_grad, ln_out, ln_mean, ln_var, ln_mean_2, ln_var_2, bias_dropout_residual_out, qkv_bias, qkv_bias_out, src_mask, src_mask_out, out_linear_bias no_need_buffer: qkv_bias_out, qkv_out, qk_out, qktv_out, out_linear_out, src_mask +- backward_op : fused_batch_norm_act_grad + forward : fused_batch_norm_act (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type) + output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, scale, bias] + kernel : + func : fused_batch_norm_act_grad + data_type : out_grad + optional : reserve_space + +- backward_op : fused_bn_add_activation_grad + forward : fused_bn_add_activation (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type) + output : Tensor(x_grad), Tensor(z_grad), Tensor(scale_grad), Tensor(bias_grad) + infer_meta : + func : GeneralQuaternaryGradInferMeta + param : [x, x, scale, bias] + kernel : + func : fused_bn_add_activation_grad + data_type : out_grad + optional : reserve_space + - backward_op : fused_feedforward_grad 
args : (Tensor out_grad, Tensor x, Tensor linear1_weight, Tensor linear1_bias, Tensor linear2_weight, Tensor dropout1_mask, Tensor dropout2_mask, Tensor linear1_out, Tensor dropout1_out, Tensor dropout2_out, Tensor ln1_scale, Tensor ln1_bias, Tensor ln1_out, Tensor ln1_mean, Tensor ln1_variance, Tensor ln2_scale, Tensor ln2_bias, Tensor ln2_mean, Tensor ln2_variance, Tensor linear2_bias, bool pre_layer_norm, float ln1_epsilon, float ln2_epsilon, str act_method, float dropout1_prob, float dropout2_prob, str dropout1_implementation, str dropout2_implementation, bool is_test, bool dropout1_fix_seed, bool dropout2_fix_seed, int dropout1_seed_val, int dropout2_seed_val, bool add_residual, int ring_id) output : Tensor(x_grad), Tensor(ln1_scale_grad), Tensor(ln1_bias_grad), Tensor(ln2_scale_grad), Tensor(ln2_bias_grad), Tensor(linear1_weight_grad), Tensor(linear1_bias_grad), Tensor(linear2_weight_grad), Tensor(linear2_bias_grad) @@ -18,6 +309,577 @@ func: fused_feedforward_grad optional: linear1_bias, linear2_bias, ln1_scale, ln1_bias, ln1_out, ln1_mean, ln1_variance, ln2_scale, ln2_bias, ln2_mean, ln2_variance, dropout2_out, ln1_scale_grad, ln1_bias_grad, ln2_scale_grad, ln2_bias_grad, linear2_bias_grad +- backward_op : fused_softmax_mask_upper_triangle_grad + forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out) + args: (Tensor Out, Tensor Out_grad) + output : Tensor(X_grad) + infer_meta : + func : UnchangedInferMeta + param : [Out_grad] + kernel: + func : fused_softmax_mask_upper_triangle_grad + +- backward_op : hardswish_grad + forward : hardswish (Tensor x) -> Tensor(out) + args : (Tensor x, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : hardswish_grad + inplace : (out_grad -> x_grad) + +- backward_op : hsigmoid_loss_grad + forward : hsigmoid_loss (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool is_sparse) -> Tensor(out), 
Tensor(pre_out), Tensor(w_out) + args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, Tensor pre_out, Tensor out_grad, int num_classes, bool is_sparse) + output : Tensor(x_grad), Tensor(w_grad), Tensor(bias_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, w, bias] + optional: path, code, bias + kernel : + func : hsigmoid_loss_grad + +- backward_op : logsumexp_grad + forward : logsumexp(Tensor x, int64_t[] axis, bool keepdim, bool reduce_all) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int64_t[] axis, bool keepdim, bool reduce_all) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : logsumexp_grad + +- backward_op : matmul_double_grad + forward : matmul_grad (Tensor x, Tensor y, Tensor grad_out, bool transpose_x=false, bool transpose_y=false) -> Tensor(grad_x), Tensor(grad_y) + args : (Tensor x, Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, bool transpose_x=false, bool transpose_y=false) + output : Tensor(x_grad), Tensor(y_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, y, grad_out] + kernel : + func : matmul_double_grad + composite : matmul_double_grad(x, y, grad_out, grad_x_grad, grad_y_grad, transpose_x=false, transpose_y=false) + optional : grad_x_grad, grad_y_grad + +- backward_op : matmul_grad + forward : matmul (Tensor x, Tensor y, bool transpose_x=false, bool transpose_y=false) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad, bool transpose_x=false, bool transpose_y=false) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + spmd_rule : MatmulGradInferSpmd + kernel : + func : matmul_grad + backward : matmul_double_grad + +- backward_op : max_grad + forward: max (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray axis={}, bool 
keepdim=false, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + spmd_rule : ReductionGradInferSpmd + kernel : + func : max_grad + composite : max_grad(x, out, out_grad, axis, keepdim, reduce_all, x_grad) + +- backward_op : maximum_grad + forward : maximum(Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param: [x, y] + spmd_rule: ElementwiseBinaryGradInferSpmd + kernel : + func : maximum_grad + composite : maximum_grad(x, y, out_grad, x_grad, y_grad) + +- backward_op : mean_double_grad + forward: mean_grad (Tensor x, Tensor grad_out, IntArray axis={}, bool keepdim=false, bool reduce_all = false) -> Tensor(grad_x) + args : (Tensor grad_x_grad, IntArray axis={}, bool keepdim=false) + output : Tensor(grad_out_grad) + invoke : mean(grad_x_grad, axis, keepdim) + +- backward_op : mean_grad + forward: mean (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out_grad, IntArray axis={}, bool keepdim=false, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + spmd_rule : ReductionGradInferSpmd + kernel : + func : mean_grad + backward : mean_double_grad + no_need_buffer : x + +- backward_op : min_grad + forward: min (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray axis={}, bool keepdim=false, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : min_grad + +- backward_op : minimum_grad + forward : minimum(Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param: [x, y] + kernel : + func : minimum_grad + composite : minimum_grad(x, y, 
out_grad, axis, x_grad, y_grad) + +- backward_op : mish_grad + forward : mish (Tensor x, float lambda) -> Tensor(out) + args : (Tensor x, Tensor out_grad, float lambda) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : mish_grad + inplace : (out_grad -> x_grad) + +- backward_op : multiply_double_grad + forward : multiply_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) + args : (Tensor x, Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, y, grad_out] + kernel : + func : multiply_double_grad + optional : grad_x_grad, grad_y_grad + inplace : (grad_x_grad -> grad_out_grad) + backward : multiply_triple_grad + composite : multiply_double_grad(x, y, grad_out, grad_x_grad, grad_y_grad, axis, x_grad, y_grad, grad_out_grad) + +- backward_op : multiply_grad + forward : multiply (Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd + kernel : + func : multiply_grad + composite: multiply_grad(x, y, out_grad, axis, x_grad, y_grad) + backward : multiply_double_grad + +- backward_op : multiply_triple_grad + forward : multiply_double_grad (Tensor x, Tensor y, Tensor fwd_grad_out, Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, int axis = -1) -> Tensor(grad_x), Tensor(grad_y), Tensor(grad_grad_out) + args : (Tensor x, Tensor y, Tensor fwd_grad_out, Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, Tensor grad_x_grad, Tensor grad_y_grad, Tensor grad_grad_out_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad), Tensor(fwd_grad_out_grad), Tensor(fwd_grad_grad_x_grad), Tensor(fwd_grad_grad_y_grad) + infer_meta : + func : 
GeneralQuinaryGradInferMeta + param : [x, y, fwd_grad_out, fwd_grad_grad_x, fwd_grad_grad_y] + kernel : + func : multiply_triple_grad + optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_x_grad, grad_y_grad, grad_grad_out_grad + +- backward_op : norm_grad + forward : norm (Tensor x, int axis, float epsilon, bool is_test) -> Tensor(out), Tensor(norm) + args : (Tensor x, Tensor norm, Tensor out_grad, int axis, float epsilon, bool is_test) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : norm_grad + +- backward_op : pad_double_grad + forward : pad_grad(Tensor x, Tensor grad_out, int[] paddings, Scalar pad_value) -> Tensor(grad_x) + args : (Tensor grad_x_grad, int[] paddings, Scalar pad_value) + output : Tensor(grad_out_grad) + infer_meta : + func : PadInferMeta + kernel : + func : pad + +- backward_op : pad_grad + forward : pad(Tensor x, int[] paddings, Scalar pad_value) -> Tensor(out) + args : (Tensor x, Tensor out_grad, int[] paddings, Scalar pad_value) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : pad_grad + param: [out_grad, paddings, pad_value] + no_need_buffer : x + composite : pad_grad(x, out_grad, paddings, pad_value, x_grad) + backward : pad_double_grad + +- backward_op : pool2d_double_grad + forward : pool2d_grad(Tensor x, Tensor out, Tensor grad_out, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(grad_x) + args : (Tensor x, Tensor grad_x_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) + output : Tensor(grad_out_grad) + infer_meta : + func : Pool2DInferMeta + param : [grad_x_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, 
adaptive, padding_algorithm] + kernel : + func : pool2d_double_grad + param : [grad_x_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + no_need_buffer : x + +- backward_op : pool2d_grad + forward : pool2d(Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : pool2d_grad + param : [x, out, out_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + backward : pool2d_double_grad + +- backward_op : pool3d_grad + forward : pool3d(Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : pool3d_grad + param : [x, out, out_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + +- backward_op : prod_grad + forward : prod (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray dims, bool keep_dim, bool reduce_all) + output : 
Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : prod_grad + composite: prod_grad(x, out, out_grad, dims, keep_dim, reduce_all, x_grad) + +- backward_op : repeat_interleave_grad + forward : repeat_interleave(Tensor x, int repeats, int axis) -> Tensor(out) + args : (Tensor x, Tensor out_grad, int repeats, int axis) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : repeat_interleave_grad + +- backward_op : repeat_interleave_with_tensor_index_grad + forward : repeat_interleave_with_tensor_index(Tensor x, Tensor repeats, int axis) -> Tensor(out) + args : (Tensor x, Tensor repeats, Tensor out_grad, int axis) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : repeat_interleave_with_tensor_index_grad + data_type : x + +- backward_op : reshape_double_grad + forward : reshape_grad (Tensor xshape, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor grad_out, Tensor grad_x_grad) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + param : [grad_out] + kernel : + func : reshape_double_grad + no_need_buffer : grad_out + inplace : (grad_x_grad -> grad_out_grad) + +- backward_op : reshape_grad + forward : reshape (Tensor x, IntArray shape) -> Tensor(out), Tensor(xshape) + args : (Tensor xshape, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : KernelWithXShapeInferMeta + param : [xshape, out_grad] + kernel : + func : reshape_grad + param : [out_grad] + data_type: out_grad + backend: out_grad + layout: out_grad + backward : reshape_double_grad + inplace : (out_grad -> x_grad) + +- backward_op : rnn_grad + forward : rnn (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor dropout_state_in, float dropout_prob, bool is_bidirec, int input_size, int hidden_size, int num_layers, str mode, int seed, bool is_test) -> Tensor(out), Tensor(dropout_state_out), 
Tensor[](state), Tensor(reserve) + args : (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor out, Tensor dropout_state_out, Tensor reserve, Tensor out_grad, Tensor[] state_grad, float dropout_prob, bool is_bidirec, int input_size, int hidden_size, int num_layers, str mode, int seed, bool is_test) + output : Tensor(x_grad), Tensor[](pre_state_grad){pre_state.size()}, Tensor[](weight_list_grad){weight_list.size()} + infer_meta : + func : RnnGradInferMeta + param : [x, pre_state, weight_list] + kernel : + func : rnn_grad + data_type: out_grad + optional : sequence_length + +- backward_op : rrelu_grad + forward : rrelu (Tensor x, float lower, float upper, bool is_test) -> Tensor(out), Tensor(noise) + args : (Tensor x, Tensor noise, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : RReluGradInferMeta + param : [out_grad, noise] + kernel : + func : rrelu_grad + data_type : x + +- backward_op : set_value_grad + forward : set_value (Tensor x, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes, int64_t[] shape, Scalar[] values) -> Tensor(out) + args : (Tensor out_grad) + output : Tensor(x_grad) + infer_meta: + func: UnchangedInferMeta + param: [out_grad] + kernel: + func: assign + param: [out_grad] + +- backward_op : set_value_with_tensor_grad + forward: set_value_with_tensor (Tensor x, Tensor values, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes) -> Tensor(out) + args : (Tensor values,Tensor out_grad, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes) + output : Tensor(x_grad), Tensor(values_grad) + infer_meta: + func: SetValueGradInferMeta + param: [out_grad, values] + kernel: + func: set_value_grad + param: [out_grad, starts, ends, steps, axes, decrease_axes, none_axes] + +- backward_op : slice_double_grad + forward : slice_grad (Tensor input, 
Tensor grad_out, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(grad_input) + args : (Tensor grad_input_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) + output : Tensor(grad_out_grad) + invoke : slice(grad_input_grad, axes, starts, ends, infer_flags, decrease_axis) + +- backward_op : slice_grad + forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out) + args : (Tensor input, Tensor out_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) + output : Tensor(input_grad) + infer_meta : + func : UnchangedInferMeta + param : [input] + spmd_rule: SliceGradInferSpmdDynamic + kernel : + func : slice_grad + composite: slice_grad(input, out_grad, axes, starts, ends, infer_flags, decrease_axis, input_grad) + backward : slice_double_grad + no_need_buffer : input + +- backward_op : softmax_grad + forward : softmax (Tensor x, int axis) -> Tensor(out) + args : (Tensor out, Tensor out_grad, int axis) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [out] + kernel : + func : softmax_grad + composite : softmax_grad(out, out_grad, axis, x_grad) + +- backward_op : split_grad + forward : split (Tensor x, IntArray num_or_sections, Scalar axis) -> Tensor[](out) + args : (Tensor[] out_grad, Scalar axis = -1) + output : Tensor(x_grad) + invoke : concat( out_grad, axis) + composite : split_grad(out_grad, axis, x_grad) + +- backward_op : split_with_num_grad + forward : split_with_num (Tensor x, int num, Scalar axis) -> Tensor[](out) + args : (Tensor[] out_grad, Scalar axis = -1) + output : Tensor(x_grad) + invoke : concat( out_grad, axis) + composite : split_grad(out_grad, axis, x_grad) + +- backward_op : strided_slice_grad + forward : strided_slice (Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray 
strides) -> Tensor(out) + args : (Tensor x, Tensor out_grad, int[] axes, IntArray starts, IntArray ends, IntArray strides) + output : Tensor(x_grad) + infer_meta : + func : GeneralUnaryGradInferMeta + param : [x] + spmd_rule : StridedSliceGradInferSpmdDynamic + kernel : + func : strided_slice_grad + no_need_buffer : x + +- backward_op : subtract_double_grad + forward : subtract_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) + args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + param : [grad_out] + kernel : + func : subtract_double_grad + optional : grad_x_grad, grad_y_grad + no_need_buffer : y, grad_out + inplace : (grad_x_grad -> grad_out_grad) + composite : subtract_double_grad(y, grad_out, grad_x_grad, grad_y_grad, axis, grad_out_grad) + +- backward_op : subtract_grad + forward : subtract (Tensor x, Tensor y) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd + kernel : + func : subtract_grad + no_need_buffer : x, y + composite : subtract_grad(x, y, out_grad, axis, x_grad, y_grad) + backward : subtract_double_grad + inplace : (out_grad -> x_grad) + +- backward_op : sum_double_grad + forward : sum_grad (Tensor x, Tensor grad_out, IntArray axis, bool keepdim, bool reduce_all=false) -> Tensor(grad_x) + args : (Tensor grad_x_grad, IntArray axis={}, bool keepdim=false) + output : Tensor(grad_out_grad) + invoke : sum(grad_x_grad, axis, grad_x_grad.dtype(), keepdim) + +- backward_op : sum_grad + forward : sum (Tensor x, IntArray axis={}, DataType dtype=DataType::UNDEFINED, bool keepdim=false) -> Tensor(out) + args : (Tensor x, Tensor out_grad, IntArray axis, bool keepdim, bool reduce_all=false) + output : Tensor(x_grad) + infer_meta 
: + func : UnchangedInferMeta + param : [x] + spmd_rule : ReductionGradInferSpmd + kernel : + func : sum_grad + composite : sum_grad(x, out_grad, axis, keepdim, reduce_all, x_grad) + no_need_buffer : x + backward : sum_double_grad + +- backward_op : swish_grad + forward : swish (Tensor x) -> Tensor(out) + args : (Tensor x, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : GeneralUnaryGradInferMeta + param : [x] + kernel : + func : swish_grad + inplace : (out_grad -> x_grad) + +- backward_op : sync_batch_norm_grad + forward : sync_batch_norm_ (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) + output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param : [x, scale, bias] + kernel : + func : sync_batch_norm_grad + data_type : out_grad + optional : reserve_space + +- backward_op : tile_double_grad + forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x) + args : (Tensor grad_x_grad, IntArray repeat_times) + output : Tensor(grad_out_grad) + invoke : tile(grad_x_grad, repeat_times) + +- backward_op : tile_grad + forward : tile (Tensor x, IntArray repeat_times) -> Tensor(out) + args : (Tensor x, Tensor out_grad, IntArray repeat_times) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : tile_grad + no_need_buffer : x + composite : tile_grad(x, out_grad, repeat_times, x_grad) + backward : tile_double_grad + +- backward_op : 
trans_layout_grad + forward : trans_layout (Tensor x, int[] perm) -> Tensor(out) + args : (Tensor x, Tensor out_grad, int[] perm) + output : Tensor(x_grad) + infer_meta : + func : TransLayoutGradInferMeta + kernel : + func : trans_layout_grad + +- backward_op : transpose_double_grad + forward : transpose_grad (Tensor grad_out, int[] perm) -> Tensor(grad_x) + args : (Tensor grad_x_grad, int[] perm) + output : Tensor(grad_out_grad) + invoke : transpose(grad_x_grad, perm) + +- backward_op : transpose_grad + forward : transpose (Tensor x, int[] perm) -> Tensor(out) + args : (Tensor out_grad, int[] perm) + output : Tensor(x_grad) + infer_meta : + func : TransposeGradInferMeta + param : [out_grad, perm] + spmd_rule: TransposeGradInferSpmd + kernel : + func : transpose_grad + backward : transpose_double_grad + composite: transpose_grad(out_grad, perm, x_grad) + +- backward_op : tril_grad + forward : tril(Tensor x, int diagonal) -> Tensor(out) + args : (Tensor out_grad, int diagonal) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [out_grad] + kernel : + func : tril_grad + +- backward_op : triu_grad + forward : triu(Tensor x, int diagonal) -> Tensor(out) + args : (Tensor out_grad, int diagonal) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [out_grad] + spmd_rule : TriuGradInferSpmd + kernel : + func : triu_grad + +- backward_op: disable_check_model_nan_inf_grad + forward: disable_check_model_nan_inf (Tensor x, int flag=0) -> Tensor(out) + args: (Tensor out_grad, int unsetflag = 1) + output : Tensor(x_grad) + infer_meta: + func: UnchangedInferMeta + param : [out_grad] + kernel: + func: check_model_nan_inf + data_type: out_grad + +- backward_op: enable_check_model_nan_inf_grad + forward: enable_check_model_nan_inf (Tensor x, int flag=1) -> Tensor(out) + args: (Tensor out_grad, int unsetflag = 0) + output : Tensor(x_grad) + infer_meta: + func: UnchangedInferMeta + param : [out_grad] + kernel: + func: 
check_model_nan_inf + data_type: out_grad + - backward_op: fused_elemwise_add_activation_grad forward: fused_elemwise_add_activation(Tensor x, Tensor y, str[] functor_list, float scale=0.0, int axis=-1, bool save_intermediate_out=false) -> Tensor(out), Tensor(intermediate_out) args: (Tensor x, Tensor y, Tensor out, Tensor intermediate_out, Tensor out_grad, str[] functor_list, float scale=0.0, int axis=-1, bool save_intermediate_out=false) @@ -27,3 +889,14 @@ kernel: func: fused_elemwise_add_activation_grad optional : x, intermediate_out + +- backward_op: unpool_grad + forward: unpool (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format) -> Tensor(out) + args: (Tensor x, Tensor indices, Tensor out, Tensor out_grad, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format) + output: Tensor(x_grad) + infer_meta: + func: UnchangedInferMeta + param : [x] + kernel: + func: unpool_grad + data_type: x diff --git a/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml b/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml index d825016157ff73..de542e68f30b9d 100644 --- a/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml @@ -12,14 +12,3 @@ data_type : dtype backend : place support_tensor : [start, end, step] - -- op : unique - args : (Tensor x, bool return_index=false, bool return_inverse=false, bool return_counts=false, int[] axis={}, DataType dtype=DataType::INT64, bool is_sorted=false) - output : Tensor(out), Tensor(indices), Tensor(inverse), Tensor(counts) - optional : indices, counts - infer_meta : - func : UniqueRawInferMeta - kernel : - func : unique - data_type : x - interfaces : paddle::dialect::ParseKernelKeyInterface diff --git a/paddle/fluid/pir/drr/CMakeLists.txt b/paddle/fluid/pir/drr/CMakeLists.txt index 6643f303926eb7..fa43d828d05bc8 100644 --- a/paddle/fluid/pir/drr/CMakeLists.txt +++ 
b/paddle/fluid/pir/drr/CMakeLists.txt @@ -8,18 +8,12 @@ set(op_forward_yaml_file1 ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/ops.parsed.yaml ) -set(op_forward_yaml_file2 - ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_ops.parsed.yaml -) set(op_forward_yaml_file3 ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/update_ops.parsed.yaml ) set(op_backward_yaml_file1 ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/backward_ops.parsed.yaml ) -set(op_backward_yaml_file2 - ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_backward_ops.parsed.yaml -) set(fused_op_forward_yaml_file ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/fused_ops.parsed.yaml ) @@ -41,7 +35,7 @@ set(op_yaml_file4 ${parsed_op_dir}/ops_backward.parsed.yaml) set(op_yaml_file5 ${parsed_op_dir}/update_ops.parsed.yaml) set(op_yaml_files - ${op_forward_yaml_file1},${op_forward_yaml_file2},${op_backward_yaml_file1},${op_backward_yaml_file2},${fused_op_forward_yaml_file},${fused_op_backward_yaml_file},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} + ${op_forward_yaml_file1},${op_backward_yaml_file1},${fused_op_forward_yaml_file},${fused_op_backward_yaml_file},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5} ) set(op_creator_file @@ -67,9 +61,7 @@ add_custom_command( COMMENT "copy_if_different ${op_creator_file}" DEPENDS ${op_creator_gen_file} ${op_forward_yaml_file1} - ${op_forward_yaml_file2} ${op_backward_yaml_file1} - ${op_backward_yaml_file2} ${op_compat_yaml_file} ${op_yaml_file3} ${op_yaml_file4} diff --git a/paddle/fluid/primitive/codegen/CMakeLists.txt b/paddle/fluid/primitive/codegen/CMakeLists.txt index 91fce795787721..885fcb83dc38c1 100644 --- a/paddle/fluid/primitive/codegen/CMakeLists.txt +++ b/paddle/fluid/primitive/codegen/CMakeLists.txt @@ -1,9 +1,7 @@ set(parsed_yaml_path "${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops") set(fwd_path 
${parsed_yaml_path}/ops.parsed.yaml) -set(fwd_legacy_path ${parsed_yaml_path}/legacy_ops.parsed.yaml) set(rev_path ${parsed_yaml_path}/backward_ops.parsed.yaml) -set(rev_legacy_path ${parsed_yaml_path}/legacy_backward_ops.parsed.yaml) set(fwd_pd_op_path ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/generated/ops.parsed.yaml ) @@ -24,12 +22,11 @@ message("Automatic code generation for paddle/fluid/primitive") execute_process( WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/primitive/codegen COMMAND - ${PYTHON_EXECUTABLE} ${scripts} --fwd_path ${fwd_path} --fwd_legacy_path - ${fwd_legacy_path} --rev_path ${rev_path} --rev_legacy_path - ${rev_legacy_path} --fwd_pd_op_path ${fwd_pd_op_path} - --update_fwd_pd_op_path ${update_fwd_pd_op_path} --rev_pd_op_path - ${rev_pd_op_path} --prim_path ${prim_path} --templates_dir ${templates_dir} - --compat_path ${compat_path} --destination_dir ${destination_dir} + ${PYTHON_EXECUTABLE} ${scripts} --fwd_path ${fwd_path} --rev_path + ${rev_path} --fwd_pd_op_path ${fwd_pd_op_path} --update_fwd_pd_op_path + ${update_fwd_pd_op_path} --rev_pd_op_path ${rev_pd_op_path} --prim_path + ${prim_path} --templates_dir ${templates_dir} --compat_path ${compat_path} + --destination_dir ${destination_dir} RESULT_VARIABLE _result) if(${_result}) message( @@ -43,9 +40,8 @@ execute_process( COMMAND ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/fluid/primitive/codegen/decomp_gen.py --fwd_path - ${fwd_path} --fwd_legacy_path ${fwd_legacy_path} --fwd_pd_op_path - ${fwd_pd_op_path} --templates_dir ${templates_dir} --compat_path - ${compat_path} --destination_dir + ${fwd_path} --fwd_pd_op_path ${fwd_pd_op_path} --templates_dir + ${templates_dir} --compat_path ${compat_path} --destination_dir ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/op_decomp.cc RESULT_VARIABLE _result) if(${_result}) diff --git a/paddle/fluid/primitive/codegen/decomp_gen.py b/paddle/fluid/primitive/codegen/decomp_gen.py index 822456cc897182..95b40f9f875069 
100644 --- a/paddle/fluid/primitive/codegen/decomp_gen.py +++ b/paddle/fluid/primitive/codegen/decomp_gen.py @@ -146,7 +146,6 @@ def process_optional_output_info(apis): def gen( fwd_path: pathlib.Path, - fwd_legacy_path: pathlib.Path, compat_path: pathlib.Path, fwd_pd_op_path: pathlib.Path, templates_dir: pathlib.Path, @@ -158,11 +157,7 @@ def gen( Args: prim_path (pathlib.Path): The YAML file path of the primitive API. fwd_path (pathlib.Path): The YAML file path of the forwad API. - fwd_legacy_path (pathlib.Path): The YAML file path of the legacy - forwad API. rev_path (pathlib.Path): The YAML file path of the backward API. - rev_legacy_path (pathlib.Path): The YAML file path of the legacy - backward API. compat_path: (pathlib.Path): The YAML file path of the ops compat. fwd_pd_op_path (pathlib.Path): The YAML file path of the ir forward API. rev_pd_op_path (pathlib.Path): The YAML file path of the ir backward API. @@ -174,19 +169,17 @@ def gen( """ ( fwds, - legacy_fwds, compats, ir_fwds, ) = ( load(fwd_path), - load(fwd_legacy_path), load(compat_path), load(fwd_pd_op_path), ) filter_compat_info(compats) apis = [ {**api, **{'class_name': to_pascal_case(api["name"]) + "Op"}} - for api in fwds + legacy_fwds + ir_fwds + for api in fwds + ir_fwds ] apis = extend_compat_info(apis, compats) @@ -226,11 +219,6 @@ def gen( parser.add_argument( '--fwd_path', type=str, help='The parsed ops yaml file.' 
) - parser.add_argument( - '--fwd_legacy_path', - type=str, - help='The parsed ops yaml file.', - ) parser.add_argument( '--compat_path', type=str, @@ -255,7 +243,6 @@ def gen( gen( pathlib.Path(args.fwd_path), - pathlib.Path(args.fwd_legacy_path), pathlib.Path(args.compat_path), pathlib.Path(args.fwd_pd_op_path), pathlib.Path(args.templates_dir), diff --git a/paddle/fluid/primitive/codegen/gen.py b/paddle/fluid/primitive/codegen/gen.py index f652764131bc51..056dac8b6640ab 100644 --- a/paddle/fluid/primitive/codegen/gen.py +++ b/paddle/fluid/primitive/codegen/gen.py @@ -50,7 +50,8 @@ "isclose", "send_v2", "assert", - "embedding_grad_sparse", + "embedding_sparse_grad", + "embedding_grad", ] @@ -320,9 +321,7 @@ def update_apis(op_yaml_items, update_yaml_file): def gen( prim_path: pathlib.Path, fwd_path: pathlib.Path, - fwd_legacy_path: pathlib.Path, rev_path: pathlib.Path, - rev_legacy_path: pathlib.Path, compat_path: pathlib.Path, fwd_pd_op_path: pathlib.Path, update_fwd_pd_op_path: pathlib.Path, @@ -336,11 +335,7 @@ def gen( Args: prim_path (pathlib.Path): The YAML file path of the primitive API. fwd_path (pathlib.Path): The YAML file path of the forwad API. - fwd_legacy_path (pathlib.Path): The YAML file path of the legacy - forwad API. rev_path (pathlib.Path): The YAML file path of the backward API. - rev_legacy_path (pathlib.Path): The YAML file path of the legacy - backward API. compat_path: (pathlib.Path): The YAML file path of the ops compat. fwd_pd_op_path (pathlib.Path): The YAML file path of the ir forward API. update_fwd_pd_op_path (pathlib.Path): The YAML file path of the ir update_ops. 
@@ -354,33 +349,27 @@ def gen( ( prims, fwds, - legacy_fwds, revs, - legacy_revs, compats, ir_fwds, ir_revs, ) = ( load(prim_path), load(fwd_path), - load(fwd_legacy_path), load(rev_path), - load(rev_legacy_path), load(compat_path), load(fwd_pd_op_path), load(rev_pd_op_path), ) filter_compat_info(compats) - fwd_apis = fwds + legacy_fwds + ir_fwds + fwd_apis = fwds + ir_fwds # replace old ir ops with pir ops if os.path.exists(update_fwd_pd_op_path): update_apis(fwd_apis, update_fwd_pd_op_path) apis = [{**api, **{'is_fwd': True}} for api in fwd_apis] - apis = apis + [ - {**api, **{'is_fwd': False}} for api in revs + legacy_revs + ir_revs - ] + apis = apis + [{**api, **{'is_fwd': False}} for api in revs + ir_revs] apis = [ {**api, **{'is_prim': True}} if api['name'] in prims @@ -413,19 +402,9 @@ def gen( parser.add_argument( '--fwd_path', type=str, help='The parsed ops yaml file.' ) - parser.add_argument( - '--fwd_legacy_path', - type=str, - help='The parsed ops yaml file.', - ) parser.add_argument( '--rev_path', type=str, help='The parsed ops yaml file.' 
) - parser.add_argument( - '--rev_legacy_path', - type=str, - help='The parsed ops yaml file.', - ) parser.add_argument( '--compat_path', type=str, @@ -461,9 +440,7 @@ def gen( gen( pathlib.Path(args.prim_path), pathlib.Path(args.fwd_path), - pathlib.Path(args.fwd_legacy_path), pathlib.Path(args.rev_path), - pathlib.Path(args.rev_legacy_path), pathlib.Path(args.compat_path), pathlib.Path(args.fwd_pd_op_path), pathlib.Path(args.update_fwd_pd_op_path), diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 0671869846f0dc..254158cc289145 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -942,9 +942,17 @@ void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v) { out_v->set_dtype(out_dtype); } -void EmbeddingGradSparseInferMeta(const MetaTensor& x, MetaTensor* out) { - out->set_dims(x.dims()); - out->set_dtype(x.dtype()); +void EmbeddingGradSparseInferMeta(const MetaTensor& x, + const MetaTensor& weight, + MetaTensor* out) { + if (weight) { + if (out->is_selected_rows()) { + out->set_dims(x.dims()); + out->set_dtype(x.dtype()); + } else { + out->share_dims(weight); + } + } } void EighInferMeta(const MetaTensor& x, diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index daab02f2b46b14..b56776018d6d32 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -171,7 +171,9 @@ void DistConcatInferMeta(const MetaTensor& x, int nranks, MetaTensor* out); void DistReduceInferMeta(const MetaTensor& x, MetaTensor* out); -void EmbeddingGradSparseInferMeta(const MetaTensor& x, MetaTensor* out); +void EmbeddingGradSparseInferMeta(const MetaTensor& x, + const MetaTensor& weight, + MetaTensor* out); void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v);