From 2a248c5031a3c3d24fbe00a70030f8cee1504143 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 6 May 2022 14:35:17 +0000 Subject: [PATCH 01/16] change the output format of C++ backward api --- .../final_state_generator/eager_gen.py | 96 +++++++++---------- paddle/phi/api/lib/api_custom_impl.cc | 61 +++++------- paddle/phi/api/lib/api_custom_impl.h | 42 ++++---- paddle/phi/api/lib/api_gen_utils.cc | 12 +++ paddle/phi/api/lib/api_gen_utils.h | 3 + paddle/phi/tests/api/test_matmul_api.cc | 14 ++- python/paddle/utils/code_gen/api_base.py | 34 ++++--- python/paddle/utils/code_gen/api_gen.py | 6 +- python/paddle/utils/code_gen/backward.yaml | 10 +- .../paddle/utils/code_gen/backward_api_gen.py | 62 ++++++++---- .../paddle/utils/code_gen/sparse_api_gen.py | 10 +- .../utils/code_gen/sparse_bw_api_gen.py | 38 ++++---- .../paddle/utils/code_gen/strings_api_gen.py | 2 +- 13 files changed, 211 insertions(+), 179 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 54c6e39283ec5..caaa771c132b6 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -162,10 +162,6 @@ class {} : public egr::GradNodeBase {{ // Call grad_api function VLOG(3) << \"Final State Running: \" << \"{}\"; {} - - // Get Output -{} - // Get GradIn autograd_meta {} @@ -1350,28 +1346,40 @@ def GenerateNodeDefinition(self, grad_node_creation_str): get_grad_in_args_list.append(get_attr_str) get_grad_in_args_str = "\n".join(get_grad_in_args_list) - grad_api_args_str = ", ".join(grad_api_args) - - # Grad Function Call String - grad_api_namespace = f"paddle::experimental::{namespace}" - grad_function_call_str = f"{indent}auto grad_api_result = {grad_api_namespace}{backward_api_name}({grad_api_args_str});" - # Get Grad Outputs - get_outputs_str = "" - num_outputs = len(backward_grad_outputs_map.keys()) + # Grad Outputs for name, (ttype, fwd_position, grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - if num_outputs == 1: - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result;" + if IsPlainTensorType(ttype): + grad_api_args.append(f"api_output[{grad_api_position}][0]") else: - if IsPlainTensorType(ttype): - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result[{grad_api_position}][0];" - else: - assert IsVectorTensorType(ttype) - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result[{grad_api_position}];" - get_outputs_str += get_tensor_str + "\n" + assert IsVectorTensorType(ttype) + get_tensor_str = f"{indent}auto& {transformed_tensor_name} = returns[{grad_api_position}];" + grad_api_args.append(f"api_output[{grad_api_position}]") + + grad_api_args_str = ", ".join(grad_api_args) + + # Grad Function Call String + slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) + grad_api_namespace = f"paddle::experimental::{namespace}" + grad_function_call_str = f""" + std::vector> returns({slot_num_bwd_outputs}); + const auto& out_metas = OutputMeta(); + std::vector> api_output({slot_num_bwd_outputs}); + for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ + api_output[i].reserve(returns.size()); + for (size_t j = 0; j < returns[i].size(); ++j) {{ + if (out_metas[i][j].IsStopGradient()) {{ + api_output[i].push_back(nullptr); + }} else {{ + api_output[i].push_back(&returns[i][j]); + }} + }} + }} +""" + + grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});" # Prepare for Node Creation if Necessary inputs_autograd_meta_str = "" @@ -1423,28 +1431,26 @@ def GenerateNodeDefinition(self, grad_node_creation_str): # 3. Get Output AutoGradMeta outputs_autograd_meta_list = [] num_fwd_outputs = len(backward_grad_outputs_map.keys()) - for name, (rtype, pos, _) in backward_grad_outputs_map.items(): + for name, (rtype, pos, + grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) output_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) output_autograd_meta_vec_name = GetAutoGradMetaVectorName( transformed_tensor_name) - if num_fwd_outputs == 1: - if IsPlainTensorType(rtype): - output_autograd_meta = f"{indent}egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});" - else: - assert IsVectorTensorType(rtype) - output_autograd_meta = f"{indent}std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});\n" - output_autograd_meta += f"{indent}std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" + if IsPlainTensorType(rtype): + output_autograd_meta = f""" + auto& {transformed_tensor_name} = returns[{grad_api_position}][0]; + egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});""" + else: - # Tuple api_result - if IsPlainTensorType(rtype): - output_autograd_meta = f"{indent}egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});" - else: - assert IsVectorTensorType(rtype) - output_autograd_meta = f"{indent}std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});\n" - output_autograd_meta += f"{indent}std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" + assert IsVectorTensorType(rtype) + output_autograd_meta = f""" + auto& {transformed_tensor_name} = returns[{grad_api_position}]; + std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); + std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name}; +""" outputs_autograd_meta_list.append(output_autograd_meta) outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list) @@ -1452,28 +1458,14 @@ def GenerateNodeDefinition(self, grad_node_creation_str): compute_require_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n" compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});" - # Construct grad_api returns - slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) - returns_str = f"{indent}std::vector> returns({slot_num_bwd_outputs});\n" - for name, (ttype, fwd_position, - grad_api_position) in backward_grad_outputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - - # Rearrange output order accordingly - if IsPlainTensorType(ttype): - returns_str += f"{indent}returns[{fwd_position}] = {{ {transformed_tensor_name} }};\n" - else: - assert IsVectorTensorType(ttype) - returns_str += f"{indent}returns[{fwd_position}] = {transformed_tensor_name};\n" - - returns_str += f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" + returns_str = f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" returns_str += f"{indent}return returns;\n" grad_node_name = GetGradNodeName(forward_api_name) self.node_definition_str = GRAD_FUNCTION_TEMPLATE.format( grad_node_name, fill_zero_str, get_grad_in_args_str, grad_node_name, - grad_function_call_str, get_outputs_str, inputs_autograd_meta_str, + grad_function_call_str, inputs_autograd_meta_str, outputs_autograd_meta_str, compute_require_grad_str, grad_node_creation_str, returns_str) diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index ae248a7bf1280..180c51e0ca33d 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -512,19 +512,20 @@ Tensor conv2d_impl(const Tensor& input, return api_output; } -std::vector> conv2d_grad_impl( - const Tensor& input, - const Tensor& filter, - const Tensor& out_grad, - const std::vector& strides, - const std::vector& paddings, - const std::string& paddding_algorithm, - int groups, - const std::vector& dilations, - const std::string& data_format, - bool use_addto, - int workspace_size_MB, - bool exhaustive_search) { +void conv2d_grad_impl(const Tensor& input, + const Tensor& filter, + const Tensor& out_grad, + const std::vector& strides, + const std::vector& paddings, + const std::string& paddding_algorithm, + int groups, + const std::vector& dilations, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + Tensor* input_grad, + Tensor* filter_grad) { Backend kernel_backend = Backend::UNDEFINED; DataLayout kernel_layout = DataLayout::UNDEFINED; DataType kernel_data_type = DataType::UNDEFINED; @@ -566,11 +567,8 @@ std::vector> conv2d_grad_impl( auto input_filter = PrepareData(filter, args1, {}); auto input_out_grad = PrepareData(out_grad, args2, {}); - std::vector> api_output(2); - api_output[0].emplace_back(); - auto kernel_out_0 = SetKernelOutput(kernel_backend, &api_output[0][0]); - api_output[1].emplace_back(); - auto kernel_out_1 = SetKernelOutput(kernel_backend, &api_output[1][0]); + auto kernel_out_0 = SetKernelOutput(kernel_backend, input_grad); + auto kernel_out_1 = SetKernelOutput(kernel_backend, filter_grad); phi::MetaTensor meta_out_0(kernel_out_0); phi::MetaTensor meta_out_1(kernel_out_1); @@ -613,8 +611,6 @@ std::vector> conv2d_grad_impl( kernel_out_0, kernel_out_1); } - - return api_output; } Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { @@ -1000,8 +996,9 @@ std::tuple sgd_impl( // but if we use this impl, it will not support. We need to be able to reuse // the autograd API here, which is not yet implemented // TODO(chenweihang): we should support call generated api in custom api impl -std::vector add_n_grad_impl(const std::vector& x, - const Tensor& out_grad) { +void add_n_grad_impl(const std::vector& x, + const Tensor& out_grad, + std::vector x_grad) { auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad); auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); @@ -1019,9 +1016,7 @@ std::vector add_n_grad_impl(const std::vector& x, auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {}); - size_t out_number = x.size(); - std::vector x_grad; - auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad); + auto dense_x_grad = SetKernelOutput(&x_grad); using kernel_signature = void (*)(const platform::DeviceContext&, const phi::DenseTensor&, @@ -1037,8 +1032,6 @@ std::vector add_n_grad_impl(const std::vector& x, (*kernel_fn)( *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t); } - - return x_grad; } std::tuple batch_norm_impl( @@ -1170,7 +1163,7 @@ std::tuple batch_norm_impl( return api_output; } -Tensor imag_grad_impl(const Tensor& out_grad) { +void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad) { phi::KernelKey kernel_key{ParseBackend(out_grad), out_grad.layout(), phi::dtype::ToComplex(out_grad.dtype())}; @@ -1184,8 +1177,7 @@ Tensor imag_grad_impl(const Tensor& out_grad) { auto dense_out_grad = TensorToDenseTensor(out_grad); - Tensor out; - auto kernel_out = SetKernelOutput(kernel_key.backend(), &out); + auto kernel_out = SetKernelOutput(kernel_key.backend(), x_grad); phi::MetaTensor meta_out(kernel_out); phi::RealAndImagGradInferMeta(*dense_out_grad, &meta_out); @@ -1194,11 +1186,9 @@ Tensor imag_grad_impl(const Tensor& out_grad) { auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)(*dev_ctx, *dense_out_grad, kernel_out); - - return out; } -Tensor real_grad_impl(const Tensor& out_grad) { +void real_grad_impl(const Tensor& out_grad, Tensor* x_grad) { phi::KernelKey kernel_key{ParseBackend(out_grad), out_grad.layout(), phi::dtype::ToComplex(out_grad.dtype())}; @@ -1212,8 +1202,7 @@ Tensor real_grad_impl(const Tensor& out_grad) { auto dense_out_grad = TensorToDenseTensor(out_grad); - Tensor out; - auto kernel_out = SetKernelOutput(kernel_key.backend(), &out); + auto kernel_out = SetKernelOutput(kernel_key.backend(), x_grad); phi::MetaTensor meta_out(kernel_out); phi::RealAndImagGradInferMeta(*dense_out_grad, &meta_out); @@ -1222,8 +1211,6 @@ Tensor real_grad_impl(const Tensor& out_grad) { auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)(*dev_ctx, *dense_out_grad, kernel_out); - - return out; } } // namespace experimental diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h index 46abcd90de32a..d88a134654caf 100644 --- a/paddle/phi/api/lib/api_custom_impl.h +++ b/paddle/phi/api/lib/api_custom_impl.h @@ -96,20 +96,6 @@ Tensor conv2d_impl(const Tensor& input, int workspace_size_MB, bool exhaustive_search); -std::vector> conv2d_grad_impl( - const Tensor& input, - const Tensor& filter, - const Tensor& out_grad, - const std::vector& strides, - const std::vector& paddings, - const std::string& paddding_algorithm, - int groups, - const std::vector& dilations, - const std::string& data_format, - bool use_addto, - int workspace_size_MB, - bool exhaustive_search); - Tensor copy_to_impl(const Tensor& x, Place place, bool blocking); std::vector split_impl(const Tensor& x, @@ -138,12 +124,28 @@ std::tuple sgd_impl( ////////////////// Backward(grad) api impls ////////////////////// -std::vector add_n_grad_impl(const std::vector& x, - const Tensor& out_grad); - -Tensor imag_grad_impl(const Tensor& x); - -Tensor real_grad_impl(const Tensor& x); +void add_n_grad_impl(const std::vector& x, + const Tensor& out_grad, + std::vector x_grad); + +void conv2d_grad_impl(const Tensor& input, + const Tensor& filter, + const Tensor& out_grad, + const std::vector& strides, + const std::vector& paddings, + const std::string& paddding_algorithm, + int groups, + const std::vector& dilations, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + Tensor* input_grad, + Tensor* filter_grad); + +void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad); + +void real_grad_impl(const Tensor& out_grad, Tensor* x_grad); } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index e0c910ba3d66c..fdf1462ec6fc0 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -133,6 +133,18 @@ std::vector SetKernelOutput(size_t out_size, return results; } +std::vector SetKernelOutput(std::vector* out) { + std::vector results(out->size(), nullptr); + for (size_t i = 0; i < out->size(); ++i) { + if (out->at(i)) { + auto tensor_ptr = std::make_shared(); + results[i] = tensor_ptr.get(); + (*out)[i]->set_impl(tensor_ptr); + } + } + return results; +} + phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out) { if (!out->initialized()) { auto select_rows = std::make_shared(); diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 47b80bb3fc290..7303e6b46114d 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -74,6 +74,9 @@ std::vector SetKernelOutput(size_t out_size, Backend backend, std::vector* out); +// For backward api +std::vector SetKernelOutput(std::vector* out); + phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out); phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type); diff --git a/paddle/phi/tests/api/test_matmul_api.cc b/paddle/phi/tests/api/test_matmul_api.cc index e2c324a6775c8..0d4ec7bd4f592 100644 --- a/paddle/phi/tests/api/test_matmul_api.cc +++ b/paddle/phi/tests/api/test_matmul_api.cc @@ -179,8 +179,18 @@ TEST(API, matmul_double_grad) { auto dx_grad = paddle::experimental::full({3, 3}, 2.0); // 2. test API - const auto out = paddle::experimental::matmul_double_grad( - x, y, out_grad, dx_grad, {}, false, false); + std::vector> out( + 3, std::vector(1)); + paddle::experimental::matmul_double_grad(x, + y, + out_grad, + dx_grad, + {}, + false, + false, + &out[0][0], + &out[1][0], + &out[2][0]); // 3. check result ASSERT_EQ(out.size(), 3UL); diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index a6bd0a10cb1fa..3c28f598c9df5 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -60,6 +60,12 @@ def get_api_name(self, api_item_yaml): def get_api_func_name(self): return self.api + def get_declare_args(self): + return self.args_str['args_declare'] + + def get_define_args(self): + return self.args_str["args_define"] + def parse_args(self, api_name, api_item_yaml): optional_vars = [] if 'optional' in api_item_yaml: @@ -309,12 +315,12 @@ def get_return_type(self, out_type_list): def gene_api_declaration(self): api_declaration = f""" -PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.args_str['args_declare']}); +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.get_declare_args()}); """ if self.is_base_api and self.inplace_map is not None: api_declaration = api_declaration + f""" -PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self.args_str['args_declare']}); +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self.get_declare_args()}); """ return api_declaration @@ -712,7 +718,7 @@ def gene_return_type_code(self): # Override by child class def gene_return_code(self): - return "api_output" + return "return api_output;" # Override by child class def gene_output(self, @@ -748,7 +754,7 @@ def gen_dense_tensor_kernel_code(self, code_indent, inplace_flag=False): {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); {code_indent} }} -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gen_selected_rows_kernel_code(self, code_indent, inplace_flag=False): input_tensors, kernel_args, kernel_signature = self.get_selected_rows_kernel_args( @@ -775,12 +781,12 @@ def gen_selected_rows_kernel_code(self, code_indent, inplace_flag=False): {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); {code_indent} }} -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gene_base_api_code(self, inplace_flag=False): api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '') api_code = f""" -PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.args_str["args_define"]}) {{ +PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.get_define_args()}) {{ {self.gene_kernel_select()} """ @@ -802,6 +808,12 @@ def gene_base_api_code(self, inplace_flag=False): } """ + def gene_invoke_code(self, invoke_code, params_code): + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + return {invoke_code}; +}}""" + def gene_api_code(self): if self.is_base_api: api_code = self.gene_base_api_code() @@ -821,12 +833,8 @@ def adjust_name(matched): invoke_code = re.sub(pattern, adjust_name, self.invoke) params_code = re.sub(pattern, adjust_name, - self.args_str["args_define"]) + self.get_define_args()) else: invoke_code = self.invoke - params_code = self.args_str["args_define"] - return f""" -{self.outputs['return_type']} {self.api}({params_code}) {{ - return {invoke_code}; -}} -""" + params_code = self.get_define_args() + return self.gene_invoke_code(invoke_code, params_code) diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 538958c2361bc..291e6f4ce44bd 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -61,19 +61,19 @@ def gene_return_type_code(self): def gene_return_code(self): if self.is_dygraph_api or len(self.intermediate_outs) == 0: - return "api_output" + return "return api_output;" else: return_out_list = [] for i, name in enumerate(self.outputs['names']): if name not in self.intermediate_outs: return_out_list.append(i) if len(return_out_list) == 1: - return f"std::get<{return_out_list[0]}>(api_output)" + return f"return std::get<{return_out_list[0]}>(api_output);" else: selected_code = [ f"std::get<{i}>(api_output)" for i in return_out_list ] - return '{' + ", ".join(selected_code) + '}' + return 'return {' + ", ".join(selected_code) + '};' def gene_output(self, output_type_list, diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index 64acc140c2117..088491465778c 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -58,7 +58,7 @@ forward : add_n (Tensor[] x) -> Tensor(out) args : (Tensor[] x, Tensor out_grad) output : Tensor[](x_grad){x.size()} - invoke : add_n_grad_impl(x, out_grad) + invoke : add_n_grad_impl(x, out_grad, x_grad) no_need_buffer : x - backward_api : add_triple_grad @@ -260,7 +260,7 @@ forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out) args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) output : Tensor(input_grad), Tensor(filter_grad) - invoke : conv2d_grad_impl(input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search) + invoke : conv2d_grad_impl(input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, input_grad, filter_grad) backward : conv2d_grad_grad - backward_api : conv2d_grad_grad @@ -687,7 +687,7 @@ forward : imag (Tensor x) -> Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) - invoke : imag_grad_impl(out_grad) + invoke : imag_grad_impl(out_grad, x_grad) - backward_api : index_sample_grad forward : index_sample (Tensor x, Tensor index) -> Tensor(out) @@ -1279,7 +1279,7 @@ forward : real (Tensor x) -> Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) - invoke : real_grad_impl(out_grad) + invoke : real_grad_impl(out_grad, x_grad) - backward_api : reciprocal_grad forward : reciprocal (Tensor x) -> Tensor(out) @@ -1661,7 +1661,7 @@ forward : sum_double_grad (Tensor grad_grad_x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(grad_grad_out) args : (Tensor grad_grad_x, Tensor grad_grad_out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false) output : Tensor(grad_grad_x_grad) - invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all) + invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all, grad_grad_x_grad) no_need_buffer : x - backward_api : swish_grad diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index a88339c607c55..a155a2c3d6c9f 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -77,6 +77,25 @@ def check_args(self, forward_config): f"{self.api} : Output error: The number of outputs should be less then the number of inputs of forward api. \ Please check the output of {self.api} in yaml." + def get_declare_args(self): + return self.get_define_args() + + def get_define_args(self): + out_type_map = { + 'Tensor': 'Tensor*', + 'std::vector': 'std::vector' + } + intputs_and_attrs = self.args_str['args_define'] + outs = [] + for i, name in enumerate(self.outputs['names']): + outs.append(out_type_map[self.outputs['types'][i]] + ' ' + + name.split('@')[0]) + result = intputs_and_attrs + ', ' + ", ".join(outs) + return result + + def gene_return_code(self): + return "" + def gene_kernel_backend_select(self): all_no_need_buffer = True for in_name in self.inputs['names']: @@ -91,8 +110,7 @@ def gene_kernel_backend_select(self): return super().gene_kernel_backend_select() def get_return_type(self, out_type_list): - return out_type_list[0] if len( - out_type_list) == 1 else "std::vector>" + return 'void' def gene_output(self, output_type_list, @@ -109,23 +127,19 @@ def gene_output(self, inplace_assign = " = " + self.inplace_map[self.outputs['names'][ 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" - output_create = f""" -{code_indent} {self.outputs['return_type']} api_output{inplace_assign};""" - + output_create = "" if output_type_list[0] == 'std::vector': assert self.outputs['out_size_expr'] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." output_create = output_create + f""" -{code_indent} auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);""" +{code_indent} auto kernel_out = {set_out_func}(&{self.outputs['names'][0]});""" else: output_create = output_create + f""" -{code_indent} auto kernel_out = {set_out_func}(kernel_backend, &api_output);""" +{code_indent} auto kernel_out = {set_out_func}(kernel_backend, {self.outputs['names'][0]});""" elif len(output_type_list) > 1: - output_create = f""" -{code_indent} {self.outputs['return_type']} api_output({len(output_type_list)});""" - + output_create = "" for i, out_type_item in enumerate(output_type_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') @@ -133,26 +147,21 @@ def gene_output(self, if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][i] in self.inplace_map: output_create = output_create + f""" -{code_indent} api_output[{i}].emplace_back({self.inplace_map[self.outputs['names'][i]]});""" - - else: - output_create = output_create + f""" -{code_indent} api_output[{i}].emplace_back();""" +{code_indent} *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &api_output[{i}][0]);""" +{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, {self.outputs['names'][i]});""" else: - get_out_code = f'&api_output[{i}]' if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][i] in self.inplace_map: output_create = output_create + f""" -{code_indent} api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};""" +{code_indent} *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" assert self.outputs['out_size_expr'][i] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &api_output[{i}]);""" +{code_indent} auto kernel_out_{i} = {set_out_func}(&{self.outputs['names'][i]});""" kernel_output = kernel_output[:-2] else: @@ -162,6 +171,21 @@ def gene_output(self, return kernel_output, output_names, output_create + def gene_invoke_code(self, invoke_code, params_code): + inveke_func_name = invoke_code.split('(')[0].strip() + if inveke_func_name.endswith('_grad') or inveke_func_name.endswith( + '_grad_impl'): + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + {invoke_code}; +}}""" + + else: + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + *{self.outputs['names'][0].split('@')[0]} = {invoke_code}; +}}""" + def header_include(): return """ diff --git a/python/paddle/utils/code_gen/sparse_api_gen.py b/python/paddle/utils/code_gen/sparse_api_gen.py index c0316fc164294..6fc176b27b617 100644 --- a/python/paddle/utils/code_gen/sparse_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_api_gen.py @@ -27,7 +27,7 @@ def __init__(self, api_item_yaml): def gene_api_declaration(self): return f""" // {", ".join(self.outputs['names'])} -PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_str['args_declare']}); +PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.get_declare_args()}); """ def get_kernel_tensor_out_type(self, output_name): @@ -136,7 +136,8 @@ def gen_sparse_kernel_code(self, inplace_flag=False): kernel_context_code = self.gen_sparse_kernel_context( kernel_output_names) - + return_code = "" if len(self.gene_return_code( + )) == 0 else " " + self.gene_return_code() return f""" auto phi_kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}}); @@ -148,13 +149,12 @@ def gen_sparse_kernel_code(self, inplace_flag=False): {output_create} {kernel_context_code} phi_kernel(&kernel_context); - - return api_output;""" +{return_code}""" def gene_base_api_code(self, inplace_flag=False): api_func_name = self.get_api_func_name() return f""" -PADDLE_API {self.outputs['return_type']} {api_func_name}({self.args_str["args_define"]}) {{ +PADDLE_API {self.outputs['return_type']} {api_func_name}({self.get_define_args()}) {{ {self.gene_kernel_select()} {self.gen_sparse_kernel_code(inplace_flag)} }} diff --git a/python/paddle/utils/code_gen/sparse_bw_api_gen.py b/python/paddle/utils/code_gen/sparse_bw_api_gen.py index 4f209a7592161..359e1a3b4e765 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_bw_api_gen.py @@ -34,9 +34,18 @@ def gene_kernel_backend_select(self): def get_return_type(self, out_type_list): return BackwardAPI.get_return_type(self, out_type_list) + def gene_return_code(self): + return "" + def gene_api_declaration(self): return SparseAPI.gene_api_declaration(self) + def get_declare_args(self): + return BackwardAPI.get_declare_args(self) + + def get_define_args(self): + return BackwardAPI.get_define_args(self) + def gene_output(self, output_type_list, set_out_func, @@ -53,36 +62,21 @@ def gene_output(self, 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" output_create = f""" - {self.outputs['return_type']} api_output{inplace_assign}; - auto kernel_out = {set_out_func}(&api_output, {self.get_kernel_tensor_out_type(self.outputs['names'][0])});""" + auto kernel_out = {set_out_func}({self.outputs['names'][0].split('@')[0]}, {self.get_kernel_tensor_out_type(self.outputs['names'][0])});""" elif len(output_type_list) > 1: - output_create = f""" - {self.outputs['return_type']} api_output({len(output_type_list)});""" + output_create = "" for i, out_type_item in enumerate(output_type_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') - if out_type_item == 'Tensor': - get_out_code = f'&api_output[{i}][0]' - if inplace_flag and self.inplace_map is not None and self.outputs[ - 'names'][i] in self.inplace_map: - output_create = output_create + f""" - api_output[{i}].emplace_back({self.inplace_map[self.outputs['names'][i]]});""" - - else: - output_create = output_create + f""" - api_output[{i}].emplace_back();""" - - else: - get_out_code = f'&api_output[{i}]' - if inplace_flag and self.inplace_map is not None and self.outputs[ - 'names'][i] in self.inplace_map: - output_create = output_create + f""" - api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};""" + if inplace_flag and self.inplace_map is not None and self.outputs[ + 'names'][i] in self.inplace_map: + output_create = output_create + f""" + *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" - auto kernel_out_{i} = {set_out_func}({get_out_code}, {self.get_kernel_tensor_out_type(self.outputs['names'][i])});""" + auto kernel_out_{i} = {set_out_func}({self.outputs['names'][i].split('@')[0]}, {self.get_kernel_tensor_out_type(self.outputs['names'][i])});""" kernel_output = kernel_output[:-2] else: diff --git a/python/paddle/utils/code_gen/strings_api_gen.py b/python/paddle/utils/code_gen/strings_api_gen.py index d7117e9d54060..746cd861b1c56 100644 --- a/python/paddle/utils/code_gen/strings_api_gen.py +++ b/python/paddle/utils/code_gen/strings_api_gen.py @@ -194,7 +194,7 @@ def gen_string_tensor_kernel_code(self, inplace_flag=False, code_indent=""): {code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn(); {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gene_kernel_select(self) -> str: api = self.api From 004e791e51a5174cb2bf243ff8aa35437f45edcb Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 03:54:51 +0000 Subject: [PATCH 02/16] fix merge conflict --- .../auto_code_generator/final_state_generator/eager_gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 9c9d74709630b..ac2a89fd317df 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1324,9 +1324,9 @@ def GenerateNodeDefinition(self, grad_node_creation_str): slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) grad_api_namespace = f"paddle::experimental::{namespace}" grad_function_call_str = f""" - std::vector> returns({slot_num_bwd_outputs}); + paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); const auto& out_metas = OutputMeta(); - std::vector> api_output({slot_num_bwd_outputs}); + paddle::small_vector, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs}); for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ api_output[i].reserve(returns.size()); for (size_t j = 0; j < returns[i].size(); ++j) {{ From 6d5fdf4f3bef9d124846f6405f44ad4ad5571b3b Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 08:44:00 +0000 Subject: [PATCH 03/16] fix sparse api code auto-gen --- paddle/phi/tests/api/test_sparse_conv_api.cc | 6 +++--- python/paddle/utils/code_gen/api_gen.py | 4 ++-- python/paddle/utils/code_gen/sparse_api_gen.py | 10 ++++------ python/paddle/utils/code_gen/sparse_bw_api_gen.py | 3 +++ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/paddle/phi/tests/api/test_sparse_conv_api.cc b/paddle/phi/tests/api/test_sparse_conv_api.cc index 7c4aa16425907..c00113389adb7 100644 --- a/paddle/phi/tests/api/test_sparse_conv_api.cc +++ b/paddle/phi/tests/api/test_sparse_conv_api.cc @@ -77,11 +77,11 @@ void TestConv3dBase(const std::vector& indices, kernel.size() * sizeof(T)); if (!std::is_same::value) { - auto outs = paddle::experimental::sparse::conv3d( + auto tensor_out = paddle::experimental::sparse::conv3d( x, weight, paddings, dilations, strides, 1, false); - auto out = std::dynamic_pointer_cast( - std::get<0>(outs).impl()); + auto out = + std::dynamic_pointer_cast(tensor_out.impl()); ASSERT_EQ(correct_out_dims.size(), out->dims().size()); for (int i = 0; i < correct_out_dims.size(); i++) { ASSERT_EQ(correct_out_dims[i], out->dims()[i]); diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 291e6f4ce44bd..8fd95f9a191c3 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -53,7 +53,7 @@ def gene_return_type_code(self): else: return_out_list = [] for i, name in enumerate(self.outputs['names']): - if name not in self.intermediate_outs: + if name.split('@')[0] not in self.intermediate_outs: return_out_list.append(self.outputs['types'][i]) return return_out_list[0] if len( return_out_list) == 1 else "std::tuple<" + ",".join( @@ -65,7 +65,7 @@ def gene_return_code(self): else: return_out_list = [] for i, name in enumerate(self.outputs['names']): - if name not in self.intermediate_outs: + if name.split('@')[0] not in self.intermediate_outs: return_out_list.append(i) if len(return_out_list) == 1: return f"return std::get<{return_out_list[0]}>(api_output);" diff --git a/python/paddle/utils/code_gen/sparse_api_gen.py b/python/paddle/utils/code_gen/sparse_api_gen.py index 6fc176b27b617..eb9bca2eca7b7 100644 --- a/python/paddle/utils/code_gen/sparse_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_api_gen.py @@ -25,10 +25,9 @@ def __init__(self, api_item_yaml): super(SparseAPI, self).__init__(api_item_yaml) def gene_api_declaration(self): - return f""" -// {", ".join(self.outputs['names'])} -PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.get_declare_args()}); -""" + api_declaration = "// " + ', '.join(self.outputs['names']) + return api_declaration + super(SparseAPI, + self).gene_api_declaration() + '\n' def get_kernel_tensor_out_type(self, output_name): sparse_type = 'TensorType::DENSE_TENSOR' @@ -152,9 +151,8 @@ def gen_sparse_kernel_code(self, inplace_flag=False): {return_code}""" def gene_base_api_code(self, inplace_flag=False): - api_func_name = self.get_api_func_name() return f""" -PADDLE_API {self.outputs['return_type']} {api_func_name}({self.get_define_args()}) {{ +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.get_define_args()}) {{ {self.gene_kernel_select()} {self.gen_sparse_kernel_code(inplace_flag)} }} diff --git a/python/paddle/utils/code_gen/sparse_bw_api_gen.py b/python/paddle/utils/code_gen/sparse_bw_api_gen.py index 359e1a3b4e765..6dc4a2668ebb9 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_bw_api_gen.py @@ -34,6 +34,9 @@ def gene_kernel_backend_select(self): def get_return_type(self, out_type_list): return BackwardAPI.get_return_type(self, out_type_list) + def gene_return_type_code(self): + return self.outputs['return_type'] + def gene_return_code(self): return "" From 7ea2b71a71e02a786770d80874d45805d8c07c41 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 09:56:53 +0000 Subject: [PATCH 04/16] fix eager_gen bug --- .../auto_code_generator/final_state_generator/eager_gen.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index ac2a89fd317df..12c4d8943c8f0 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1324,11 +1324,12 @@ def GenerateNodeDefinition(self, grad_node_creation_str): slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) grad_api_namespace = f"paddle::experimental::{namespace}" grad_function_call_str = f""" - paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); paddle::small_vector, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs}); for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ - api_output[i].reserve(returns.size()); + returns[i].resize(out_metas[i].size()); + api_output[i].reserve(out_metas[i].size()); for (size_t j = 0; j < returns[i].size(); ++j) {{ if (out_metas[i][j].IsStopGradient()) {{ api_output[i].push_back(nullptr); From 25eba8d494cb505cfe91d72ff20dd18a0e3c7c36 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 11:40:57 +0000 Subject: [PATCH 05/16] fix bug of output is null --- paddle/phi/api/lib/api_gen_utils.cc | 9 ++++++--- python/paddle/utils/code_gen/api_base.py | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 9d62278ec9781..2111829b8d60b 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -113,10 +113,13 @@ phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) { /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { - if (out->impl() == nullptr) { - out->set_impl(std::make_shared()); + if (out) { + if (out->impl() == nullptr) { + out->set_impl(std::make_shared()); + } + return static_cast(out->impl().get()); } - return static_cast(out->impl().get()); + return nullptr; } std::vector SetKernelOutput(size_t out_size, diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index 6f4c478d68dc7..af870fcc8e54d 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -519,7 +519,7 @@ def gene_infer_meta(self, kernel_output_names, code_indent) -> str: {code_indent} auto {out_name}_{PREFIX_META_TENSOR_NAME}vec = MakeMetaTensor({out_name}); {code_indent} std::vector {out_name}_metas({out_name}_{PREFIX_META_TENSOR_NAME}vec.size()); {code_indent} for (size_t i = 0; i < {out_name}_{PREFIX_META_TENSOR_NAME}vec.size(); ++i) {{ -{code_indent} {out_name}_metas[i] = &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i]; +{code_indent} {out_name}_metas[i] = {out_name}[i] ? &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i] : nullptr; {code_indent} }}""" param_code = param_code + out_name + '_metas, ' @@ -527,8 +527,10 @@ def gene_infer_meta(self, kernel_output_names, code_indent) -> str: meta_tensor_code = meta_tensor_code + code_indent + " phi::MetaTensor " + out_name.replace( 'kernel_', PREFIX_META_TENSOR_NAME) + "(" + out_name + ");\n" - param_code = param_code + "&" + out_name.replace( - 'kernel_', PREFIX_META_TENSOR_NAME) + ", " + if len(kernel_output_names) == 1: + param_code = param_code + f"&{out_name.replace('kernel_', PREFIX_META_TENSOR_NAME)}, " + else: + param_code = param_code + f"{out_name} ? &{out_name.replace('kernel_', PREFIX_META_TENSOR_NAME)} : nullptr, " param_code = param_code[:-2] return f"""{meta_tensor_code} From a12b2ff042d80c5e2ef4a8400dbcfbd404986af5 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 11:48:07 +0000 Subject: [PATCH 06/16] fix bug of conv2d_grad_impl --- paddle/phi/api/lib/api_custom_impl.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index e96e910c14840..d80444e7f710c 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -654,8 +654,8 @@ void conv2d_grad_impl(const Tensor& input, phi::GeneralBinaryGradInferMeta(MakeMetaTensor(*input_input), MakeMetaTensor(*input_filter), - &meta_out_0, - &meta_out_1); + kernel_out_0 ? &meta_out_0 : nullptr, + kernel_out_1 ? &meta_out_1 : nullptr); using kernel_signature = void (*)(const platform::DeviceContext&, const phi::DenseTensor&, From c8c2fe2296bad17d388402e5968a1db0f1c1ccc0 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sat, 7 May 2022 17:23:16 +0000 Subject: [PATCH 07/16] fix optional grad --- .../final_state_generator/eager_gen.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 12c4d8943c8f0..1d70d2a41249b 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -146,7 +146,7 @@ class {} : public egr::GradNodeBase {{ {} // Call grad_api function - VLOG(3) << \"Final State Running: \" << \"{}\"; + VLOG(3) << \"Final State Running: {}\"; {} // Get GradIn autograd_meta {} @@ -1312,11 +1312,10 @@ def GenerateNodeDefinition(self, grad_node_creation_str): grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) if IsPlainTensorType(ttype): - grad_api_args.append(f"api_output[{grad_api_position}][0]") + grad_api_args.append(f"api_output[{fwd_position}][0]") else: assert IsVectorTensorType(ttype) - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = returns[{grad_api_position}];" - grad_api_args.append(f"api_output[{grad_api_position}]") + grad_api_args.append(f"api_output[{fwd_position}]") grad_api_args_str = ", ".join(grad_api_args) @@ -1329,7 +1328,12 @@ def GenerateNodeDefinition(self, grad_node_creation_str): paddle::small_vector, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs}); for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ returns[i].resize(out_metas[i].size()); - api_output[i].reserve(out_metas[i].size()); + if(returns[i].size() == 0) {{ + api_output[i].reserve(1); + api_output[i].push_back(nullptr); + continue; + }} + api_output[i].reserve(returns[i].size()); for (size_t j = 0; j < returns[i].size(); ++j) {{ if (out_metas[i][j].IsStopGradient()) {{ api_output[i].push_back(nullptr); From 388dedd599a922d28aae01bcc72aeda7a9c84b8a Mon Sep 17 00:00:00 2001 From: zyfncg Date: Mon, 9 May 2022 09:23:49 +0000 Subject: [PATCH 08/16] fix bug of eager-gen double_grad --- .../final_state_generator/CMakeLists.txt | 4 +-- .../final_state_generator/codegen_utils.py | 4 --- .../final_state_generator/eager_gen.py | 32 ++++++++++++------- paddle/phi/infermeta/multiary.cc | 4 ++- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt index 8e89ea3f19762..94f7f717fb24a 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt @@ -16,9 +16,9 @@ add_custom_target(eager_final_state_codegen COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py" "--api_yaml_path=${api_yaml_path}" "--backward_yaml_path=${backward_yaml_path}" - "--forwards_cc_path=${tmp_forwards_cc_path}" + "--forwards_cc_path=${tmp_forwards_cc_path}" "--forwards_h_path=${tmp_forwards_h_path}" - "--nodes_cc_path=${tmp_nodes_cc_path}" + "--nodes_cc_path=${tmp_nodes_cc_path}" "--nodes_h_path=${tmp_nodes_h_path}" COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_cc_path} ${forwards_cc_path} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_h_path} ${forwards_h_path} diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py index 448fa546255bb..9ad628ef515b1 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py @@ -416,10 +416,6 @@ def DetermineForwardPositionMap(self, forward_inputs_list, self.forward_outputs_position_map[ return_name] = [return_type, return_pos] - print("Generated Forward Input Position Map: ", - self.forward_inputs_position_map) - print("Generated Forward Output Position Map: ", - self.forward_outputs_position_map) class YamlGeneratorBase: diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 1d70d2a41249b..c7d176d8f9889 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -653,7 +653,7 @@ def GenerateNodeCreationCodes(self): pass_stop_gradient_args_list.append(output_autograd_meta_name) pass_stop_gradient_args_str = ",".join(pass_stop_gradient_args_list) - # Node Construction + # Node Construction num_backward_inputs = len(forward_outputs_position_map.keys()) num_backward_outputs = len(forward_inputs_position_map.keys()) grad_node_name = GetGradNodeName(forward_api_name) @@ -713,6 +713,7 @@ def GenerateNodeCreationCodes(self): set_output_tensor_wrappers_list) # SetGradOutMeta & SetEdges + grad_node_out_list = [] set_grad_out_meta_list = [] set_edges_list = [] for name, (_, pos) in forward_inputs_position_map.items(): @@ -725,7 +726,7 @@ def GenerateNodeCreationCodes(self): if not has_corresponding_grad_output: continue - input_autograd_meta_name = GetAutoGradMetaName(name) + grad_node_out_list.append(name) is_optional = (name in self.optional_inputs) if is_optional: set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});" @@ -767,6 +768,7 @@ def GenerateNodeCreationCodes(self): set_input_tensor_wrappers_str, set_grad_out_meta_str, set_out_rank_str, set_history_str, set_grad_in_meta_str, set_retain_grad_str, set_output_tensor_wrappers_str) + self.grad_node_out_list = grad_node_out_list def run(self): # Basic Validation Check @@ -1154,6 +1156,7 @@ def GenerateHigherOrderNodeCreationCode(self): next_grad_api_contents = self.next_grad_api_contents grad_node_creation_str = "" + grad_node_out_list = [] if next_grad_api_contents: forward_api_contents = grad_api_contents forward_api_contents['api'] = forward_api_contents['backward_api'] @@ -1164,10 +1167,11 @@ def GenerateHigherOrderNodeCreationCode(self): next_node_generator.run() next_node_generator.GenerateNodeCreationCodes() grad_node_creation_str = next_node_generator.node_creation_str + grad_node_out_list = next_node_generator.grad_node_out_list self.RecordGrad2NextGradNameMapping(next_node_generator) - return grad_node_creation_str + return grad_node_creation_str, grad_node_out_list def GenerateNodeDeclaration(self): forward_op_name = self.forward_api_name @@ -1230,7 +1234,8 @@ def GenerateNodeDeclaration(self): logging.info(f"Generated Node Declaration: {self.node_declaration_str}") - def GenerateNodeDefinition(self, grad_node_creation_str): + def GenerateNodeDefinition(self, grad_node_creation_str, + grad_node_out_list): namespace = self.namespace forward_api_name = self.forward_api_name backward_api_name = self.backward_api_name @@ -1370,7 +1375,9 @@ def GenerateNodeDefinition(self, grad_node_creation_str): input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append(input_autograd_meta_name) + if name in grad_node_out_list: + compute_require_grad_args_list.append( + input_autograd_meta_name) # 2. Get TensorWrapper AutoGradMeta for name, (ttype, _, pos), in backward_forward_inputs_map.items(): @@ -1388,7 +1395,9 @@ def GenerateNodeDefinition(self, grad_node_creation_str): input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append(input_autograd_meta_name) + if name in grad_node_out_list: + compute_require_grad_args_list.append( + input_autograd_meta_name) inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list) compute_require_grad_args_str = ",".join( compute_require_grad_args_list) @@ -1406,13 +1415,13 @@ def GenerateNodeDefinition(self, grad_node_creation_str): transformed_tensor_name) if IsPlainTensorType(rtype): output_autograd_meta = f""" - auto& {transformed_tensor_name} = returns[{grad_api_position}][0]; + auto& {transformed_tensor_name} = returns[{pos}][0]; egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});""" else: assert IsVectorTensorType(rtype) output_autograd_meta = f""" - auto& {transformed_tensor_name} = returns[{grad_api_position}]; + auto& {transformed_tensor_name} = returns[{pos}]; std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name}; """ @@ -1445,16 +1454,17 @@ def run(self): ## Code Generation ## ##################### # Higher-order GradNode generation - grad_node_creation_str = self.GenerateHigherOrderNodeCreationCode() + grad_node_creation_str, grad_node_out_list = self.GenerateHigherOrderNodeCreationCode( + ) self.GenerateNodeDeclaration() - self.GenerateNodeDefinition(grad_node_creation_str) + self.GenerateNodeDefinition(grad_node_creation_str, grad_node_out_list) class DygraphYamlGenerator(YamlGeneratorBase): def __init__(self, api_yaml_path, backward_yaml_path): - # Parent members: + # Parent members: # self.namespace # self.api_yaml_path # self.forward_api_list diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 519d21b323fc2..e793eb8e66872 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1998,7 +1998,9 @@ void StackInferMeta(const std::vector& x, void UnchangedMultiInferMeta(const std::vector& x, std::vector out) { for (size_t i = 0; i < x.size(); ++i) { - out[i]->share_meta(*x[i]); + if (out[i]) { + out[i]->share_meta(*x[i]); + } } } From 8395a96b58cefb02ab5ef241bca7de9350fa48b6 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Mon, 9 May 2022 10:41:35 +0000 Subject: [PATCH 09/16] fix bug --- .../final_state_generator/eager_gen.py | 53 ++++++++++--------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 57f9915c91213..a2a573f07d875 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1226,6 +1226,12 @@ def GenerateNodeDefinition(self, grad_node_creation_str, backward_attrs_list = self.backward_attrs_list indent = GetIndent(1) + print("########## backward_api_name: ", backward_api_name) + print("########## backward_forward_inputs_map: ", + backward_forward_inputs_map) + print("########## backward_grad_inputs_map: ", backward_grad_inputs_map) + print("########## grad_node_out_list: ", grad_node_out_list) + # Construct grad_api function args # Order: TensorWrappers, GradTensors, Attributes grad_api_args_len = len(backward_forward_inputs_map.keys()) + len( @@ -1343,42 +1349,41 @@ def GenerateNodeDefinition(self, grad_node_creation_str, for name, (ttype, pos, grad_api_position) in backward_grad_inputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + if transformed_tensor_name in grad_node_out_list: + input_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + if IsPlainTensorType(ttype): + input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" + else: + assert IsVectorTensorType(ttype) + input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + transformed_tensor_name) + input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" + input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" - inputs_autograd_meta_list.append(input_autograd_meta) - if name in grad_node_out_list: + inputs_autograd_meta_list.append(input_autograd_meta) compute_require_grad_args_list.append( input_autograd_meta_name) # 2. Get TensorWrapper AutoGradMeta for name, (ttype, _, pos), in backward_forward_inputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + if transformed_tensor_name in grad_node_out_list: + input_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + if IsPlainTensorType(ttype): + input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" + else: + assert IsVectorTensorType(ttype) + input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + transformed_tensor_name) + input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" + input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" - inputs_autograd_meta_list.append(input_autograd_meta) - if name in grad_node_out_list: + inputs_autograd_meta_list.append(input_autograd_meta) compute_require_grad_args_list.append( input_autograd_meta_name) + inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list) compute_require_grad_args_str = ",".join( compute_require_grad_args_list) From 8bed60d4cddc2dde8b19f493972904413419d912 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 10 May 2022 07:19:29 +0000 Subject: [PATCH 10/16] fix multiply_double_grad bug --- .../final_state_generator/eager_gen.py | 6 ------ paddle/phi/api/lib/CMakeLists.txt | 1 + .../kernels/impl/elementwise_grad_kernel_impl.h | 14 ++++++++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index a2a573f07d875..4a131a8688e14 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1226,12 +1226,6 @@ def GenerateNodeDefinition(self, grad_node_creation_str, backward_attrs_list = self.backward_attrs_list indent = GetIndent(1) - print("########## backward_api_name: ", backward_api_name) - print("########## backward_forward_inputs_map: ", - backward_forward_inputs_map) - print("########## backward_grad_inputs_map: ", backward_grad_inputs_map) - print("########## grad_node_out_list: ", grad_node_out_list) - # Construct grad_api function args # Order: TensorWrappers, GradTensors, Attributes grad_api_args_len = len(backward_forward_inputs_map.keys()) + len( diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index b195ed1aefadc..ddeb073046bf1 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -74,6 +74,7 @@ add_custom_command( COMMAND ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} --api_header_path ${api_header_file_tmp} + --api_header_path ${api_header_file_tmp} --api_source_path ${api_source_file_tmp} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file} diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index fa1f15672b903..33a5d0cd4461b 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -513,6 +513,20 @@ void MultiplyDoubleGradKernel(const Context& dev_ctx, funcs::InverseMultiplyFunctor>( dev_ctx, dout, ddy_safe, dx, axis); } + } else { + if (dx && dy) { + phi::funcs::ElemwiseGradCompute, MulGradDY>( + dev_ctx, + ddx_safe, + ddy_safe, + dout, + dout, + axis, + dx, + dy, + MulGradDX(), + MulGradDY()); + } } } From 2cbc708dc0648b50c1f081dc3310625b96013eee Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 12 May 2022 02:32:30 +0000 Subject: [PATCH 11/16] fix bug of higher order derivative --- .../final_state_generator/codegen_utils.py | 9 +++--- .../final_state_generator/eager_gen.py | 2 +- paddle/fluid/eager/grad_node_info.cc | 2 ++ paddle/fluid/eager/utils.cc | 32 +++++++++---------- paddle/phi/api/lib/kernel_dispatch.h | 7 +++- paddle/phi/kernels/activation_grad_kernel.h | 3 +- paddle/phi/kernels/funcs/activation_functor.h | 17 +++++++--- .../phi/kernels/impl/activation_grad_impl.h | 8 ++--- python/paddle/utils/code_gen/backward.yaml | 1 + 9 files changed, 48 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py index 9ad628ef515b1..6a2df6cdcdfde 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py @@ -24,10 +24,11 @@ ops_to_fill_zero_for_empty_grads = set([ "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad", "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad", - "add_triple_grad", "multiply_double_grad", "multiply_triple_grad", - "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad", - "tanh_triple_grad", "subtract_double_grad", "divide_double_grad", - "log_double_grad", "elu_double_grad", "leaky_relu_double_grad" + "add_triple_grad", "multiply_grad", "multiply_double_grad", + "multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad", + "tanh_double_grad", "tanh_triple_grad", "subtract_double_grad", + "divide_double_grad", "log_double_grad", "elu_double_grad", + "leaky_relu_double_grad" ]) # For API dispatch used at python-level diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 4a131a8688e14..a4a4e7e81ac67 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1396,7 +1396,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str, if IsPlainTensorType(rtype): output_autograd_meta = f""" auto& {transformed_tensor_name} = returns[{pos}][0]; - egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});""" + egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;""" else: assert IsVectorTensorType(rtype) diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 2d4db8cb52974..af387bb3238d1 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -218,6 +218,8 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, // Set Stop_gradient if (fwd_in_meta) { meta.SetStopGradient(fwd_in_meta->StopGradient()); + } else { + meta.SetStopGradient(true); } // Set Adj Edges if (fwd_in_meta && !fwd_in_meta->StopGradient()) { diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 033af5c496c98..7917012f160ad 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -165,12 +165,14 @@ void EagerUtils::SetHistory(std::vector* autograd_metas, void EagerUtils::SetHistory(AutogradMeta* autograd_meta, const std::shared_ptr& grad_node) { - if (autograd_meta->GradNode()) { - VLOG(7) << "Should not set grad node twice, original node is:" - << autograd_meta->GradNode()->name() - << "current is: " << grad_node->name(); + if (autograd_meta) { + if (autograd_meta->GradNode()) { + VLOG(7) << "Should not set grad node twice, original node is:" + << autograd_meta->GradNode()->name() + << "current is: " << grad_node->name(); + } + autograd_meta->SetGradNode(grad_node); } - autograd_meta->SetGradNode(grad_node); } void EagerUtils::SetOutRankWithSlot(std::vector* targets, @@ -181,7 +183,7 @@ void EagerUtils::SetOutRankWithSlot(std::vector* targets, } } void EagerUtils::SetOutRankWithSlot(AutogradMeta* target, size_t slot_id) { - target->SetSingleOutRankWithSlot(slot_id, 0); + if (target) target->SetSingleOutRankWithSlot(slot_id, 0); } std::shared_ptr EagerUtils::TrySyncToVar( @@ -450,17 +452,13 @@ void EagerUtils::FillZeroForEmptyGradInputs( paddle::experimental::Tensor& grad = (*in_grads)[i][j]; if (!grad.initialized()) { const GradSlotMeta& grad_in_meta = grad_in_metas[i][j]; - PADDLE_ENFORCE( - grad_in_meta.HasTensorMeta(), - paddle::platform::errors::Fatal( - "Unable to fill empty grad inputs due to empty GradSlotMeta")); - - const auto& tensor_meta = grad_in_meta.GetTensorMeta(); - phi::Place place = grad_in_meta.GetPlace(); - - auto tensor_with_zero = paddle::experimental::full( - phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, place); - grad.set_impl(tensor_with_zero.impl()); + if (grad_in_meta.HasTensorMeta()) { + const auto& tensor_meta = grad_in_meta.GetTensorMeta(); + auto tensor_with_zero = paddle::experimental::full( + phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, + grad_in_meta.GetPlace()); + grad.set_impl(tensor_with_zero.impl()); + } } } } diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index 9f2ad6c62c7cf..29254a0486d00 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -109,7 +109,12 @@ struct KernelKeyParser : ArgsIterator { } } - void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); } + void operator()(const Tensor& x) { + const auto* tensor = x.impl().get(); + if (tensor) { + AssignKernelKeySet(*tensor); + } + } void operator()(const std::vector& x) { const phi::TensorBase& tensor = *x.at(0).impl(); diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h index fd42756ba3867..e5b083ac13c44 100644 --- a/paddle/phi/kernels/activation_grad_kernel.h +++ b/paddle/phi/kernels/activation_grad_kernel.h @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/infermeta/unary.h" +#include "paddle/utils/optional.h" namespace phi { @@ -136,7 +137,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, const DenseTensor& dout, const DenseTensor& ddx, const DenseTensor& d_dout_new, - const DenseTensor& d_ddout, + paddle::optional d_ddout, DenseTensor* d_out_new, DenseTensor* d_dout, DenseTensor* d_ddx); diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index b75477a1af982..315d540541f92 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -1428,16 +1428,19 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor { GET_DATA_SAFELY(Out, "Input", "Out", "SigmoidTripleGrad")); auto dout = EigenVector::Flatten( GET_DATA_SAFELY(dOut, "Input", "DOut", "SigmoidTripleGrad")); - auto d_ddOut = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad")); auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( d_dOut_New, "Input", "D_DOut_New", "SigmoidTripleGrad")); if (d_Out_New) { auto d_OutNew = EigenVector::Flatten(GET_DATA_SAFELY( d_Out_New, "Output", "D_OutNew", "SigmoidTripleGrad")); - d_OutNew.device(*d) = (ddx - static_cast(2) * out * ddx) * d_ddOut - - static_cast(2) * dout * ddx * d_dOutNew; + d_OutNew.device(*d) = -static_cast(2) * dout * ddx * d_dOutNew; + if (d_DDOut) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad")); + d_OutNew.device(*d) = + (ddx - static_cast(2) * out * ddx) * d_ddOut + d_OutNew; + } } if (d_d_Out) { auto d_dOut = EigenVector::Flatten( @@ -1449,8 +1452,12 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor { auto d_ddx = EigenVector::Flatten( GET_DATA_SAFELY(d_DDx, "Output", "D_DDx", "SigmoidTripleGrad")); d_ddx.device(*d) = - (static_cast(1) - out) * out * d_ddOut + (static_cast(1) - static_cast(2) * out) * dout * d_dOutNew; + if (d_DDOut) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad")); + d_ddx.device(*d) = d_ddx + (static_cast(1) - out) * out * d_ddOut; + } } } static constexpr ActBwdOpFwdDeps FwdDeps() { diff --git a/paddle/phi/kernels/impl/activation_grad_impl.h b/paddle/phi/kernels/impl/activation_grad_impl.h index 2f35acc095085..12fcac7d62d31 100644 --- a/paddle/phi/kernels/impl/activation_grad_impl.h +++ b/paddle/phi/kernels/impl/activation_grad_impl.h @@ -265,7 +265,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, const DenseTensor& dout, const DenseTensor& ddx, const DenseTensor& d_dout_new, - const DenseTensor& d_ddout, + paddle::optional d_ddout, DenseTensor* d_out_new, DenseTensor* d_dout, DenseTensor* d_ddx) { @@ -274,11 +274,11 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, dev_ctx.template Alloc(d_dout); } if (d_out_new) { - d_dout->Resize(out.dims()); + d_out_new->Resize(out.dims()); dev_ctx.template Alloc(d_out_new); } if (d_ddx) { - d_dout->Resize(ddx.dims()); + d_ddx->Resize(ddx.dims()); dev_ctx.template Alloc(d_ddx); } funcs::SigmoidTripleGradFunctor functor; @@ -286,7 +286,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, &out, &ddx, &dout, - &d_ddout, + d_ddout.get_ptr(), &d_dout_new, d_dout, d_out_new, diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index abffd0a68b406..484e3b271020c 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -1615,6 +1615,7 @@ param : [out, fwd_grad_out, grad_grad_x] kernel : func : sigmoid_triple_grad + optional : grad_grad_out_grad - backward_api : silu_grad forward : silu (Tensor x) -> Tensor(out) From 6d0913d879f7fdeaead3e9c11dc5b6c38636b0b9 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 13 May 2022 04:29:40 +0000 Subject: [PATCH 12/16] fix bug of FillZeroForEmptyGradInput --- .../final_state_generator/eager_gen.py | 20 ++++++-- paddle/fluid/eager/utils.cc | 50 ++++++++++++++++--- paddle/fluid/eager/utils.h | 7 +++ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index a4a4e7e81ac67..010493f81bd56 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -329,14 +329,14 @@ class {} : public egr::GradNodeBase {{ CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \ """ - paddle::optional {}_optional = paddle::none; - if({}.initialized()) {}_optional = paddle::make_optional({}); + paddle::optional {}_optional = paddle::none; + if({}.initialized()) {}_optional = paddle::make_optional({}); """ CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE = \ """ - paddle::optional {}_optional = paddle::none; - if( {}.impl() ) {}_optional = paddle::make_optional({}); + paddle::optional {}_optional = paddle::none; + if( {}.impl() ) {}_optional = paddle::make_optional({}); """ @@ -1236,7 +1236,17 @@ def GenerateNodeDefinition(self, grad_node_creation_str, # Fill Grad Ins with Zero fill_zero_str = "" if backward_api_name in ops_to_fill_zero_for_empty_grads: - fill_zero_str = f"{indent}egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, this->InputMeta());\n" + fill_zero_str = f"{indent}const auto& input_metas = this->InputMeta();\n" + for name, (ttype, fwd_position, + grad_api_position) in backward_grad_inputs_map.items(): + if name in self.optional_inputs: + if IsPlainTensorType(ttype): + fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n" + else: + if IsPlainTensorType(ttype): + fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n" + else: + fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}], input_metas[{fwd_position}]);\n" # Grad Ins from TensorWrappers for name, (_, is_fwd_input, diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index d95f09dbb0cad..b22a5c829499d 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -452,16 +452,52 @@ void EagerUtils::FillZeroForEmptyGradInputs( paddle::experimental::Tensor& grad = (*in_grads)[i][j]; if (!grad.initialized()) { const GradSlotMeta& grad_in_meta = grad_in_metas[i][j]; - if (grad_in_meta.HasTensorMeta()) { - const auto& tensor_meta = grad_in_meta.GetTensorMeta(); - auto tensor_with_zero = paddle::experimental::full( - phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, - grad_in_meta.GetPlace()); - grad.set_impl(tensor_with_zero.impl()); - } + PADDLE_ENFORCE( + grad_in_meta.HasTensorMeta(), + paddle::platform::errors::Fatal( + "Unable to fill empty grad inputs due to empty GradSlotMeta")); + const auto& tensor_meta = grad_in_meta.GetTensorMeta(); + auto tensor_with_zero = paddle::experimental::full( + phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, + grad_in_meta.GetPlace()); + grad.set_impl(tensor_with_zero.impl()); } } } } +void EagerUtils::FillZeroForEmptyGradInput( + paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) { + if (!in_grad->initialized()) { + PADDLE_ENFORCE( + grad_in_meta.HasTensorMeta(), + paddle::platform::errors::Fatal( + "Unable to fill empty grad inputs due to empty GradSlotMeta")); + const auto& tensor_meta = grad_in_meta.GetTensorMeta(); + auto tensor_with_zero = + paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0, + tensor_meta.dtype, grad_in_meta.GetPlace()); + in_grad->set_impl(tensor_with_zero.impl()); + } +} + +void EagerUtils::FillZeroForEmptyOptionalGradInput( + paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) { + if (!in_grad->initialized() && grad_in_meta.HasTensorMeta()) { + const auto& tensor_meta = grad_in_meta.GetTensorMeta(); + auto tensor_with_zero = + paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0, + tensor_meta.dtype, grad_in_meta.GetPlace()); + in_grad->set_impl(tensor_with_zero.impl()); + } +} + +void EagerUtils::FillZeroForEmptyGradInput( + std::vector* in_grads, + const std::vector& grad_in_metas) { + for (size_t i = 0; i < in_grads->size(); i++) { + FillZeroForEmptyGradInput(&in_grads->at(i), grad_in_metas[i]); + } +} + } // namespace egr diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index ef2b1baac661b..b96244f0d138b 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -238,6 +238,13 @@ class EagerUtils { kSlotSmallVectorSize>* out_grads, const paddle::small_vector, kSlotSmallVectorSize>& grad_out_metas); + static void FillZeroForEmptyGradInput(paddle::experimental::Tensor* in_grad, + const GradSlotMeta& grad_in_meta); + static void FillZeroForEmptyOptionalGradInput( + paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta); + static void FillZeroForEmptyGradInput( + std::vector* in_grads, + const std::vector& grad_in_metas); }; } // namespace egr From 53bb8124f11f07950bc1fdb0eece92318fe93a27 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 13 May 2022 05:33:04 +0000 Subject: [PATCH 13/16] remove redundant vector in grad_node --- .../final_state_generator/eager_gen.py | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 010493f81bd56..cf6baca8afd3d 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1303,43 +1303,46 @@ def GenerateNodeDefinition(self, grad_node_creation_str, get_grad_in_args_str = "\n".join(get_grad_in_args_list) - # Grad Outputs - for name, (ttype, fwd_position, - grad_api_position) in backward_grad_outputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - if IsPlainTensorType(ttype): - grad_api_args.append(f"api_output[{fwd_position}][0]") - else: - assert IsVectorTensorType(ttype) - grad_api_args.append(f"api_output[{fwd_position}]") - - grad_api_args_str = ", ".join(grad_api_args) - # Grad Function Call String slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) grad_api_namespace = f"paddle::experimental::{namespace}" grad_function_call_str = f""" const auto& out_metas = OutputMeta(); paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); - paddle::small_vector, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs}); for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ returns[i].resize(out_metas[i].size()); - if(returns[i].size() == 0) {{ - api_output[i].reserve(1); - api_output[i].push_back(nullptr); - continue; - }} - api_output[i].reserve(returns[i].size()); - for (size_t j = 0; j < returns[i].size(); ++j) {{ - if (out_metas[i][j].IsStopGradient()) {{ - api_output[i].push_back(nullptr); - }} else {{ - api_output[i].push_back(&returns[i][j]); - }} + }} +""" + + # Grad Outputs + out_index = -1 + for name, (ttype, fwd_position, + grad_api_position) in backward_grad_outputs_map.items(): + transformed_tensor_name = self.TransformToNextGradName(name) + out_index = out_index + 1 + grad_api_args.append(f"api_output_{out_index}") + + if IsPlainTensorType(ttype): + grad_function_call_str += f""" + auto* api_output_{out_index} = out_metas[{fwd_position}][0].IsStopGradient() ? nullptr : &returns[{fwd_position}][0]; +""" + + else: + assert IsVectorTensorType(ttype) + grad_function_call_str += f""" + std::vector api_output_{out_index}; + api_output_{out_index}.reserve(returns[{fwd_position}].size()); + for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{ + if (out_metas[{fwd_position}][i].IsStopGradient()) {{ + api_output_{out_index}.push_back(nullptr); + }} else {{ + api_output_{out_index}.push_back(&returns[{fwd_position}][i]); }} }} """ + grad_api_args_str = ", ".join(grad_api_args) + grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});" # Prepare for Node Creation if Necessary From 2ab5b8243456f05d1d2d8319eece79833ec9dab6 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 13 May 2022 06:39:03 +0000 Subject: [PATCH 14/16] fix bug of test_deformable_conv_v1_op --- .../auto_code_generator/final_state_generator/eager_gen.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index cf6baca8afd3d..51e71580275d8 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1324,8 +1324,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str, if IsPlainTensorType(ttype): grad_function_call_str += f""" - auto* api_output_{out_index} = out_metas[{fwd_position}][0].IsStopGradient() ? nullptr : &returns[{fwd_position}][0]; -""" + auto* api_output_{out_index} = (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][0].IsStopGradient()) ? nullptr : &returns[{fwd_position}][0];""" else: assert IsVectorTensorType(ttype) @@ -1338,8 +1337,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str, }} else {{ api_output_{out_index}.push_back(&returns[{fwd_position}][i]); }} - }} -""" + }}""" grad_api_args_str = ", ".join(grad_api_args) From 28d45b1e7f3f3abd75a75420594576f43dec3170 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 13 May 2022 06:48:35 +0000 Subject: [PATCH 15/16] fix bug of test_deformable_conv_v1_op --- .../auto_code_generator/final_state_generator/eager_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 51e71580275d8..d47ec7d7581c5 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1332,7 +1332,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str, std::vector api_output_{out_index}; api_output_{out_index}.reserve(returns[{fwd_position}].size()); for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{ - if (out_metas[{fwd_position}][i].IsStopGradient()) {{ + if (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][i].IsStopGradient()) {{ api_output_{out_index}.push_back(nullptr); }} else {{ api_output_{out_index}.push_back(&returns[{fwd_position}][i]); From 4390cd0216b0b03d479775a8034e317a6bfcd558 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 17 May 2022 07:20:58 +0000 Subject: [PATCH 16/16] some refacotr --- .../final_state_generator/eager_gen.py | 12 +++++++++--- paddle/fluid/eager/utils.cc | 14 ++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index d47ec7d7581c5..2ffb22e0c0419 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -729,8 +729,13 @@ def GenerateNodeCreationCodes(self): num_outputs = len(forward_outputs_position_map.keys()) for name, (_, pos) in forward_outputs_position_map.items(): output_autograd_meta_name = GetAutoGradMetaName(name) - set_out_rank = f"{indent}egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});" - set_history = f"{indent}egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);" + set_out_rank = f"""{indent}if ({output_autograd_meta_name}) {{ +{indent} egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos}); +{indent}}}""" + + set_history = f"""{indent}if ({output_autograd_meta_name}) {{ +{indent} egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node); +{indent}}}""" set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});" set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});" @@ -1341,7 +1346,8 @@ def GenerateNodeDefinition(self, grad_node_creation_str, grad_api_args_str = ", ".join(grad_api_args) - grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});" + grad_function_call_str = grad_function_call_str + f""" +{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});""" # Prepare for Node Creation if Necessary inputs_autograd_meta_str = "" diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index b22a5c829499d..f253c4cb51380 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -165,14 +165,12 @@ void EagerUtils::SetHistory(std::vector* autograd_metas, void EagerUtils::SetHistory(AutogradMeta* autograd_meta, const std::shared_ptr& grad_node) { - if (autograd_meta) { - if (autograd_meta->GradNode()) { - VLOG(7) << "Should not set grad node twice, original node is:" - << autograd_meta->GradNode()->name() - << "current is: " << grad_node->name(); - } - autograd_meta->SetGradNode(grad_node); + if (autograd_meta->GradNode()) { + VLOG(7) << "Should not set grad node twice, original node is:" + << autograd_meta->GradNode()->name() + << "current is: " << grad_node->name(); } + autograd_meta->SetGradNode(grad_node); } void EagerUtils::SetOutRankWithSlot(std::vector* targets, @@ -183,7 +181,7 @@ void EagerUtils::SetOutRankWithSlot(std::vector* targets, } } void EagerUtils::SetOutRankWithSlot(AutogradMeta* target, size_t slot_id) { - if (target) target->SetSingleOutRankWithSlot(slot_id, 0); + target->SetSingleOutRankWithSlot(slot_id, 0); } std::shared_ptr EagerUtils::TrySyncToVar(