diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc index add0359ccf25d..d27ca1d242953 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc @@ -94,7 +94,7 @@ paddle::Tensor add_n_ad_func(const std::vector& x) { // SetAttributes if needed // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrapper_x(x); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); // SetOutRank & SetHistory & SetGradInMeta & RetainGrad diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc index 33e9393a615bc..7cf3ee807b685 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc @@ -146,15 +146,15 @@ paddle::Tensor conv2d_ad_func(const paddle::Tensor& input, } // SetAttributes if needed - grad_node->SetAttributestrides(strides); - grad_node->SetAttributepaddings(paddings); - grad_node->SetAttributepadding_algorithm(padding_algorithm); - grad_node->SetAttributegroups(groups); - grad_node->SetAttributedilations(dilations); - grad_node->SetAttributedata_format(data_format); + grad_node->SetAttribute_strides(strides); + grad_node->SetAttribute_paddings(paddings); + grad_node->SetAttribute_padding_algorithm(padding_algorithm); + grad_node->SetAttribute_groups(groups); + grad_node->SetAttribute_dilations(dilations); + grad_node->SetAttribute_data_format(data_format); // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperinput(input); - grad_node->SetTensorWrapperfilter(filter); + grad_node->SetTensorWrapper_input(input); + grad_node->SetTensorWrapper_filter(filter); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(input, 0); grad_node->SetGradOutMeta(filter, 1); diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc index 18e36264ebe6b..856407c58e96c 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc @@ -157,25 +157,25 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x, grad_node->SetForwardTrace(egr::Controller::Instance().GetPythonStack()); } // SetAttributes if needed - grad_node->SetAttributeaxis(-1); + grad_node->SetAttribute_axis(-1); if (paddle::platform::is_gpu_place(x.place())) { if (x_autograd_meta != nullptr && x_autograd_meta->StopGradient() && y_autograd_meta != nullptr && !y_autograd_meta->StopGradient()) { - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrapperNoNeedBuffery(y); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapperNoNeedBuffer_y(y); } else if (x_autograd_meta != nullptr && !x_autograd_meta->StopGradient() && y_autograd_meta != nullptr && y_autograd_meta->StopGradient()) { - grad_node->SetTensorWrapperNoNeedBufferx(x); - grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapperNoNeedBuffer_x(x); + grad_node->SetTensorWrapper_y(y); } else { - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_y(y); } } else { - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_y(y); } // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); @@ -300,11 +300,11 @@ paddle::Tensor& multiply__ad_func(paddle::Tensor& x, // NOLINT grad_node->SetForwardTrace(egr::Controller::Instance().GetPythonStack()); } // SetAttributes if needed - grad_node->SetAttributeaxis(-1); + grad_node->SetAttribute_axis(-1); // Set TensorWrappers for Forward Inputs if needed auto x_clone = paddle::experimental::assign(x); - grad_node->SetTensorWrapperx(x_clone); - grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapper_x(x_clone); + grad_node->SetTensorWrapper_y(y); } // Forward API Call @@ -505,8 +505,8 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x, // SetAttributes if needed // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_y(y); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); grad_node->SetGradOutMeta(y, 1); diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc index 5ee5d74094538..b227e2a06e68d 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc @@ -50,7 +50,7 @@ paddle::Tensor reshard_ad_function( std::shared_ptr(new ReshardGradNode(1, 1)); // NOLINT // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperNoNeedBufferInput(input); + grad_node->SetTensorWrapperNoNeedBuffer_Input(input); } // Forward API Call diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc index 654ab2bfd73db..c4e007801c66c 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc @@ -235,16 +235,16 @@ sync_batch_norm__ad_func(const paddle::Tensor& x, egr::Controller::Instance().PushBackForceSequentialNodes(grad_node.get()); // SetAttributes if needed - grad_node->SetAttributemomentum(momentum); - grad_node->SetAttributeepsilon(epsilon); - grad_node->SetAttributedata_layout(data_layout); - grad_node->SetAttributeis_test(is_test); - grad_node->SetAttributeuse_global_stats(use_global_stats); - grad_node->SetAttributetrainable_statistics(trainable_statistics); + grad_node->SetAttribute_momentum(momentum); + grad_node->SetAttribute_epsilon(epsilon); + grad_node->SetAttribute_data_layout(data_layout); + grad_node->SetAttribute_is_test(is_test); + grad_node->SetAttribute_use_global_stats(use_global_stats); + grad_node->SetAttribute_trainable_statistics(trainable_statistics); // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrapperscale(scale); - grad_node->SetTensorWrapperbias(bias); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_scale(scale); + grad_node->SetTensorWrapper_bias(bias); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); grad_node->SetGradOutMeta(scale, 3); @@ -293,9 +293,9 @@ sync_batch_norm__ad_func(const paddle::Tensor& x, grad_node->SetGradInMeta(saved_variance, 4); grad_node->SetGradInMeta(reserve_space, 5); // Set TensorWrappers for Forward Outputs if needed - grad_node->SetTensorWrappersaved_mean(saved_mean); - grad_node->SetTensorWrappersaved_variance(saved_variance); - grad_node->SetTensorWrapperreserve_space(reserve_space); + grad_node->SetTensorWrapper_saved_mean(saved_mean); + grad_node->SetTensorWrapper_saved_variance(saved_variance); + grad_node->SetTensorWrapper_reserve_space(reserve_space); } VLOG(4) << "Finish AD API: sync_batch_norm_"; @@ -571,16 +571,16 @@ sync_batch_norm__ad_func(const paddle::Tensor& x, new SyncBatchNormGradNode(6, 5)); egr::Controller::Instance().PushBackForceSequentialNodes(grad_node.get()); // SetAttributes if needed - grad_node->SetAttributemomentum(momentum); - grad_node->SetAttributeepsilon(epsilon); - grad_node->SetAttributedata_layout(data_layout); - grad_node->SetAttributeis_test(is_test); - grad_node->SetAttributeuse_global_stats(use_global_stats); - grad_node->SetAttributetrainable_statistics(trainable_statistics); + grad_node->SetAttribute_momentum(momentum); + grad_node->SetAttribute_epsilon(epsilon); + grad_node->SetAttribute_data_layout(data_layout); + grad_node->SetAttribute_is_test(is_test); + grad_node->SetAttribute_use_global_stats(use_global_stats); + grad_node->SetAttribute_trainable_statistics(trainable_statistics); // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrapperscale(scale); - grad_node->SetTensorWrapperbias(bias); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_scale(scale); + grad_node->SetTensorWrapper_bias(bias); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); grad_node->SetGradOutMeta(scale, 3); @@ -629,9 +629,9 @@ sync_batch_norm__ad_func(const paddle::Tensor& x, grad_node->SetGradInMeta(saved_variance, 4); grad_node->SetGradInMeta(reserve_space, 5); // Set TensorWrappers for Forward Outputs if needed - grad_node->SetTensorWrappersaved_mean(saved_mean); - grad_node->SetTensorWrappersaved_variance(saved_variance); - grad_node->SetTensorWrapperreserve_space(reserve_space); + grad_node->SetTensorWrapper_saved_mean(saved_mean); + grad_node->SetTensorWrapper_saved_variance(saved_variance); + grad_node->SetTensorWrapper_reserve_space(reserve_space); } VLOG(4) << "Finish AD API: sync_batch_norm_"; diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc index e1bcc3bc73731..437cce80c919b 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc @@ -126,16 +126,16 @@ Conv2dGradNodeFinal::operator()( auto grad_node = std::shared_ptr( // NOLINT new Conv2dDoubleGradNodeFinal(2, 3)); // SetAttributes if needed - grad_node->SetAttributestrides(strides); - grad_node->SetAttributepaddings(paddings); - grad_node->SetAttributepadding_algorithm(padding_algorithm); - grad_node->SetAttributegroups(groups); - grad_node->SetAttributedilations(dilations); - grad_node->SetAttributedata_format(data_format); + grad_node->SetAttribute_strides(strides); + grad_node->SetAttribute_paddings(paddings); + grad_node->SetAttribute_padding_algorithm(padding_algorithm); + grad_node->SetAttribute_groups(groups); + grad_node->SetAttribute_dilations(dilations); + grad_node->SetAttribute_data_format(data_format); // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperinput(input); - grad_node->SetTensorWrapperfilter(filter); - grad_node->SetTensorWrappergrad_out(grad_out); + grad_node->SetTensorWrapper_input(input); + grad_node->SetTensorWrapper_filter(filter); + grad_node->SetTensorWrapper_grad_out(grad_out); // SetGradOutMeta & SetEdges if (grad_filter_autograd_meta) { grad_node->SetGradOutMeta(input, 0); diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc index b3e38e066300d..56c1f1e61a7fc 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc @@ -167,11 +167,11 @@ MultiplyGradNode::operator()( auto grad_node = std::shared_ptr( // NOLINT new MultiplyDoubleGradNode(2, 3)); // SetAttributes if needed - grad_node->SetAttributeaxis(axis); + grad_node->SetAttribute_axis(axis); // Set TensorWrappers for Forward Inputs if needed - grad_node->SetTensorWrapperx(x); - grad_node->SetTensorWrappery(y); - grad_node->SetTensorWrappergrad_out(grad_out); + grad_node->SetTensorWrapper_x(x); + grad_node->SetTensorWrapper_y(y); + grad_node->SetTensorWrapper_grad_out(grad_out); // SetGradOutMeta & SetEdges grad_node->SetGradOutMeta(x, 0); grad_node->SetGradOutMeta(y, 1); diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h index bc6d1d9f1a1b6..12274670827f6 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h @@ -48,28 +48,28 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperinput(const paddle::Tensor& input) { + void SetTensorWrapper_input(const paddle::Tensor& input) { input_ = egr::TensorWrapper(input, false); } - void SetTensorWrapperfilter(const paddle::Tensor& filter) { + void SetTensorWrapper_filter(const paddle::Tensor& filter) { filter_ = egr::TensorWrapper(filter, false); } // SetAttributes - void SetAttributestrides(const std::vector& strides) { + void SetAttribute_strides(const std::vector& strides) { strides_ = strides; } - void SetAttributepaddings(const std::vector& paddings) { + void SetAttribute_paddings(const std::vector& paddings) { paddings_ = paddings; } - void SetAttributepadding_algorithm(const std::string& padding_algorithm) { + void SetAttribute_padding_algorithm(const std::string& padding_algorithm) { padding_algorithm_ = padding_algorithm; } - void SetAttributegroups(const int& groups) { groups_ = groups; } - void SetAttributedilations(const std::vector& dilations) { + void SetAttribute_groups(const int& groups) { groups_ = groups; } + void SetAttribute_dilations(const std::vector& dilations) { dilations_ = dilations; } - void SetAttributedata_format(const std::string& data_format) { + void SetAttribute_data_format(const std::string& data_format) { data_format_ = data_format; } @@ -117,31 +117,31 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperinput(const paddle::Tensor& input) { + void SetTensorWrapper_input(const paddle::Tensor& input) { input_ = egr::TensorWrapper(input, false); } - void SetTensorWrapperfilter(const paddle::Tensor& filter) { + void SetTensorWrapper_filter(const paddle::Tensor& filter) { filter_ = egr::TensorWrapper(filter, false); } - void SetTensorWrappergrad_out(const paddle::Tensor& grad_out) { + void SetTensorWrapper_grad_out(const paddle::Tensor& grad_out) { grad_out_ = egr::TensorWrapper(grad_out, false); } // SetAttributes - void SetAttributestrides(const std::vector& strides) { + void SetAttribute_strides(const std::vector& strides) { strides_ = strides; } - void SetAttributepaddings(const std::vector& paddings) { + void SetAttribute_paddings(const std::vector& paddings) { paddings_ = paddings; } - void SetAttributepadding_algorithm(const std::string& padding_algorithm) { + void SetAttribute_padding_algorithm(const std::string& padding_algorithm) { padding_algorithm_ = padding_algorithm; } - void SetAttributegroups(const int& groups) { groups_ = groups; } - void SetAttributedilations(const std::vector& dilations) { + void SetAttribute_groups(const int& groups) { groups_ = groups; } + void SetAttribute_dilations(const std::vector& dilations) { dilations_ = dilations; } - void SetAttributedata_format(const std::string& data_format) { + void SetAttribute_data_format(const std::string& data_format) { data_format_ = data_format; } @@ -190,7 +190,7 @@ class AddNGradNodeFinal : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const std::vector& x) { + void SetTensorWrapper_x(const std::vector& x) { for (const auto& eager_tensor : x) { x_.emplace_back(egr::TensorWrapper(eager_tensor, true)); } @@ -233,22 +233,22 @@ class MultiplyGradNode : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const paddle::Tensor& x) { + void SetTensorWrapper_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, false); } - void SetTensorWrappery(const paddle::Tensor& y) { + void SetTensorWrapper_y(const paddle::Tensor& y) { y_ = egr::TensorWrapper(y, false); } - void SetTensorWrapperNoNeedBufferx(const paddle::Tensor& x) { + void SetTensorWrapperNoNeedBuffer_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, true); } - void SetTensorWrapperNoNeedBuffery(const paddle::Tensor& y) { + void SetTensorWrapperNoNeedBuffer_y(const paddle::Tensor& y) { y_ = egr::TensorWrapper(y, true); } // SetAttributes - void SetAttributeaxis(const int& axis) { axis_ = axis; } + void SetAttribute_axis(const int& axis) { axis_ = axis; } private: // TensorWrappers @@ -289,18 +289,18 @@ class MultiplyDoubleGradNode : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const paddle::Tensor& x) { + void SetTensorWrapper_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, false); } - void SetTensorWrappery(const paddle::Tensor& y) { + void SetTensorWrapper_y(const paddle::Tensor& y) { y_ = egr::TensorWrapper(y, false); } - void SetTensorWrappergrad_out(const paddle::Tensor& grad_out) { + void SetTensorWrapper_grad_out(const paddle::Tensor& grad_out) { grad_out_ = egr::TensorWrapper(grad_out, false); } // SetAttributes - void SetAttributeaxis(const int& axis) { axis_ = axis; } + void SetAttribute_axis(const int& axis) { axis_ = axis; } private: // TensorWrappers @@ -345,36 +345,36 @@ class SyncBatchNormGradNode : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const paddle::Tensor& x) { + void SetTensorWrapper_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, false); } - void SetTensorWrapperscale(const paddle::Tensor& scale) { + void SetTensorWrapper_scale(const paddle::Tensor& scale) { scale_ = egr::TensorWrapper(scale, false); } - void SetTensorWrapperbias(const paddle::Tensor& bias) { + void SetTensorWrapper_bias(const paddle::Tensor& bias) { bias_ = egr::TensorWrapper(bias, false); } - void SetTensorWrappersaved_mean(const paddle::Tensor& saved_mean) { + void SetTensorWrapper_saved_mean(const paddle::Tensor& saved_mean) { saved_mean_ = egr::TensorWrapper(saved_mean, false); } - void SetTensorWrappersaved_variance(const paddle::Tensor& saved_variance) { + void SetTensorWrapper_saved_variance(const paddle::Tensor& saved_variance) { saved_variance_ = egr::TensorWrapper(saved_variance, false); } - void SetTensorWrapperreserve_space(const paddle::Tensor& reserve_space) { + void SetTensorWrapper_reserve_space(const paddle::Tensor& reserve_space) { reserve_space_ = egr::TensorWrapper(reserve_space, false); } // SetAttributes - void SetAttributemomentum(const float& momentum) { momentum_ = momentum; } - void SetAttributeepsilon(const float& epsilon) { epsilon_ = epsilon; } - void SetAttributedata_layout(const std::string& data_layout) { + void SetAttribute_momentum(const float& momentum) { momentum_ = momentum; } + void SetAttribute_epsilon(const float& epsilon) { epsilon_ = epsilon; } + void SetAttribute_data_layout(const std::string& data_layout) { data_layout_ = data_layout; } - void SetAttributeis_test(const bool& is_test) { is_test_ = is_test; } - void SetAttributeuse_global_stats(const bool& use_global_stats) { + void SetAttribute_is_test(const bool& is_test) { is_test_ = is_test; } + void SetAttribute_use_global_stats(const bool& use_global_stats) { use_global_stats_ = use_global_stats; } - void SetAttributetrainable_statistics(const bool& trainable_statistics) { + void SetAttribute_trainable_statistics(const bool& trainable_statistics) { trainable_statistics_ = trainable_statistics; } @@ -434,7 +434,7 @@ class ReshardGradNode : public egr::GradNodeBase { // SetTensorWrapperX // Only input's meta is needed. - void SetTensorWrapperNoNeedBufferInput(const paddle::Tensor& input) { + void SetTensorWrapperNoNeedBuffer_Input(const paddle::Tensor& input) { input_ = egr::TensorWrapper(input, true); } @@ -477,36 +477,36 @@ class SyncBatchNormGradNode : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const paddle::Tensor& x) { + void SetTensorWrapper_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, false); } - void SetTensorWrapperscale(const paddle::Tensor& scale) { + void SetTensorWrapper_scale(const paddle::Tensor& scale) { scale_ = egr::TensorWrapper(scale, false); } - void SetTensorWrapperbias(const paddle::Tensor& bias) { + void SetTensorWrapper_bias(const paddle::Tensor& bias) { bias_ = egr::TensorWrapper(bias, false); } - void SetTensorWrappersaved_mean(const paddle::Tensor& saved_mean) { + void SetTensorWrapper_saved_mean(const paddle::Tensor& saved_mean) { saved_mean_ = egr::TensorWrapper(saved_mean, false); } - void SetTensorWrappersaved_variance(const paddle::Tensor& saved_variance) { + void SetTensorWrapper_saved_variance(const paddle::Tensor& saved_variance) { saved_variance_ = egr::TensorWrapper(saved_variance, false); } - void SetTensorWrapperreserve_space(const paddle::Tensor& reserve_space) { + void SetTensorWrapper_reserve_space(const paddle::Tensor& reserve_space) { reserve_space_ = egr::TensorWrapper(reserve_space, false); } // SetAttributes - void SetAttributemomentum(const float& momentum) { momentum_ = momentum; } - void SetAttributeepsilon(const float& epsilon) { epsilon_ = epsilon; } - void SetAttributedata_layout(const std::string& data_layout) { + void SetAttribute_momentum(const float& momentum) { momentum_ = momentum; } + void SetAttribute_epsilon(const float& epsilon) { epsilon_ = epsilon; } + void SetAttribute_data_layout(const std::string& data_layout) { data_layout_ = data_layout; } - void SetAttributeis_test(const bool& is_test) { is_test_ = is_test; } - void SetAttributeuse_global_stats(const bool& use_global_stats) { + void SetAttribute_is_test(const bool& is_test) { is_test_ = is_test; } + void SetAttribute_use_global_stats(const bool& use_global_stats) { use_global_stats_ = use_global_stats; } - void SetAttributetrainable_statistics(const bool& trainable_statistics) { + void SetAttribute_trainable_statistics(const bool& trainable_statistics) { trainable_statistics_ = trainable_statistics; } @@ -557,10 +557,10 @@ class MultiplyGradNode : public egr::GradNodeBase { } // SetTensorWrapperX, SetTensorWrapperY, ... - void SetTensorWrapperx(const paddle::Tensor& x) { + void SetTensorWrapper_x(const paddle::Tensor& x) { x_ = egr::TensorWrapper(x, false); } - void SetTensorWrappery(const paddle::Tensor& y) { + void SetTensorWrapper_y(const paddle::Tensor& y) { y_ = egr::TensorWrapper(y, false); } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc index f3612c2830dd0..6130b79059f65 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc @@ -403,27 +403,27 @@ fused_attention_dygraph_function( grad_node->SetAttrMap(std::move(attrs)); grad_node->SetDefaultAttrMap(std::move(default_attrs)); - grad_node->SetTensorWrapperX(X); - grad_node->SetTensorWrapperQKVW(QKVW); - grad_node->SetTensorWrapperOutLinearW(OutLinearW); - grad_node->SetTensorWrapperQKVOut(QKVOut); - grad_node->SetTensorWrapperTransposeOut2(TransposeOut2); - grad_node->SetTensorWrapperQKOut(QKOut); - grad_node->SetTensorWrapperQKTVOut(QKTVOut); - grad_node->SetTensorWrapperSoftmaxOut(SoftmaxOut); - grad_node->SetTensorWrapperAttnDropoutMaskOut(AttnDropoutMaskOut); - grad_node->SetTensorWrapperAttnDropoutOut(AttnDropoutOut); - grad_node->SetTensorWrapperFMHAOut(FMHAOut); - grad_node->SetTensorWrapperOutLinearOut(OutLinearOut); - grad_node->SetTensorWrapperDropoutMaskOut(DropoutMaskOut); + grad_node->SetTensorWrapper_X(X); + grad_node->SetTensorWrapper_QKVW(QKVW); + grad_node->SetTensorWrapper_OutLinearW(OutLinearW); + grad_node->SetTensorWrapper_QKVOut(QKVOut); + grad_node->SetTensorWrapper_TransposeOut2(TransposeOut2); + grad_node->SetTensorWrapper_QKOut(QKOut); + grad_node->SetTensorWrapper_QKTVOut(QKTVOut); + grad_node->SetTensorWrapper_SoftmaxOut(SoftmaxOut); + grad_node->SetTensorWrapper_AttnDropoutMaskOut(AttnDropoutMaskOut); + grad_node->SetTensorWrapper_AttnDropoutOut(AttnDropoutOut); + grad_node->SetTensorWrapper_FMHAOut(FMHAOut); + grad_node->SetTensorWrapper_OutLinearOut(OutLinearOut); + grad_node->SetTensorWrapper_DropoutMaskOut(DropoutMaskOut); grad_node->SetGradOutMeta(X, 0); grad_node->SetGradOutMeta(QKVW, 3); grad_node->SetGradOutMeta(OutLinearW, 7); if (QKVBias.initialized()) { - grad_node->SetTensorWrapperQKVBias(QKVBias); - grad_node->SetTensorWrapperQKVBiasOut(QKVBiasOut); + grad_node->SetTensorWrapper_QKVBias(QKVBias); + grad_node->SetTensorWrapper_QKVBiasOut(QKVBiasOut); grad_node->SetGradOutMeta(QKVBias, 4); auto QKVBiasOut_accumulation_node = @@ -436,8 +436,8 @@ fused_attention_dygraph_function( } if (SrcMask.initialized()) { - grad_node->SetTensorWrapperSrcMask(SrcMask); - grad_node->SetTensorWrapperSrcMaskOut(SrcMaskOut); + grad_node->SetTensorWrapper_SrcMask(SrcMask); + grad_node->SetTensorWrapper_SrcMaskOut(SrcMaskOut); auto SrcMaskOut_accumulation_node = std::make_shared(p_autograd_SrcMaskOut); @@ -449,21 +449,21 @@ fused_attention_dygraph_function( } if (OutLinearBias.initialized()) { - grad_node->SetTensorWrapperOutLinearBias(OutLinearBias); + grad_node->SetTensorWrapper_OutLinearBias(OutLinearBias); grad_node->SetGradOutMeta(OutLinearBias, 8); } if (pre_layer_norm) { if (LnScale.initialized()) { - grad_node->SetTensorWrapperLnScale(LnScale); + grad_node->SetTensorWrapper_LnScale(LnScale); grad_node->SetGradOutMeta(LnScale, 1); } if (LnBias.initialized()) { - grad_node->SetTensorWrapperLnBias(LnBias); + grad_node->SetTensorWrapper_LnBias(LnBias); grad_node->SetGradOutMeta(LnBias, 2); } if (LnOut.initialized()) { - grad_node->SetTensorWrapperLnOut(LnOut); + grad_node->SetTensorWrapper_LnOut(LnOut); auto LnOut_accumulation_node = std::make_shared(p_autograd_LnOut); @@ -474,24 +474,24 @@ fused_attention_dygraph_function( grad_node->SetGradOutMeta(LnOut, 13); } if (LnMean.initialized()) { - grad_node->SetTensorWrapperLnMean(LnMean); + grad_node->SetTensorWrapper_LnMean(LnMean); } if (LnVariance.initialized()) { - grad_node->SetTensorWrapperLnVariance(LnVariance); + grad_node->SetTensorWrapper_LnVariance(LnVariance); } } else { if (Ln2Scale.initialized()) { - grad_node->SetTensorWrapperLn2Scale(Ln2Scale); + grad_node->SetTensorWrapper_Ln2Scale(Ln2Scale); grad_node->SetGradOutMeta(Ln2Scale, 9); } if (Ln2Bias.initialized()) { - grad_node->SetTensorWrapperLn2Bias(Ln2Bias); + grad_node->SetTensorWrapper_Ln2Bias(Ln2Bias); grad_node->SetGradOutMeta(Ln2Bias, 10); } - grad_node->SetTensorWrapperBiasDropoutResidualOut( + grad_node->SetTensorWrapper_BiasDropoutResidualOut( BiasDropoutResidualOut); - grad_node->SetTensorWrapperLn2Mean(Ln2Mean); - grad_node->SetTensorWrapperLn2Variance(Ln2Variance); + grad_node->SetTensorWrapper_Ln2Mean(Ln2Mean); + grad_node->SetTensorWrapper_Ln2Variance(Ln2Variance); auto BiasDropoutResidualOut_accumulation_node = std::make_shared( diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc index c76073ba0b574..b67d0b40b7d0d 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc @@ -193,15 +193,16 @@ fused_bias_dropout_residual_layer_norm_dygraph_function( grad_node->SetDefaultAttrMap(std::move(default_attrs)); // Set Tensor Wrappers - grad_node->SetTensorWrapperBias(Bias); - grad_node->SetTensorWrapperBiasDropoutResidualOut(BiasDropoutResidualOut); - grad_node->SetTensorWrapperDropoutMaskOut(DropoutMaskOut); - grad_node->SetTensorWrapperLnBias(LnBias); - grad_node->SetTensorWrapperLnMean(LnMean); - grad_node->SetTensorWrapperLnScale(LnScale); - grad_node->SetTensorWrapperLnVariance(LnVariance); - grad_node->SetTensorWrapperResidual(Residual); - grad_node->SetTensorWrapperX(X); + grad_node->SetTensorWrapper_Bias(Bias); + grad_node->SetTensorWrapper_BiasDropoutResidualOut( + BiasDropoutResidualOut); + grad_node->SetTensorWrapper_DropoutMaskOut(DropoutMaskOut); + grad_node->SetTensorWrapper_LnBias(LnBias); + grad_node->SetTensorWrapper_LnMean(LnMean); + grad_node->SetTensorWrapper_LnScale(LnScale); + grad_node->SetTensorWrapper_LnVariance(LnVariance); + grad_node->SetTensorWrapper_Residual(Residual); + grad_node->SetTensorWrapper_X(X); grad_node->SetGradOutMeta(X, 0); grad_node->SetGradOutMeta(Residual, 1); diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc index b2f5238c5be32..f3cfc39d17c7b 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc @@ -323,15 +323,15 @@ fused_feedforward_dygraph_function( grad_node->SetAttrMap(std::move(attrs)); grad_node->SetDefaultAttrMap(std::move(default_attrs)); - grad_node->SetTensorWrapperX(X); - grad_node->SetTensorWrapperLinear1Weight(Linear1Weight); - grad_node->SetTensorWrapperLinear1Bias(Linear1Bias); - grad_node->SetTensorWrapperLinear2Weight(Linear2Weight); - grad_node->SetTensorWrapperDropout1Mask(Dropout1Mask); - grad_node->SetTensorWrapperDropout2Mask(Dropout2Mask); - grad_node->SetTensorWrapperLinear1Out(Linear1Out); - grad_node->SetTensorWrapperDropout1Out(Dropout1Out); - grad_node->SetTensorWrapperDropout2Out(Dropout2Out); + grad_node->SetTensorWrapper_X(X); + grad_node->SetTensorWrapper_Linear1Weight(Linear1Weight); + grad_node->SetTensorWrapper_Linear1Bias(Linear1Bias); + grad_node->SetTensorWrapper_Linear2Weight(Linear2Weight); + grad_node->SetTensorWrapper_Dropout1Mask(Dropout1Mask); + grad_node->SetTensorWrapper_Dropout2Mask(Dropout2Mask); + grad_node->SetTensorWrapper_Linear1Out(Linear1Out); + grad_node->SetTensorWrapper_Dropout1Out(Dropout1Out); + grad_node->SetTensorWrapper_Dropout2Out(Dropout2Out); grad_node->SetGradOutMeta(X, 0); grad_node->SetGradOutMeta(Linear1Weight, 3); @@ -339,24 +339,24 @@ fused_feedforward_dygraph_function( grad_node->SetGradOutMeta(Linear2Weight, 5); if (pre_layer_norm) { - grad_node->SetTensorWrapperLn1Scale(Ln1Scale); - grad_node->SetTensorWrapperLn1Bias(Ln1Bias); - grad_node->SetTensorWrapperLn1Out(Ln1Out); - grad_node->SetTensorWrapperLn1Mean(Ln1Mean); - grad_node->SetTensorWrapperLn1Variance(Ln1Variance); + grad_node->SetTensorWrapper_Ln1Scale(Ln1Scale); + grad_node->SetTensorWrapper_Ln1Bias(Ln1Bias); + grad_node->SetTensorWrapper_Ln1Out(Ln1Out); + grad_node->SetTensorWrapper_Ln1Mean(Ln1Mean); + grad_node->SetTensorWrapper_Ln1Variance(Ln1Variance); grad_node->SetGradOutMeta(Ln1Scale, 7); grad_node->SetGradOutMeta(Ln1Bias, 8); } else { - grad_node->SetTensorWrapperLn2Scale(Ln2Scale); + grad_node->SetTensorWrapper_Ln2Scale(Ln2Scale); grad_node->SetGradOutMeta(Ln2Scale, 9); - grad_node->SetTensorWrapperLn2Bias(Ln2Bias); + grad_node->SetTensorWrapper_Ln2Bias(Ln2Bias); grad_node->SetGradOutMeta(Ln2Bias, 10); - grad_node->SetTensorWrapperLn2Mean(Ln2Mean); - grad_node->SetTensorWrapperLn2Variance(Ln2Variance); + grad_node->SetTensorWrapper_Ln2Mean(Ln2Mean); + grad_node->SetTensorWrapper_Ln2Variance(Ln2Variance); } if (Linear2Bias.initialized()) { - grad_node->SetTensorWrapperLinear2Bias(Linear2Bias); + grad_node->SetTensorWrapper_Linear2Bias(Linear2Bias); grad_node->SetGradOutMeta(Linear2Bias, 6); } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc index c42a099cef4b0..b9e2a52228bcb 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc @@ -324,28 +324,28 @@ fused_gate_attention_dygraph_function( grad_node->SetAttrMap(std::move(attrs)); grad_node->SetDefaultAttrMap(std::move(default_attrs)); - grad_node->SetTensorWrapperFMHAOut(FMHAOut); - grad_node->SetTensorWrapperQuery(Query); - grad_node->SetTensorWrapperSoftmaxOut(SoftmaxOut); - grad_node->SetTensorWrapperOutLinearBias(OutLinearBias); - grad_node->SetTensorWrapperOutLinearWeight(OutLinearWeight); + grad_node->SetTensorWrapper_FMHAOut(FMHAOut); + grad_node->SetTensorWrapper_Query(Query); + grad_node->SetTensorWrapper_SoftmaxOut(SoftmaxOut); + grad_node->SetTensorWrapper_OutLinearBias(OutLinearBias); + grad_node->SetTensorWrapper_OutLinearWeight(OutLinearWeight); grad_node->SetGradOutMeta(Query, 0); grad_node->SetGradOutMeta(OutLinearWeight, 10); grad_node->SetGradOutMeta(OutLinearBias, 11); if (merge_qkv) { - grad_node->SetTensorWrapperQKVTransposeOut(QKVTransposeOut); - grad_node->SetTensorWrapperQKVWeight(QKVWeight); + grad_node->SetTensorWrapper_QKVTransposeOut(QKVTransposeOut); + grad_node->SetTensorWrapper_QKVWeight(QKVWeight); grad_node->SetGradOutMeta(QKVWeight, 5); } else { - grad_node->SetTensorWrapperKey(Key); - grad_node->SetTensorWrapperQueryWeight(QueryWeight); - grad_node->SetTensorWrapperKeyWeight(KeyWeight); - grad_node->SetTensorWrapperValueWeight(ValueWeight); - grad_node->SetTensorWrapperQueryTransposeOut(QueryTransposeOut); - grad_node->SetTensorWrapperKeyTransposeOut(KeyTransposeOut); - grad_node->SetTensorWrapperValueTransposeOut(ValueTransposeOut); + grad_node->SetTensorWrapper_Key(Key); + grad_node->SetTensorWrapper_QueryWeight(QueryWeight); + grad_node->SetTensorWrapper_KeyWeight(KeyWeight); + grad_node->SetTensorWrapper_ValueWeight(ValueWeight); + grad_node->SetTensorWrapper_QueryTransposeOut(QueryTransposeOut); + grad_node->SetTensorWrapper_KeyTransposeOut(KeyTransposeOut); + grad_node->SetTensorWrapper_ValueTransposeOut(ValueTransposeOut); grad_node->SetGradOutMeta(Key, 1); grad_node->SetGradOutMeta(QueryWeight, 2); @@ -354,21 +354,21 @@ fused_gate_attention_dygraph_function( } if (has_gating) { - grad_node->SetTensorWrapperGateWeight(GateWeight); + grad_node->SetTensorWrapper_GateWeight(GateWeight); grad_node->SetGradOutMeta(GateWeight, 8); - grad_node->SetTensorWrapperGateBias(GateBias); + grad_node->SetTensorWrapper_GateBias(GateBias); grad_node->SetGradOutMeta(GateBias, 9); - grad_node->SetTensorWrapperGateOut(GateOut); + grad_node->SetTensorWrapper_GateOut(GateOut); } if (NonbatchedBias.initialized()) { - grad_node->SetTensorWrapperNonbatchedBias(NonbatchedBias); + grad_node->SetTensorWrapper_NonbatchedBias(NonbatchedBias); grad_node->SetGradOutMeta(NonbatchedBias, 6); } if (use_flash_attn) { - grad_node->SetTensorWrapperSoftmaxLse(SoftmaxLse); - grad_node->SetTensorWrapperSrcMask(SrcMask); + grad_node->SetTensorWrapper_SoftmaxLse(SoftmaxLse); + grad_node->SetTensorWrapper_SrcMask(SrcMask); grad_node->SetGradOutMeta(SrcMask, 7); } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc index c4ae0840c294f..15c0fdfd0d1ff 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc @@ -111,8 +111,8 @@ paddle::Tensor fused_gemm_epilogue_dygraph_function( grad_node->SetDefaultAttrMap(std::move(default_attrs)); // Set Tensor Wrappers - grad_node->SetTensorWrapperX(X); - grad_node->SetTensorWrapperY(Y); + grad_node->SetTensorWrapper_X(X); + grad_node->SetTensorWrapper_Y(Y); grad_node->SetGradOutMeta(X, 0); grad_node->SetGradOutMeta(Y, 1); diff --git a/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h b/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h index 212f9d9f1da19..e8c80e635b155 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h +++ b/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h @@ -90,65 +90,65 @@ class fused_gate_attentionGradNodeCompat : public egr::GradNodeBase { } // SetX, SetY, ... - void SetTensorWrapperFMHAOut(const paddle::Tensor& FMHAOut) { + void SetTensorWrapper_FMHAOut(const paddle::Tensor& FMHAOut) { FMHAOut_ = egr::TensorWrapper(FMHAOut, false); } - void SetTensorWrapperGateBias(const paddle::Tensor& GateBias) { + void SetTensorWrapper_GateBias(const paddle::Tensor& GateBias) { GateBias_ = egr::TensorWrapper(GateBias, false); } - void SetTensorWrapperGateOut(const paddle::Tensor& GateOut) { + void SetTensorWrapper_GateOut(const paddle::Tensor& GateOut) { GateOut_ = egr::TensorWrapper(GateOut, false); } - void SetTensorWrapperGateWeight(const paddle::Tensor& GateWeight) { + void SetTensorWrapper_GateWeight(const paddle::Tensor& GateWeight) { GateWeight_ = egr::TensorWrapper(GateWeight, false); } - void SetTensorWrapperNonbatchedBias(const paddle::Tensor& NonbatchedBias) { + void SetTensorWrapper_NonbatchedBias(const paddle::Tensor& NonbatchedBias) { NonbatchedBias_ = egr::TensorWrapper(NonbatchedBias, false); } - void SetTensorWrapperSrcMask(const paddle::Tensor& SrcMask) { + void SetTensorWrapper_SrcMask(const paddle::Tensor& SrcMask) { SrcMask_ = egr::TensorWrapper(SrcMask, false); } - void SetTensorWrapperOutLinearBias(const paddle::Tensor& OutLinearBias) { + void SetTensorWrapper_OutLinearBias(const paddle::Tensor& OutLinearBias) { OutLinearBias_ = egr::TensorWrapper(OutLinearBias, false); } - void SetTensorWrapperOutLinearWeight(const paddle::Tensor& OutLinearWeight) { + void SetTensorWrapper_OutLinearWeight(const paddle::Tensor& OutLinearWeight) { OutLinearWeight_ = egr::TensorWrapper(OutLinearWeight, false); } - void SetTensorWrapperQKVTransposeOut(const paddle::Tensor& QKVTransposeOut) { + void SetTensorWrapper_QKVTransposeOut(const paddle::Tensor& QKVTransposeOut) { QKVTransposeOut_ = egr::TensorWrapper(QKVTransposeOut, false); } - void SetTensorWrapperQKVWeight(const paddle::Tensor& QKVWeight) { + void SetTensorWrapper_QKVWeight(const paddle::Tensor& QKVWeight) { QKVWeight_ = egr::TensorWrapper(QKVWeight, false); } - void SetTensorWrapperQuery(const paddle::Tensor& Query) { + void SetTensorWrapper_Query(const paddle::Tensor& Query) { Query_ = egr::TensorWrapper(Query, false); } - void SetTensorWrapperSoftmaxOut(const paddle::Tensor& SoftmaxOut) { + void SetTensorWrapper_SoftmaxOut(const paddle::Tensor& SoftmaxOut) { SoftmaxOut_ = egr::TensorWrapper(SoftmaxOut, false); } - void SetTensorWrapperSoftmaxLse(const paddle::Tensor& SoftmaxLse) { + void SetTensorWrapper_SoftmaxLse(const paddle::Tensor& SoftmaxLse) { SoftmaxLse_ = egr::TensorWrapper(SoftmaxLse, false); } - void SetTensorWrapperKey(const paddle::Tensor& Key) { + void SetTensorWrapper_Key(const paddle::Tensor& Key) { Key_ = egr::TensorWrapper(Key, false); } - void SetTensorWrapperQueryWeight(const paddle::Tensor& QueryWeight) { + void SetTensorWrapper_QueryWeight(const paddle::Tensor& QueryWeight) { QueryWeight_ = egr::TensorWrapper(QueryWeight, false); } - void SetTensorWrapperKeyWeight(const paddle::Tensor& KeyWeight) { + void SetTensorWrapper_KeyWeight(const paddle::Tensor& KeyWeight) { KeyWeight_ = egr::TensorWrapper(KeyWeight, false); } - void SetTensorWrapperValueWeight(const paddle::Tensor& ValueWeight) { + void SetTensorWrapper_ValueWeight(const paddle::Tensor& ValueWeight) { ValueWeight_ = egr::TensorWrapper(ValueWeight, false); } - void SetTensorWrapperQueryTransposeOut( + void SetTensorWrapper_QueryTransposeOut( const paddle::Tensor& QueryTransposeOut) { QueryTransposeOut_ = egr::TensorWrapper(QueryTransposeOut, false); } - void SetTensorWrapperKeyTransposeOut(const paddle::Tensor& KeyTransposeOut) { + void SetTensorWrapper_KeyTransposeOut(const paddle::Tensor& KeyTransposeOut) { KeyTransposeOut_ = egr::TensorWrapper(KeyTransposeOut, false); } - void SetTensorWrapperValueTransposeOut( + void SetTensorWrapper_ValueTransposeOut( const paddle::Tensor& ValueTransposeOut) { ValueTransposeOut_ = egr::TensorWrapper(ValueTransposeOut, false); } @@ -240,63 +240,63 @@ class fused_feedforwardGradNodeCompat : public egr::GradNodeBase { } // SetX, SetY, ... - void SetTensorWrapperDropout1Mask(const paddle::Tensor& Dropout1Mask) { + void SetTensorWrapper_Dropout1Mask(const paddle::Tensor& Dropout1Mask) { Dropout1Mask_ = egr::TensorWrapper(Dropout1Mask, false); } - void SetTensorWrapperDropout1Out(const paddle::Tensor& Dropout1Out) { + void SetTensorWrapper_Dropout1Out(const paddle::Tensor& Dropout1Out) { Dropout1Out_ = egr::TensorWrapper(Dropout1Out, false); } - void SetTensorWrapperDropout2Mask(const paddle::Tensor& Dropout2Mask) { + void SetTensorWrapper_Dropout2Mask(const paddle::Tensor& Dropout2Mask) { Dropout2Mask_ = egr::TensorWrapper(Dropout2Mask, false); } - void SetTensorWrapperDropout2Out(const paddle::Tensor& Dropout2Out) { + void SetTensorWrapper_Dropout2Out(const paddle::Tensor& Dropout2Out) { auto pre_layer_norm = GetAttrWithDefault( attr_map_, default_attr_map_, "pre_layer_norm"); Dropout2Out_ = egr::TensorWrapper(Dropout2Out, pre_layer_norm); } - void SetTensorWrapperLinear1Bias(const paddle::Tensor& Linear1Bias) { + void SetTensorWrapper_Linear1Bias(const paddle::Tensor& Linear1Bias) { Linear1Bias_ = egr::TensorWrapper(Linear1Bias, false); } - void SetTensorWrapperLinear1Out(const paddle::Tensor& Linear1Out) { + void SetTensorWrapper_Linear1Out(const paddle::Tensor& Linear1Out) { Linear1Out_ = egr::TensorWrapper(Linear1Out, false); } - void SetTensorWrapperLinear1Weight(const paddle::Tensor& Linear1Weight) { + void SetTensorWrapper_Linear1Weight(const paddle::Tensor& Linear1Weight) { Linear1Weight_ = egr::TensorWrapper(Linear1Weight, false); } - void SetTensorWrapperLinear2Bias(const paddle::Tensor& Linear2Bias) { + void SetTensorWrapper_Linear2Bias(const paddle::Tensor& Linear2Bias) { Linear2Bias_ = egr::TensorWrapper(Linear2Bias, false); } - void SetTensorWrapperLinear2Weight(const paddle::Tensor& Linear2Weight) { + void SetTensorWrapper_Linear2Weight(const paddle::Tensor& Linear2Weight) { Linear2Weight_ = egr::TensorWrapper(Linear2Weight, false); } - void SetTensorWrapperLn2Bias(const paddle::Tensor& Ln2Bias) { + void SetTensorWrapper_Ln2Bias(const paddle::Tensor& Ln2Bias) { Ln2Bias_ = egr::TensorWrapper(Ln2Bias, false); } - void SetTensorWrapperLn2Mean(const paddle::Tensor& Ln2Mean) { + void SetTensorWrapper_Ln2Mean(const paddle::Tensor& Ln2Mean) { Ln2Mean_ = egr::TensorWrapper(Ln2Mean, false); } - void SetTensorWrapperLn2Scale(const paddle::Tensor& Ln2Scale) { + void SetTensorWrapper_Ln2Scale(const paddle::Tensor& Ln2Scale) { Ln2Scale_ = egr::TensorWrapper(Ln2Scale, false); } - void SetTensorWrapperLn2Variance(const paddle::Tensor& Ln2Variance) { + void SetTensorWrapper_Ln2Variance(const paddle::Tensor& Ln2Variance) { Ln2Variance_ = egr::TensorWrapper(Ln2Variance, false); } - void SetTensorWrapperX(const paddle::Tensor& X) { + void SetTensorWrapper_X(const paddle::Tensor& X) { X_ = egr::TensorWrapper(X, false); } - void SetTensorWrapperLn1Scale(const paddle::Tensor& Ln1Scale) { + void SetTensorWrapper_Ln1Scale(const paddle::Tensor& Ln1Scale) { Ln1Scale_ = egr::TensorWrapper(Ln1Scale, false); } - void SetTensorWrapperLn1Bias(const paddle::Tensor& Ln1Bias) { + void SetTensorWrapper_Ln1Bias(const paddle::Tensor& Ln1Bias) { Ln1Bias_ = egr::TensorWrapper(Ln1Bias, false); } - void SetTensorWrapperLn1Out(const paddle::Tensor& Ln1Out) { + void SetTensorWrapper_Ln1Out(const paddle::Tensor& Ln1Out) { Ln1Out_ = egr::TensorWrapper(Ln1Out, false); } - void SetTensorWrapperLn1Mean(const paddle::Tensor& Ln1Mean) { + void SetTensorWrapper_Ln1Mean(const paddle::Tensor& Ln1Mean) { Ln1Mean_ = egr::TensorWrapper(Ln1Mean, false); } - void SetTensorWrapperLn1Variance(const paddle::Tensor& Ln1Variance) { + void SetTensorWrapper_Ln1Variance(const paddle::Tensor& Ln1Variance) { Ln1Variance_ = egr::TensorWrapper(Ln1Variance, false); } // SetAttrMap @@ -393,90 +393,90 @@ class fused_attentionGradNodeCompat : public egr::GradNodeBase { } // SetX, SetY, ... - void SetTensorWrapperAttnDropoutMaskOut( + void SetTensorWrapper_AttnDropoutMaskOut( const paddle::Tensor& AttnDropoutMaskOut) { AttnDropoutMaskOut_ = egr::TensorWrapper(AttnDropoutMaskOut, false); } - void SetTensorWrapperAttnDropoutOut(const paddle::Tensor& AttnDropoutOut) { + void SetTensorWrapper_AttnDropoutOut(const paddle::Tensor& AttnDropoutOut) { AttnDropoutOut_ = egr::TensorWrapper(AttnDropoutOut, false); } - void SetTensorWrapperBiasDropoutResidualOut( + void SetTensorWrapper_BiasDropoutResidualOut( const paddle::Tensor& BiasDropoutResidualOut) { BiasDropoutResidualOut_ = egr::TensorWrapper(BiasDropoutResidualOut, false); } - void SetTensorWrapperDropoutMaskOut(const paddle::Tensor& DropoutMaskOut) { + void SetTensorWrapper_DropoutMaskOut(const paddle::Tensor& DropoutMaskOut) { DropoutMaskOut_ = egr::TensorWrapper(DropoutMaskOut, false); } - void SetTensorWrapperFMHAOut(const paddle::Tensor& FMHAOut) { + void SetTensorWrapper_FMHAOut(const paddle::Tensor& FMHAOut) { FMHAOut_ = egr::TensorWrapper(FMHAOut, false); } - void SetTensorWrapperLn2Bias(const paddle::Tensor& Ln2Bias) { + void SetTensorWrapper_Ln2Bias(const paddle::Tensor& Ln2Bias) { Ln2Bias_ = egr::TensorWrapper(Ln2Bias, false); } - void SetTensorWrapperLn2Mean(const paddle::Tensor& Ln2Mean) { + void SetTensorWrapper_Ln2Mean(const paddle::Tensor& Ln2Mean) { Ln2Mean_ = egr::TensorWrapper(Ln2Mean, false); } - void SetTensorWrapperLn2Scale(const paddle::Tensor& Ln2Scale) { + void SetTensorWrapper_Ln2Scale(const paddle::Tensor& Ln2Scale) { Ln2Scale_ = egr::TensorWrapper(Ln2Scale, false); } - void SetTensorWrapperLn2Variance(const paddle::Tensor& Ln2Variance) { + void SetTensorWrapper_Ln2Variance(const paddle::Tensor& Ln2Variance) { Ln2Variance_ = egr::TensorWrapper(Ln2Variance, false); } - void SetTensorWrapperOutLinearBias(const paddle::Tensor& OutLinearBias) { + void SetTensorWrapper_OutLinearBias(const paddle::Tensor& OutLinearBias) { OutLinearBias_ = egr::TensorWrapper(OutLinearBias, false); } - void SetTensorWrapperOutLinearOut(const paddle::Tensor& OutLinearOut) { + void SetTensorWrapper_OutLinearOut(const paddle::Tensor& OutLinearOut) { OutLinearOut_ = egr::TensorWrapper(OutLinearOut, true); } - void SetTensorWrapperOutLinearW(const paddle::Tensor& OutLinearW) { + void SetTensorWrapper_OutLinearW(const paddle::Tensor& OutLinearW) { OutLinearW_ = egr::TensorWrapper(OutLinearW, false); } - void SetTensorWrapperQKOut(const paddle::Tensor& QKOut) { + void SetTensorWrapper_QKOut(const paddle::Tensor& QKOut) { QKOut_ = egr::TensorWrapper(QKOut, true); } - void SetTensorWrapperQKTVOut(const paddle::Tensor& QKTVOut) { + void SetTensorWrapper_QKTVOut(const paddle::Tensor& QKTVOut) { QKTVOut_ = egr::TensorWrapper(QKTVOut, true); } - void SetTensorWrapperQKVBias(const paddle::Tensor& QKVBias) { + void SetTensorWrapper_QKVBias(const paddle::Tensor& QKVBias) { QKVBias_ = egr::TensorWrapper(QKVBias, false); } - void SetTensorWrapperQKVBiasOut(const paddle::Tensor& QKVBiasOut) { + void SetTensorWrapper_QKVBiasOut(const paddle::Tensor& QKVBiasOut) { QKVBiasOut_ = egr::TensorWrapper(QKVBiasOut, true); } - void SetTensorWrapperQKVOut(const paddle::Tensor& QKVOut) { + void SetTensorWrapper_QKVOut(const paddle::Tensor& QKVOut) { QKVOut_ = egr::TensorWrapper(QKVOut, true); } - void SetTensorWrapperQKVW(const paddle::Tensor& QKVW) { + void SetTensorWrapper_QKVW(const paddle::Tensor& QKVW) { QKVW_ = egr::TensorWrapper(QKVW, false); } - void SetTensorWrapperSoftmaxOut(const paddle::Tensor& SoftmaxOut) { + void SetTensorWrapper_SoftmaxOut(const paddle::Tensor& SoftmaxOut) { SoftmaxOut_ = egr::TensorWrapper(SoftmaxOut, false); } - void SetTensorWrapperSrcMask(const paddle::Tensor& SrcMask) { + void SetTensorWrapper_SrcMask(const paddle::Tensor& SrcMask) { SrcMask_ = egr::TensorWrapper(SrcMask, true); } - void SetTensorWrapperSrcMaskOut(const paddle::Tensor& SrcMaskOut) { + void SetTensorWrapper_SrcMaskOut(const paddle::Tensor& SrcMaskOut) { SrcMaskOut_ = egr::TensorWrapper(SrcMaskOut, false); } - void SetTensorWrapperTransposeOut2(const paddle::Tensor& TransposeOut2) { + void SetTensorWrapper_TransposeOut2(const paddle::Tensor& TransposeOut2) { TransposeOut2_ = egr::TensorWrapper(TransposeOut2, false); } - void SetTensorWrapperX(const paddle::Tensor& X) { + void SetTensorWrapper_X(const paddle::Tensor& X) { X_ = egr::TensorWrapper(X, false); } - void SetTensorWrapperLnScale(const paddle::Tensor& LnScale) { + void SetTensorWrapper_LnScale(const paddle::Tensor& LnScale) { LnScale_ = egr::TensorWrapper(LnScale, false); } - void SetTensorWrapperLnBias(const paddle::Tensor& LnBias) { + void SetTensorWrapper_LnBias(const paddle::Tensor& LnBias) { LnBias_ = egr::TensorWrapper(LnBias, false); } - void SetTensorWrapperLnOut(const paddle::Tensor& LnOut) { + void SetTensorWrapper_LnOut(const paddle::Tensor& LnOut) { LnOut_ = egr::TensorWrapper(LnOut, false); } - void SetTensorWrapperLnMean(const paddle::Tensor& LnMean) { + void SetTensorWrapper_LnMean(const paddle::Tensor& LnMean) { LnMean_ = egr::TensorWrapper(LnMean, false); } - void SetTensorWrapperLnVariance(const paddle::Tensor& LnVariance) { + void SetTensorWrapper_LnVariance(const paddle::Tensor& LnVariance) { LnVariance_ = egr::TensorWrapper(LnVariance, false); } @@ -563,10 +563,10 @@ class fused_gemm_epilogueGradNodeCompat : public egr::GradNodeBase { } // SetX, SetY, ... - void SetTensorWrapperX(const paddle::Tensor& X) { + void SetTensorWrapper_X(const paddle::Tensor& X) { X_ = egr::TensorWrapper(X, false); } - void SetTensorWrapperY(const paddle::Tensor& Y) { + void SetTensorWrapper_Y(const paddle::Tensor& Y) { Y_ = egr::TensorWrapper(Y, false); } @@ -640,32 +640,32 @@ class fused_bias_dropout_residual_layer_normGradNodeCompat } // SetX, SetY, ... - void SetTensorWrapperBias(const paddle::Tensor& Bias) { + void SetTensorWrapper_Bias(const paddle::Tensor& Bias) { Bias_ = egr::TensorWrapper(Bias, false); } - void SetTensorWrapperBiasDropoutResidualOut( + void SetTensorWrapper_BiasDropoutResidualOut( const paddle::Tensor& BiasDropoutResidualOut) { BiasDropoutResidualOut_ = egr::TensorWrapper(BiasDropoutResidualOut, false); } - void SetTensorWrapperDropoutMaskOut(const paddle::Tensor& DropoutMaskOut) { + void SetTensorWrapper_DropoutMaskOut(const paddle::Tensor& DropoutMaskOut) { DropoutMaskOut_ = egr::TensorWrapper(DropoutMaskOut, false); } - void SetTensorWrapperLnBias(const paddle::Tensor& LnBias) { + void SetTensorWrapper_LnBias(const paddle::Tensor& LnBias) { LnBias_ = egr::TensorWrapper(LnBias, false); } - void SetTensorWrapperLnMean(const paddle::Tensor& LnMean) { + void SetTensorWrapper_LnMean(const paddle::Tensor& LnMean) { LnMean_ = egr::TensorWrapper(LnMean, false); } - void SetTensorWrapperLnScale(const paddle::Tensor& LnScale) { + void SetTensorWrapper_LnScale(const paddle::Tensor& LnScale) { LnScale_ = egr::TensorWrapper(LnScale, false); } - void SetTensorWrapperLnVariance(const paddle::Tensor& LnVariance) { + void SetTensorWrapper_LnVariance(const paddle::Tensor& LnVariance) { LnVariance_ = egr::TensorWrapper(LnVariance, false); } - void SetTensorWrapperResidual(const paddle::Tensor& Residual) { + void SetTensorWrapper_Residual(const paddle::Tensor& Residual) { Residual_ = egr::TensorWrapper(Residual, false); } - void SetTensorWrapperX(const paddle::Tensor& X) { + void SetTensorWrapper_X(const paddle::Tensor& X) { X_ = egr::TensorWrapper(X, false); } diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index b9e04b3e318ac..66b4d05f68bf0 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1204,7 +1204,7 @@ static std::string GenerateGradNodeCreationContent( for (auto& kv : grad_ins_fwd_slotname_map) { const std::string& tensor_wrapper_name = kv.second; const char* SET_TENSOR_WRAPPER_TEMPLATE = - " grad_node->SetTensorWrapper%s(%s);\n"; + " grad_node->SetTensorWrapper_%s(%s);\n"; // Replace output directly with input in inplace op. if (!forward_inplace_map.empty() && forward_inplace_map.count(tensor_wrapper_name)) { @@ -2941,7 +2941,7 @@ static std::string GenerateGradNodeHeaderContents( CLEAR_TENSOR_WRAPPER_TEMPLATE, struct_tensor_wrapper_name); } const char* SET_TENSOR_WRAPPER_TEMPLATE = - " void SetTensorWrapper%s(%s) {\n %s\n }\n"; + " void SetTensorWrapper_%s(%s) {\n %s\n }\n"; set_tensor_wrappers_str += paddle::string::Sprintf(SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index e1ad1a0dc81b2..dad46949d70ea 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -135,12 +135,12 @@ def ParseArguments(): ###################### # Code Gen Templates # ###################### -SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const paddle::Tensor& {}) {{ +SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper_{}(const paddle::Tensor& {}) {{ {} = egr::TensorWrapper({}, {}); }} """ -SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const std::vector& {}) {{ +SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper_{}(const std::vector& {}) {{ for(const auto& eager_tensor : {}) {{ {}.emplace_back(egr::TensorWrapper(eager_tensor, {})); }}; @@ -161,7 +161,7 @@ def ParseArguments(): }} """ -SET_ATTR_METHOD_TEMPLATE = """ void SetAttribute{}({} {}) {{ +SET_ATTR_METHOD_TEMPLATE = """ void SetAttribute_{}({} {}) {{ {} = {}; }} """ @@ -1062,10 +1062,10 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): for name, _, default_val_attr, _ in backward_attrs_list: if name in forward_attrs_name_set: set_attributes = ( - f"{indent}grad_node->SetAttribute{name}({name});" + f"{indent}grad_node->SetAttribute_{name}({name});" ) else: - set_attributes = f"{indent}grad_node->SetAttribute{name}({default_val_attr});" + set_attributes = f"{indent}grad_node->SetAttribute_{name}({default_val_attr});" set_attributes_list.append(set_attributes) set_attributes_str = "\n".join(set_attributes_list) @@ -1089,7 +1089,7 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): if is_inplace_input: set_tensor_wrappers = """{indent}if({name}) { auto {name}_clone = paddle::experimental::assign({name}); - grad_node->SetTensorWrapper{name}(*{name}_clone);}""".format_map( + grad_node->SetTensorWrapper_{name}(*{name}_clone);}""".format_map( {"indent": indent, "name": name} ) else: @@ -1100,16 +1100,16 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): or (name in self.optional_inputs) ): if for_backward is False: - set_tensor_wrappers = f"{indent}if({name}) grad_node->SetTensorWrapper{name}(*{name});" + set_tensor_wrappers = f"{indent}if({name}) grad_node->SetTensorWrapper_{name}(*{name});" else: - set_tensor_wrappers = f"{indent}if({name}_optional) grad_node->SetTensorWrapper{name}(*{name}_optional);" + set_tensor_wrappers = f"{indent}if({name}_optional) grad_node->SetTensorWrapper_{name}(*{name}_optional);" else: need_pre_contiguous_set.add(name) - set_tensor_wrappers = f"{indent}if({name}) grad_node->SetTensorWrapper{name}(*{name}_tmp);" + set_tensor_wrappers = f"{indent}if({name}) grad_node->SetTensorWrapper_{name}(*{name}_tmp);" else: if is_inplace_input: - set_tensor_wrappers = f"{indent}auto {name}_clone = paddle::experimental::assign({name});\n{indent}grad_node->SetTensorWrapper{name}({name}_clone);" + set_tensor_wrappers = f"{indent}auto {name}_clone = paddle::experimental::assign({name});\n{indent}grad_node->SetTensorWrapper_{name}({name}_clone);" else: if ( (forward_api_name in strided_op_list) @@ -1117,10 +1117,10 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): or IsVectorTensorType(atype) or (name in self.optional_inputs) ): - set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper{name}({name});" + set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper_{name}({name});" else: need_pre_contiguous_set.add(name) - set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper{name}({name}_tmp);" + set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper_{name}({name}_tmp);" set_input_tensor_wrappers_list.append(set_tensor_wrappers) else: # Forwad's output as backward's input if num_fwd_outputs > 1: @@ -1130,7 +1130,7 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): ), AssertMessage(name, forward_outputs_position_map.keys()) set_tensor_wrappers = ( - f"{indent}grad_node->SetTensorWrapper{name}({name});" + f"{indent}grad_node->SetTensorWrapper_{name}({name});" ) set_output_tensor_wrappers_list.append(set_tensor_wrappers) set_input_tensor_wrappers_str = "\n".join( diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 6611d108adcf5..2094fef07a873 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -1783,13 +1783,13 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self, grad_node = std::shared_ptr( new SetValueWithTensorGradNode(1, 2)); // NOLINT - grad_node->SetAttributestarts(slice_starts); - grad_node->SetAttributeends(slice_ends); - grad_node->SetAttributesteps(slice_strides); - grad_node->SetAttributeaxes(slice_axes); - grad_node->SetAttributedecrease_axes(decrease_axis); - grad_node->SetAttributenone_axes(none_axes); - grad_node->SetTensorWrappervalues(values_tmp); + grad_node->SetAttribute_starts(slice_starts); + grad_node->SetAttribute_ends(slice_ends); + grad_node->SetAttribute_steps(slice_strides); + grad_node->SetAttribute_axes(slice_axes); + grad_node->SetAttribute_decrease_axes(decrease_axis); + grad_node->SetAttribute_none_axes(none_axes); + grad_node->SetTensorWrapper_values(values_tmp); paddle::memory::LogDeviceMemoryStats( egr::Controller::Instance().GetExpectedPlace(),