[HACKATHON 6th][CMake Optimization] fix copy elision
silverling committed Mar 8, 2024
1 parent e5b09ec commit db64b59
Showing 39 changed files with 178 additions and 203 deletions.
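The recurring edit in this diff is the removal of `std::move` applied to values that are already temporaries (prvalues): either a local being initialized from a function that returns by value, where the extra move defeats copy elision, or a temporary passed straight into a constructor or `emplace`-style sink, where the wrapper is simply redundant. A standalone sketch of both shapes (not Paddle code):

```cpp
#include <string>
#include <utility>
#include <vector>

std::string make_value() { return std::string(64, 'x'); }

int main() {
  // Pessimizing: the std::move forces a temporary plus a move construction;
  // without it, C++17 guarantees the return value initializes `a` in place.
  std::string a = std::move(make_value());
  std::string b = make_value();  // preferred

  // Redundant: push_back has a T&& overload, so the temporary binds to it
  // either way; the wrapper adds nothing.
  std::vector<std::string> v;
  v.push_back(std::move(make_value()));
  v.push_back(make_value());  // identical behavior
  return 0;
}
```

A couple of hunks below also drop unused lambda captures, a separate cleanup bundled into the same commit.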
8 changes: 4 additions & 4 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -850,8 +850,8 @@ void EagerReducer::MarkVarReady(const size_t var_index,
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor_impl);
if (!dense_tensor->meta().is_contiguous()) {
- grad_tensor.set_impl(std::make_shared<phi::DenseTensor>(std::move(
-     paddle::experimental::Trans2Contiguous(*dense_tensor))));
+ grad_tensor.set_impl(std::make_shared<phi::DenseTensor>(
+     paddle::experimental::Trans2Contiguous(*dense_tensor)));
}
}

@@ -884,8 +884,8 @@ void EagerReducer::MarkVarReady(const size_t var_index,
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor_impl);
if (!dense_tensor->meta().is_contiguous()) {
- grad_tensor->set_impl(std::make_shared<phi::DenseTensor>(std::move(
-     paddle::experimental::Trans2Contiguous(*dense_tensor))));
+ grad_tensor->set_impl(std::make_shared<phi::DenseTensor>(
+     paddle::experimental::Trans2Contiguous(*dense_tensor)));
}
}

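In the two hunks above, `Trans2Contiguous` appears to return a `phi::DenseTensor` by value (inferred from the call sites, not verified against its declaration). `std::make_shared` takes its arguments by forwarding reference, so the returned temporary is moved into the new `DenseTensor` exactly once whether or not it is wrapped in `std::move`; dropping the wrapper removes noise without changing behavior. A hedged sketch with a stand-in type:

```cpp
#include <memory>
#include <utility>
#include <vector>

// Stand-ins for phi::DenseTensor and Trans2Contiguous (hypothetical types).
struct Dense { std::vector<float> data; };
Dense MakeContiguous(const Dense& src) { return Dense{src.data}; }

int main() {
  Dense d{{1.f, 2.f, 3.f}};

  // Before: redundant wrapper around a temporary argument.
  auto a = std::make_shared<Dense>(std::move(MakeContiguous(d)));

  // After: the temporary binds to make_shared's forwarding reference and is
  // moved into the shared object exactly once either way.
  auto b = std::make_shared<Dense>(MakeContiguous(d));
  (void)a;
  (void)b;
  return 0;
}
```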
9 changes: 3 additions & 6 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -1634,8 +1634,7 @@ void BrpcPsClient::PushSparseTaskConsume() {

task_list.reserve(cur_merge_size + 1);

- task_list.push_back(
-     std::move(std::shared_ptr<SparseAsyncTask>(async_task)));
+ task_list.push_back(std::shared_ptr<SparseAsyncTask>(async_task));

while (!task_queue->Empty() && merge_count < cur_merge_size) {
++merge_count;
@@ -1667,8 +1666,7 @@ void BrpcPsClient::PushSparseTaskConsume() {

for_each(task_list.begin() + 1,
task_list.end(),
- [&request_kv_num, request_call_num, closure](
-     std::shared_ptr<SparseAsyncTask> &task) {
+ [closure](std::shared_ptr<SparseAsyncTask> &task) {
closure->add_timer(task->timer());
closure->add_promise(task->promise());
});
@@ -1978,8 +1976,7 @@ void BrpcPsClient::PushDenseTaskConsume() {
closure->add_timer(async_task->timer());
closure->add_promise(async_task->promise());
merge_status[merge_count] =
- async_merge_dense_threads.enqueue([closure,
-                                    accessor,
+ async_merge_dense_threads.enqueue([accessor,
&total_send_data,
total_send_data_size,
async_task]() -> int {
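The two lambda edits in this file are capture-list cleanups rather than move fixes: names the body never reads are dropped (in the dense-push case `closure` is already consumed before the `enqueue`, judging from the surrounding lines). Clang's `-Wunused-lambda-capture` diagnoses the same thing; a minimal illustration:

```cpp
#include <cstdio>

int main() {
  int used = 1;
  int unused = 2;

  // clang -Wunused-lambda-capture warns that `unused` is captured but never
  // referenced; dropping it shrinks the closure and documents the real
  // dependencies of the asynchronous body.
  auto before = [used, unused]() { return used + 1; };
  auto after = [used]() { return used + 1; };

  std::printf("%d %d %d\n", before(), after(), unused);
  return 0;
}
```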
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/ps/table/memory_dense_table.cc
@@ -356,7 +356,7 @@ int32_t MemoryDenseTable::Save(const std::string &path,
os << " ";
os << values_[param_col_ids_[x]][y];
}
- result_buffer_param.emplace_back(std::move(os.str()));
+ result_buffer_param.emplace_back(os.str());
}
} else {
std::ostringstream os;
@@ -368,7 +368,7 @@ int32_t MemoryDenseTable::Save(const std::string &path,
os << " ";
os << values_[param_col_ids_[x]][y];
}
- result_buffer_param.emplace_back(std::move(os.str()));
+ result_buffer_param.emplace_back(os.str());
}
}

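`std::ostringstream::str()` already returns the buffered characters as a fresh `std::string` by value, so the `std::move` wrapper never avoided the copy out of the stream; it only moved the already-built temporary into the vector, which `emplace_back` does anyway. (C++20 adds an rvalue-qualified `str() &&` overload that can move the buffer out, but that depends on how the stream is used, not on these call sites.) A small illustration:

```cpp
#include <sstream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> rows;
  std::ostringstream os;
  os << 1.5f << " " << 2.5f;

  // Both calls move-construct the new element from the temporary string that
  // str() returns; the std::move wrapper changes nothing about the copy that
  // str() itself performs.
  rows.emplace_back(std::move(os.str()));
  rows.emplace_back(os.str());
  return 0;
}
```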
3 changes: 1 addition & 2 deletions paddle/fluid/eager/amp_auto_cast.h
@@ -53,8 +53,7 @@ inline std::vector<paddle::Tensor> AmpAutoCasts(
paddle::framework::AttributeMap cast_attrs = {
{"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
{"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
- inputs_casted.emplace_back(
-     std::move(cast_dygraph_function(input, cast_attrs)));
+ inputs_casted.emplace_back(cast_dygraph_function(input, cast_attrs));
} else {
inputs_casted.emplace_back(input);
}
6 changes: 2 additions & 4 deletions paddle/fluid/eager/api/utils/hook_utils.cc
@@ -30,9 +30,7 @@ int64_t RegisterGradientHookForTensor(
auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();

return grad_node->RegisterGradientHook(
- rank_info.first,
- rank_info.second,
- std::move(std::make_shared<CppTensorHook>(hook)));
+ rank_info.first, rank_info.second, std::make_shared<CppTensorHook>(hook));
}

void RegisterReduceHookForTensor(const paddle::Tensor& tensor,
@@ -48,7 +46,7 @@ void RegisterReduceHookForTensor(const paddle::Tensor& tensor,
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RegisterReduceHook(
-     std::move(std::make_shared<CppVoidHook>(hook)));
+     std::make_shared<CppVoidHook>(hook));
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Only can register reduce hook for leaf Tensor."));
@@ -1147,7 +1147,7 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False):
for name, (ttype, pos) in forward_inputs_position_map.items():
if name in need_pre_contiguous_set:
pre_contiguous_list.append(
f"{indent}const auto& {name}_tmp = (require_any_grad && {name}.is_dense_tensor() && !std::dynamic_pointer_cast<phi::DenseTensor>({name}.impl())->meta().is_contiguous()) ? paddle::Tensor(std::make_shared<phi::DenseTensor>(std::move(paddle::experimental::Trans2Contiguous(*(std::dynamic_pointer_cast<phi::DenseTensor>({name}.impl()))))), {name}.mutable_autograd_meta()) : {name};"
f"{indent}const auto& {name}_tmp = (require_any_grad && {name}.is_dense_tensor() && !std::dynamic_pointer_cast<phi::DenseTensor>({name}.impl())->meta().is_contiguous()) ? paddle::Tensor(std::make_shared<phi::DenseTensor>(paddle::experimental::Trans2Contiguous(*(std::dynamic_pointer_cast<phi::DenseTensor>({name}.impl())))), {name}.mutable_autograd_meta()) : {name};"
)
self.inputs_call_list_tmp[pos] = (
self.inputs_call_list_tmp[pos] + '_tmp'
4 changes: 2 additions & 2 deletions paddle/fluid/eager/custom_operator/custom_operator_node.cc
@@ -209,8 +209,8 @@ RunCustomOpNode::operator()(paddle::small_vector<std::vector<paddle::Tensor>,
->meta()
.is_contiguous()) {
tensor.set_impl(std::make_shared<phi::DenseTensor>(
-     std::move(paddle::experimental::Trans2Contiguous(*(
-         std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl()))))));
+     paddle::experimental::Trans2Contiguous(*(
+         std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())))));
}
}

9 changes: 4 additions & 5 deletions paddle/fluid/eager/custom_operator/custom_operator_utils.cc
@@ -163,12 +163,11 @@ static std::vector<std::vector<phi::DDim>> RunInferShapeFunc(
for (size_t i = 0; i < ctx.InputRange().size(); ++i) {
const auto& input_pair = ctx.InputRangeAt(i);
if (input_pair.first == input_pair.second - 1) {
- input_shapes.emplace_back(
-     std::move(ctx.InputAt(input_pair.first).shape()));
+ input_shapes.emplace_back(ctx.InputAt(input_pair.first).shape());
} else {
std::vector<std::vector<int64_t>> shapes;
for (size_t j = input_pair.first; j < input_pair.second; j++) {
- shapes.push_back(std::move(ctx.InputAt(j).shape()));
+ shapes.push_back(ctx.InputAt(j).shape());
}
vec_input_shapes.emplace_back(std::move(shapes));
}
@@ -800,8 +799,8 @@ void run_custom_op_impl(const paddle::OpMetaInfo& op_info,
->meta()
.is_contiguous()) {
tensor.set_impl(std::make_shared<phi::DenseTensor>(
-     std::move(paddle::experimental::Trans2Contiguous(
-         *(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl()))))));
+     paddle::experimental::Trans2Contiguous(
+         *(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())))));
}
}

2 changes: 1 addition & 1 deletion paddle/fluid/eager/general_grad.h
@@ -270,7 +270,7 @@ class GeneralGrad {
target_node->RegisterGradientHook(
rank_info.first,
rank_info.second,
-     std::move(std::make_shared<egr::CppTensorHook>(hook)));
+     std::make_shared<egr::CppTensorHook>(hook));
return tmp;
}

4 changes: 2 additions & 2 deletions paddle/fluid/eager/to_static/run_program_op_func.h
@@ -122,8 +122,8 @@ static std::vector<paddle::Tensor> Trans2ContiguousTensors(
.is_contiguous()) {
res.emplace_back(
std::make_shared<phi::DenseTensor>(
-         std::move(paddle::experimental::Trans2Contiguous(
-             *(std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()))))),
+         paddle::experimental::Trans2Contiguous(
+             *(std::dynamic_pointer_cast<phi::DenseTensor>(t.impl())))),
t.mutable_autograd_meta());
} else {
res.emplace_back(t);
6 changes: 3 additions & 3 deletions paddle/fluid/framework/custom_operator.cc
@@ -147,7 +147,7 @@ static void RunKernelFunc(
in_name));
VLOG(3) << "Custom Operator: KernelFunc's input " << in_name
<< " is optional dtype with None input";
- kernel_ctx.EmplaceBackInput(std::move(paddle::Tensor()));
+ kernel_ctx.EmplaceBackInput(paddle::Tensor());
}
}
}
@@ -215,7 +215,7 @@ static void RunKernelFunc(
VLOG(3) << "Custom Operator: InferDtype - inplace optional outputs : "
<< out_name << " is None.";
true_out_ptrs.emplace_back(nullptr);
- kernel_ctx.EmplaceBackOutput(std::move(paddle::Tensor()));
+ kernel_ctx.EmplaceBackOutput(paddle::Tensor());
continue;
}
// general/inplace vector<Tensor> outputs
@@ -252,7 +252,7 @@ static void RunKernelFunc(
VLOG(3) << "Custom Operator: InferDtype - inplace optional outputs : "
<< out_name << " is None.";
true_out_ptrs.emplace_back(nullptr);
- kernel_ctx.EmplaceBackOutput(std::move(paddle::Tensor()));
+ kernel_ctx.EmplaceBackOutput(paddle::Tensor());
continue;
}
// general/inplace Tensor outputs
8 changes: 4 additions & 4 deletions paddle/fluid/framework/heter_section_worker.cc
@@ -128,7 +128,7 @@ void HeterSectionWorker::Initialize(const TrainerDesc& desc) {

if (is_first_stage) { // NOLINT
for (auto& op_desc : program_->Block(0).AllOps()) {
- auto op = std::move(OpRegistry::CreateOp(*op_desc));
+ auto op = OpRegistry::CreateOp(*op_desc);
auto op_type = op->Type();
if (listen_op_ == nullptr && op_type == "heter_listen_and_serv") {
listen_op_ = std::move(op);
@@ -142,11 +142,11 @@ void HeterSectionWorker::Initialize(const TrainerDesc& desc) {
} else if (is_last_stage) {
for (auto& op_desc : program_->Block(0).AllOps()) {
if (listen_op_ == nullptr) {
- listen_op_ = std::move(OpRegistry::CreateOp(*op_desc));
+ listen_op_ = OpRegistry::CreateOp(*op_desc);
}
}
for (auto& op_desc : program_->Block(1).AllOps()) {
- auto op = std::move(OpRegistry::CreateOp(*op_desc));
+ auto op = OpRegistry::CreateOp(*op_desc);
int op_role = op->Attr<int>(std::string("op_role"));
bool is_forward_op = (op_role == static_cast<int>(OpRole::kForward)) ||
(op_role == (static_cast<int>(OpRole::kForward) |
@@ -161,7 +161,7 @@ void HeterSectionWorker::Initialize(const TrainerDesc& desc) {
} else {
for (auto& op_desc : program_->Block(0).AllOps()) {
if (listen_op_ == nullptr) {
- listen_op_ = std::move(OpRegistry::CreateOp(*op_desc));
+ listen_op_ = OpRegistry::CreateOp(*op_desc);
}
}
for (auto& op_desc : program_->Block(1).AllOps()) {
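Worth noting what this file keeps: a few lines below each dropped wrapper, `listen_op_ = std::move(op);` survives, because `op` is a named local, an lvalue, and moving it into the member is exactly what `std::move` is for. Only moves applied to prvalues (by-value returns, freshly built temporaries) are removed. A sketch of the distinction with stand-in types:

```cpp
#include <memory>
#include <utility>

struct Op {};
std::unique_ptr<Op> CreateOp() { return std::make_unique<Op>(); }

struct Worker {
  std::unique_ptr<Op> listen_op_;

  void Init() {
    // Prvalue: no std::move needed; the return value initializes `op` directly.
    auto op = CreateOp();

    // Named lvalue: std::move is required to transfer ownership into the
    // member (unique_ptr is move-only, so a copy would not even compile).
    listen_op_ = std::move(op);
  }
};

int main() {
  Worker w;
  w.Init();
  return 0;
}
```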
41 changes: 18 additions & 23 deletions paddle/fluid/framework/infershape_utils.cc
@@ -559,25 +559,23 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,

for (auto& in_name : input_names) {
if (ctx->HasInputs(in_name)) {
- auto input_var = std::move(ctx->GetInputVarPtrs(in_name));
+ auto input_var = ctx->GetInputVarPtrs(in_name);
if (input_var.size() == 1) {
infer_meta_context.EmplaceBackInput(
-     std::move(CompatMetaTensor(input_var[0], ctx->IsRuntime())));
+     CompatMetaTensor(input_var[0], ctx->IsRuntime()));
} else {
paddle::small_vector<CompatMetaTensor, phi::kInputSmallVectorSize>
inputs;
for (const auto& in : input_var) {
- inputs.emplace_back(
-     std::move(CompatMetaTensor(in, ctx->IsRuntime())));
+ inputs.emplace_back(CompatMetaTensor(in, ctx->IsRuntime()));
}
infer_meta_context.EmplaceBackInputs(std::move(inputs));
}
} else {
// Note: Because the input of InferMetaFn is const MetaTensor&,
// so when we prepare input MetaTensor by InferMetaContext->InputAt(),
// we need to return a const reference of empty MetaTensor
- infer_meta_context.EmplaceBackInput(
-     std::move(CompatMetaTensor(ctx->IsRuntime())));
+ infer_meta_context.EmplaceBackInput(CompatMetaTensor(ctx->IsRuntime()));
}
}

@@ -631,7 +629,7 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
attr_name));
}
} else if (ctx->HasInput(attr_name)) {
- auto infershape_input = std::move(ctx->GetInputVarPtrs(attr_name));
+ auto infershape_input = ctx->GetInputVarPtrs(attr_name);
if (infershape_input.size() == 1) {
if (ctx->IsRuntime()) {
Variable* var = PADDLE_GET_CONST(Variable*, infershape_input[0]);
@@ -659,12 +657,12 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
auto& attr = *attr_ptr;
switch (AttrTypeID(attr)) {
case framework::proto::AttrType::INTS: // NOLINT
- infer_meta_context.EmplaceBackAttr(std::move(
-     phi::IntArray(PADDLE_GET_CONST(std::vector<int32_t>, attr))));
+ infer_meta_context.EmplaceBackAttr(
+     phi::IntArray(PADDLE_GET_CONST(std::vector<int32_t>, attr)));
break;
case framework::proto::AttrType::LONGS:
- infer_meta_context.EmplaceBackAttr(std::move(
-     phi::IntArray(PADDLE_GET_CONST(std::vector<int64_t>, attr))));
+ infer_meta_context.EmplaceBackAttr(
+     phi::IntArray(PADDLE_GET_CONST(std::vector<int64_t>, attr)));
break;
case framework::proto::AttrType::INT:
infer_meta_context.EmplaceBackAttr(
Expand All @@ -677,7 +675,7 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
attr_name));
}
} else if (ctx->HasInputs(attr_name) || ctx->HasInput(attr_name)) {
- auto infershape_inputs = std::move(ctx->GetInputVarPtrs(attr_name));
+ auto infershape_inputs = ctx->GetInputVarPtrs(attr_name);
if (ctx->IsRuntime()) {
// If is in runtime, we will get tensor's value for IntArray
// and push it into attrs
Expand All @@ -688,10 +686,10 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
}
if (infershape_inputs.size() != 1) {
infer_meta_context.EmplaceBackAttr(
-     std::move(framework::MakePhiIntArrayFromVarList(vars)));
+     framework::MakePhiIntArrayFromVarList(vars));
} else {
infer_meta_context.EmplaceBackAttr(
-     std::move(framework::MakePhiIntArrayFromVar(*vars[0])));
+     framework::MakePhiIntArrayFromVar(*vars[0]));
}
} else {
// If is not in runtime, we will set default value(-1) for IntArray
@@ -868,32 +866,29 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,

for (auto& out_name : output_names) {
if (ctx->HasOutputs(out_name, true)) {
- auto output_var = std::move(ctx->GetOutputVarPtrs(out_name));
+ auto output_var = ctx->GetOutputVarPtrs(out_name);
if (output_var.size() == 1) {
infer_meta_context.EmplaceBackOutput(
-     std::move(CompatMetaTensor(output_var[0], ctx->IsRuntime())));
+     CompatMetaTensor(output_var[0], ctx->IsRuntime()));
} else {
paddle::small_vector<CompatMetaTensor, phi::kOutputSmallVectorSize>
outputs;
for (const auto& out : output_var) {
if (ctx->IsRuntime()) {
if (PADDLE_GET_CONST(Variable*, out)) {
- outputs.emplace_back(
-     std::move(CompatMetaTensor(out, ctx->IsRuntime())));
+ outputs.emplace_back(CompatMetaTensor(out, ctx->IsRuntime()));
continue;
}
} else if (PADDLE_GET_CONST(VarDesc*, out)) {
- outputs.emplace_back(
-     std::move(CompatMetaTensor(out, ctx->IsRuntime())));
+ outputs.emplace_back(CompatMetaTensor(out, ctx->IsRuntime()));
continue;
}
- outputs.emplace_back(std::move(CompatMetaTensor(ctx->IsRuntime())));
+ outputs.emplace_back(CompatMetaTensor(ctx->IsRuntime()));
}
infer_meta_context.EmplaceBackOutputs(std::move(outputs));
}
} else {
- infer_meta_context.EmplaceBackOutput(
-     std::move(CompatMetaTensor(ctx->IsRuntime())));
+ infer_meta_context.EmplaceBackOutput(CompatMetaTensor(ctx->IsRuntime()));
}
}

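One caveat behind edits like `auto input_var = ctx->GetInputVarPtrs(in_name);`: dropping `std::move` is only harmless if the callee returns by value, which the original pattern and the surrounding `EmplaceBack*` calls suggest it does. If a getter returned a non-const reference instead, the `std::move` would have been converting a copy into a move, and removing it would quietly reintroduce the copy. A sketch of both situations with hypothetical getters:

```cpp
#include <string>
#include <utility>
#include <vector>

// Hypothetical getters standing in for ctx->GetInputVarPtrs(...).
std::vector<std::string> ByValue() { return {"a", "b"}; }
std::vector<std::string>& ByReference() {
  static std::vector<std::string> storage{"a", "b"};
  return storage;
}

int main() {
  // By-value return: the prvalue initializes v1 directly; an extra std::move
  // would be pessimizing or at best redundant, so dropping it is safe.
  auto v1 = ByValue();

  // Reference return: std::move does real work here, turning a copy into a
  // move (and leaving the referenced storage empty). Dropping it would
  // silently reintroduce a copy, so the by-value assumption matters.
  auto v2 = std::move(ByReference());
  (void)v1;
  (void)v2;
  return 0;
}
```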
10 changes: 4 additions & 6 deletions paddle/fluid/framework/ir/fusion_group/code_generator.cc
@@ -173,12 +173,10 @@ std::string CodeGenerator::Generate(
std::string func_name,
const std::vector<OperationExpression>& expressions) {
// TODO(liuyiqun): Check whether all expressions are elementwise operations.
- std::set<int> input_ids = std::move(DistilInputIds(expressions));
- std::set<int> output_ids = std::move(DistilOutputIds(expressions));
- std::set<int> intermediate_output_ids =
-     std::move(DistilIntermediateIds(expressions));
- std::unordered_map<int, std::string> dtypes =
-     std::move(DistilDtypes(expressions));
+ std::set<int> input_ids = DistilInputIds(expressions);
+ std::set<int> output_ids = DistilOutputIds(expressions);
+ std::set<int> intermediate_output_ids = DistilIntermediateIds(expressions);
+ std::unordered_map<int, std::string> dtypes = DistilDtypes(expressions);
TemplateVariable template_var;
template_var.Add("func_name", func_name);
template_var.Add(
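The four initializations above are the textbook shape that compilers can diagnose: a local initialized from `std::move` of a by-value return pays for a temporary plus a move instead of having the result constructed in place. Recent GCC and Clang report this under `-Wpessimizing-move` (GCC enables it with `-Wall`), so a warning-clean build would flag every instance of this particular shape; a compile-check sketch:

```cpp
// Compile with: g++ -std=c++17 -Wall -c distil.cc   (or clang++ -Wall)
// Expect a diagnostic along the lines of
//   "moving a temporary object prevents copy elision [-Wpessimizing-move]"
#include <set>
#include <utility>

std::set<int> DistilIds() { return {1, 2, 3}; }  // stand-in for DistilInputIds

int main() {
  std::set<int> bad = std::move(DistilIds());  // warns: pessimizing move
  std::set<int> good = DistilIds();            // constructed in place
  (void)bad;
  (void)good;
  return 0;
}
```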
3 changes: 1 addition & 2 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -781,8 +781,7 @@ void GraphSafeRemoveNodes(
for (auto *node : nodes) {
if (saved_nodes != nullptr) {
// prevent unique_ptr node from being released
- saved_nodes->insert(
-     std::move(graph->RemoveNode(const_cast<Node *>(node))));
+ saved_nodes->insert(graph->RemoveNode(const_cast<Node *>(node)));
} else {
graph->RemoveNode(const_cast<Node *>(node));
}
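`RemoveNode` presumably hands ownership back by value as a `std::unique_ptr` (suggested by the "prevent unique_ptr node from being released" comment). Even for a move-only type, a by-value return needs no `std::move`: the prvalue binds directly to the rvalue overload of `insert`. A sketch with `std::unique_ptr`:

```cpp
#include <memory>
#include <set>
#include <utility>

struct Node { int id = 0; };

// Stand-in for graph->RemoveNode(...): transfers ownership out by value.
std::unique_ptr<Node> RemoveNode() { return std::make_unique<Node>(); }

int main() {
  std::set<std::unique_ptr<Node>> saved;

  // Move-only types work fine without std::move when the source is a prvalue:
  // the return value binds straight to insert's rvalue-reference overload.
  saved.insert(RemoveNode());

  // The wrapped form compiles too; it just adds noise.
  saved.insert(std::move(RemoveNode()));
  return 0;
}
```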
@@ -85,7 +85,7 @@ void CustomKernelInstruction::BuildCustomContext(
input_name2id_map_[t] = input_index;
input_index++;
input_ptrs_.emplace_back(nullptr);
- custom_kernel_ctx_.EmplaceBackInput(std::move(paddle::Tensor()));
+ custom_kernel_ctx_.EmplaceBackInput(paddle::Tensor());
}
VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t;
continue;
@@ -281,7 +281,7 @@ void CustomKernelInstruction::BuildCustomContext(
VLOG(3) << "Custom Operator: BuildContext - inplace optional outputs : "
<< out_name << " is None.";
cache_out_ptrs_.emplace_back(nullptr);
- custom_kernel_ctx_.EmplaceBackOutput(std::move(paddle::Tensor()));
+ custom_kernel_ctx_.EmplaceBackOutput(paddle::Tensor());

VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
continue;

