From eff17757dc976ebc3cc50e0544da1ee33db74942 Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 24 May 2024 11:45:42 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=206th=20Fundable=20Projects?= =?UTF-8?q?=203=20No.300=E3=80=91recurrent=20(#64545)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle/fluid/framework/CMakeLists.txt | 24 +- paddle/fluid/framework/executor.cc | 3 - paddle/fluid/framework/executor_gc_helper.cc | 3 - .../ir/memory_optimize_pass/CMakeLists.txt | 5 - .../eager_deletion_pass.cc | 5 - .../recurrent_op_eager_deletion_pass.cc | 114 --- .../recurrent_op_eager_deletion_pass.h | 43 - .../interpreter/interpreter_util.cc | 3 - paddle/fluid/operators/CMakeLists.txt | 8 +- .../operators/controlflow/CMakeLists.txt | 4 - .../controlflow/recurrent_op_helper.cc | 291 ------- .../controlflow/recurrent_op_helper.h | 59 -- paddle/fluid/operators/recurrent_op.cc | 801 ------------------ paddle/fluid/operators/recurrent_op.h | 260 ------ test/cpp/fluid/framework/CMakeLists.txt | 2 +- 15 files changed, 14 insertions(+), 1611 deletions(-) delete mode 100644 paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc delete mode 100644 paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h delete mode 100644 paddle/fluid/operators/controlflow/recurrent_op_helper.cc delete mode 100644 paddle/fluid/operators/controlflow/recurrent_op_helper.h delete mode 100644 paddle/fluid/operators/recurrent_op.cc delete mode 100644 paddle/fluid/operators/recurrent_op.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 39e1a47d1d4ae..041339fe597c3 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -550,7 +550,6 @@ cc_library( executor_gc_helper SRCS executor_gc_helper.cc DEPS while_op_helper - recurrent_op_helper conditional_block_op_helper pylayer_op_helper scope @@ -585,7 +584,6 @@ if(WITH_DISTRIBUTE) device_worker_factory.cc data_set.cc DEPS fleet_wrapper - recurrent_op_helper op_registry device_context scope @@ -660,8 +658,7 @@ if(WITH_DISTRIBUTE) heter_section_worker.cc device_worker_factory.cc data_set.cc - DEPS recurrent_op_helper - op_registry + DEPS op_registry device_context scope framework_proto @@ -736,8 +733,7 @@ if(WITH_DISTRIBUTE) section_worker.cc device_worker_factory.cc data_set.cc - DEPS recurrent_op_helper - op_registry + DEPS op_registry device_context scope framework_proto @@ -796,8 +792,7 @@ elseif(WITH_PSLIB) section_worker.cc device_worker_factory.cc data_set.cc - DEPS recurrent_op_helper - op_registry + DEPS op_registry device_context scope framework_proto @@ -843,8 +838,7 @@ else() section_worker.cc device_worker_factory.cc data_set.cc - DEPS recurrent_op_helper - op_registry + DEPS op_registry device_context scope framework_proto @@ -867,8 +861,14 @@ else() endif() target_link_libraries( - executor while_op_helper executor_gc_helper recurrent_op_helper - conditional_block_op_helper pylayer_op_helper) + executor + while_op_helper + executor_gc_helper + static_prim_api + static_utils + get_expected_kernel_func + conditional_block_op_helper + pylayer_op_helper) cc_library( parallel_executor diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 0ee48f24e6cbc..bd4530f906fac 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -20,7 +20,6 @@ limitations under the License. */ #include "paddle/fluid/framework/trainer_desc.pb.h" #include "paddle/fluid/framework/trainer_factory.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" -#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" @@ -54,8 +53,6 @@ void ExecutorPrepareContext::PrepareUnusedVars( prog_, static_cast(block_id_), ops_); operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp( prog_, static_cast(block_id_), ops_); - operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - prog_, static_cast(block_id_), ops_); } force_disable_gc_ = force_disable_gc; diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc index 9866045fdd623..58728005e3268 100644 --- a/paddle/fluid/framework/executor_gc_helper.cc +++ b/paddle/fluid/framework/executor_gc_helper.cc @@ -25,7 +25,6 @@ #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" #include "paddle/fluid/operators/controlflow/pylayer_op_helper.h" -#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/fluid/platform/enforce.h" @@ -271,8 +270,6 @@ GetEagerDeletionCleanVarsForPartial(const ProgramDesc &origin_program, program, 0, global_block_ops); operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp( program, 0, global_block_ops); - operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - program, 0, global_block_ops); } // find the skip vars on each block diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index f5c4f9d419cae..94e59cc998ef2 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -14,10 +14,6 @@ cc_library( while_op_eager_deletion_pass SRCS while_op_eager_deletion_pass.cc DEPS while_op_helper graph_helper pass) -cc_library( - recurrent_op_eager_deletion_pass - SRCS recurrent_op_eager_deletion_pass.cc - DEPS recurrent_op_helper graph_helper pass) cc_library( reference_count_pass_helper SRCS reference_count_pass_helper.cc @@ -34,7 +30,6 @@ set(EAGER_DELETETION_PASS_DEPS conditional_block_op_eager_deletion_pass pylayer_op_eager_deletion_pass while_op_eager_deletion_pass - recurrent_op_eager_deletion_pass reference_count_pass_helper) cc_library( diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc index 1cb6fd4b4a8b4..0df4ebd7a7e0b 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc @@ -301,10 +301,6 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const { auto while_op_eager_deletion_pass = ir::PassRegistry::Instance().Get("while_op_eager_deletion_pass"); while_op_eager_deletion_pass->Apply(graph); - - auto recurrent_op_eager_deletion_pass = - ir::PassRegistry::Instance().Get("recurrent_op_eager_deletion_pass"); - recurrent_op_eager_deletion_pass->Apply(graph); } } // namespace ir @@ -320,4 +316,3 @@ REGISTER_PASS(eager_deletion_pass, paddle::framework::ir::EagerDeletionPass) USE_PASS(conditional_block_op_eager_deletion_pass); USE_PASS(pylayer_op_eager_deletion_pass); USE_PASS(while_op_eager_deletion_pass); -USE_PASS(recurrent_op_eager_deletion_pass); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc deleted file mode 100644 index 5431e62fe4220..0000000000000 --- a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h" - -#include -#include - -#include "paddle/fluid/framework/details/computation_op_handle.h" -#include "paddle/fluid/framework/details/multi_devices_helper.h" -#include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/utils/string/string_helper.h" - -namespace paddle { -namespace framework { -namespace ir { - -using paddle::operators::OpAndGradOpPair; -using paddle::operators::OpVariantSet; - -void RecurrentOpEagerDeletionPass::ApplyImpl(Graph *graph) const { - // Find all recurrent_op and recurrent_grad_op in graph - // Note the graph only contains ops and block 0 - std::unordered_map target_ops = - DeviceIdToRecurrentAndRecurrentGradOp(*graph); - - if (graph->IsConstructedByPartialProgram()) { - PADDLE_ENFORCE_LE(target_ops.size(), - 1, - platform::errors::InvalidArgument( - "Unsupported multi devices if graph is constructed " - "with partial program.")); - size_t scope_idx = 0; - auto &recur_ops = target_ops[scope_idx].first; - auto &recur_grad_ops = target_ops[scope_idx].second; - - auto all_ops = graph->OriginProgram().Block(0).AllOps(); - if (recur_ops.empty()) { - operators::AppendOpVariantByOpName( - all_ops, std::string("recurrent"), &recur_ops); - } else if (recur_grad_ops.empty()) { - operators::AppendOpVariantByOpName( - all_ops, std::string("recurrent_grad"), &recur_grad_ops); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "One of recur_ops or recur_grad_ops should be empty.")); - } - } - - for (auto &entry : target_ops) { - // Prepare safe eager deletion on different devices because the garbage - // collection may be different across devices - OpAndGradOpPair &op_pair = entry.second; - PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - graph->OriginProgram(), &op_pair); - } - - auto all_ops = ir::FilterByNodeWrapper(*graph); - for (auto op_hander : all_ops) { - auto *compute_op = dynamic_cast(op_hander); - if (compute_op == nullptr) continue; - if (compute_op->Name() == "recurrent" || - compute_op->Name() == "recurrent_grad") { - ir::Node *op_node = op_hander->Node(); - auto *op_base = compute_op->GetOp(); - if (op_base->Attrs().count("skip_eager_deletion_vars")) { - op_node->Op()->SetAttr("skip_eager_deletion_vars", - op_base->Attrs().at("skip_eager_deletion_vars")); - } - } - } -} - -// Returns a std::unordered_map mapping from the device id to recurrent op and -// grad op pair -std::unordered_map -RecurrentOpEagerDeletionPass::DeviceIdToRecurrentAndRecurrentGradOp( - const Graph &graph) const { - std::unordered_map ret; - std::vector all_ops = - FilterByNodeWrapper(graph); - - for (auto *op : all_ops) { - auto compute_op = dynamic_cast(op); - if (compute_op == nullptr) continue; - - if (compute_op->Name() == "recurrent") { - // GetScopeIdx() returns device/place id - ret[compute_op->GetScopeIdx()].first.emplace(compute_op->GetOp()); - } else if (compute_op->Name() == "recurrent_grad") { - // GetScopeIdx() returns device/place id - ret[compute_op->GetScopeIdx()].second.emplace(compute_op->GetOp()); - } - } - return ret; -} - -} // namespace ir -} // namespace framework -} // namespace paddle - -REGISTER_PASS(recurrent_op_eager_deletion_pass, - paddle::framework::ir::RecurrentOpEagerDeletionPass); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h deleted file mode 100644 index 9c39a9faf23ae..0000000000000 --- a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/fluid/framework/details/computation_op_handle.h" -#include "paddle/fluid/framework/details/multi_devices_helper.h" -#include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/operators/controlflow/op_variant.h" -#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" - -namespace paddle { -namespace framework { -namespace ir { - -// Pass class set skip eager deletion vars for recurrent ops -class RecurrentOpEagerDeletionPass : public Pass { - protected: - void ApplyImpl(Graph *graph) const override; - - private: - // Returns a std::unordered_map mapping from the device id to recurrent op and - // grad op pair - std::unordered_map - DeviceIdToRecurrentAndRecurrentGradOp(const Graph &graph) const; -}; - -} // namespace ir -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 5c1d872078a42..52516d69794c8 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -29,7 +29,6 @@ #include "paddle/fluid/memory/stats.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" #include "paddle/fluid/operators/controlflow/pylayer_op_helper.h" -#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/fluid/operators/ops_extra_info.h" #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" @@ -604,8 +603,6 @@ void BuildOpFuncList(const platform::Place& place, main_program, block.ID(), ops_unique); operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp( main_program, block.ID(), ops_unique); - operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - main_program, block.ID(), ops_unique); } #ifdef PADDLE_WITH_DNNL diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 6fbc11df5cd3c..4714f3a2eb446 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -83,7 +83,7 @@ endif() set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi common phi_utils static_prim_api get_expected_kernel_func) register_operators(EXCLUDES py_func_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op - recurrent_op save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils) + save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils) op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS}) op_library(run_program_op DEPS executor_cache ${OP_HEADER_DEPS}) @@ -109,12 +109,6 @@ if (WITH_GPU OR WITH_ROCM) endif() op_library(lstm_op DEPS ${OP_HEADER_DEPS}) -if (WITH_ROCM) - op_library(recurrent_op DEPS executor ${OP_HEADER_DEPS}) -else() - op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) -endif() - set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt b/paddle/fluid/operators/controlflow/CMakeLists.txt index ca32f31df1ff7..811542cbdf373 100644 --- a/paddle/fluid/operators/controlflow/CMakeLists.txt +++ b/paddle/fluid/operators/controlflow/CMakeLists.txt @@ -25,10 +25,6 @@ cc_library( pylayer_op_helper SRCS pylayer_op_helper.cc DEPS op_variant operator pylayer_op) -cc_library( - recurrent_op_helper - SRCS recurrent_op_helper.cc - DEPS recurrent_op op_variant operator) cc_library( while_op_helper SRCS while_op_helper.cc diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc deleted file mode 100644 index e290fa3e016bd..0000000000000 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" - -#include - -namespace paddle { -namespace framework { -class BlockDesc; -class ProgramDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { - -static bool IsMatchedRecurrentOpAndRecurrentGradOp(const OpVariant &fwd_op, - const OpVariant &grad_op) { - return fwd_op.Inputs().at(RecurrentBase::kInputs) == - grad_op.Inputs().at(RecurrentBase::kInputs) && - fwd_op.Outputs().at(RecurrentBase::kOutputs) == - grad_op.Inputs().at(RecurrentBase::kOutputs); -} - -// Returns whether the variable is skippable in forward recurrent op -// The variable is skippable in recurrent_op when the variable used in -// recurrent_grad is not from grad_block. -static bool IsSkippableVar(const std::string &name, - framework::BlockDesc *grad_block) { - return name != framework::kEmptyVarName && !grad_block->HasVar(name); -} - -static void ClearSkipVars(const OpVariant &op) { - auto &attrs = const_cast(op.Attrs()); - std::vector &attr_skip_vars = PADDLE_GET( - std::vector, attrs[RecurrentBase::kSkipEagerDeletionVars]); - attr_skip_vars.clear(); -} - -// Add skip vars into op's attribute -template -static void AddSkipVars(const OpVariant &op, const Container &skip_vars) { - auto &attrs = const_cast(op.Attrs()); - VLOG(2) << "Prepare to add " << skip_vars.size() - << " skip var(s): " << paddle::string::join_strings(skip_vars, ' '); - std::vector &attr_skip_vars = PADDLE_GET( - std::vector, attrs[RecurrentBase::kSkipEagerDeletionVars]); - attr_skip_vars.insert( - attr_skip_vars.end(), skip_vars.cbegin(), skip_vars.cend()); -} - -// Find all ops and grad ops with given type name. The ops and grad ops -// may locate in different blocks so we should traverse all blocks in the -// program and find them out -static void FindAllOpAndGradOp(const framework::ProgramDesc &program, - OpAndGradOpPair *op_and_grad_op, - const std::string &type_name, - const std::string &backward_type_name) { - OpVariantSet &ops = op_and_grad_op->first; - OpVariantSet &grad_ops = op_and_grad_op->second; - - PADDLE_ENFORCE_GE( - ops.size(), - grad_ops.size(), - phi::errors::InvalidArgument( - "There are more grad ops than forward ops in the graph or program, " - "the number of ops is %d and the number of grad_ops is %d.", - ops.size(), - grad_ops.size())); - - for (size_t i = 1; i < program.Size(); ++i) { - auto &block = program.Block(i); - for (size_t j = 0; j < block.OpSize(); ++j) { - auto *op = block.Op(static_cast(j)); - if (op->Type() == type_name) { - ops.emplace(op); - } else if (op->Type() == backward_type_name) { - grad_ops.emplace(op); - } - } - } - - PADDLE_ENFORCE_GE( - ops.size(), - grad_ops.size(), - phi::errors::InvalidArgument( - "There are more grad ops than forward ops in the graph or program, " - "the number of ops is %d and the number of grad_ops is %d.", - ops.size(), - grad_ops.size())); -} - -// Returns GradVarName of input var names -static std::vector GradVarLists( - const std::vector &var_names) { - std::vector retv; - retv.reserve(var_names.size()); - std::transform(var_names.begin(), - var_names.end(), - std::back_inserter(retv), - framework::GradVarName); - return retv; -} - -// Add memory vars in recurrent op as skip vars. -static void AddOpMemVarsAsSkip(const OpVariant &op, bool set_grad_mem_vars) { - bool has_state = op.Attr(RecurrentBase::kHasStates); - if (has_state) { - std::unordered_set skip_vars; - - auto &mem_vars = op.Attr>(RecurrentBase::kStates); - skip_vars.insert(mem_vars.begin(), mem_vars.end()); - - auto &pre_mem_vars = - op.Attr>(RecurrentBase::kExStates); - skip_vars.insert(pre_mem_vars.begin(), pre_mem_vars.end()); - - if (set_grad_mem_vars) { - auto mem_grad_vars = GradVarLists(mem_vars); - skip_vars.insert(mem_grad_vars.begin(), mem_grad_vars.end()); - auto pre_mem_grad_vars = GradVarLists(pre_mem_vars); - skip_vars.insert(pre_mem_grad_vars.begin(), pre_mem_grad_vars.end()); - } - AddSkipVars(op, skip_vars); - } -} - -// Set outputs and memory vars of the input forward op as skip vars -static void SetRecurrentForwardOpOnlySkipVarAttr(const OpVariant &fwd_op) { - ClearSkipVars(fwd_op); - - AddOpMemVarsAsSkip(fwd_op, /* set_grad_mem_vars = */ false); - auto &output_vars = fwd_op.Outputs().at(RecurrentBase::kOutputs); - AddSkipVars(fwd_op, output_vars); -} - -// Set skip vars of matched recurrent op and recurrent_grad op -static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr( - const OpVariant &fwd_op, const OpVariant &bwd_op) { - // Find all skippable variables in forward recurrent_op - ClearSkipVars(fwd_op); - AddOpMemVarsAsSkip(fwd_op, /* set_grad_mem_vars = */ false); - - auto *grad_block = - bwd_op.Attr(RecurrentBase::kStepBlock); - std::unordered_set fwd_skip_vars; - for (auto *op_desc : grad_block->AllOps()) { - for (auto &in_arg_name : op_desc->InputArgumentNames()) { - if (IsSkippableVar(in_arg_name, grad_block)) { - fwd_skip_vars.insert(in_arg_name); - } - } - for (auto &out_arg_name : op_desc->OutputArgumentNames()) { - if (IsSkippableVar(out_arg_name, grad_block)) { - fwd_skip_vars.insert(out_arg_name); - } - } - } - AddSkipVars(fwd_op, fwd_skip_vars); - - // Find all skippable variables in recurrent_grad_op - // The skippable variables are those which would be used across time steps - ClearSkipVars(bwd_op); - AddOpMemVarsAsSkip(bwd_op, /* set_grad_mem_vars = */ true); - std::unordered_set bwd_skip_vars; - - auto &fwd_input = fwd_op.Inputs().at(RecurrentBase::kInputs); - auto &in_grads = - bwd_op.Outputs().at(framework::GradVarName(RecurrentBase::kInputs)); - - PADDLE_ENFORCE_EQ( - fwd_input.size(), - in_grads.size(), - phi::errors::PreconditionNotMet( - "Backward input gradient number does not match forward " - "input number. The number of forward input number is %d and the " - "number of backward input gradient number is %d.", - fwd_input.size(), - in_grads.size())); - for (size_t i = 0; i < in_grads.size(); ++i) { - if (in_grads[i] == framework::kEmptyVarName) { - continue; - } - bwd_skip_vars.insert(in_grads[i]); - bwd_skip_vars.insert(framework::GradVarName(fwd_input[i])); - } - - auto &fwd_param = fwd_op.Inputs().at(RecurrentBase::kParameters); - auto ¶m_grads = - bwd_op.Outputs().at(framework::GradVarName(RecurrentBase::kParameters)); - PADDLE_ENFORCE_EQ( - fwd_param.size(), - param_grads.size(), - phi::errors::PreconditionNotMet( - "Backward parameter gradient number does not match " - "forward parameter number. The number of forward parameter number is " - "%d and the number of backward parameter gradient is %d.", - fwd_param.size(), - param_grads.size())); - for (size_t i = 0; i < fwd_param.size(); ++i) { - if (param_grads[i] == framework::kEmptyVarName) { - continue; - } - bwd_skip_vars.insert(param_grads[i]); - bwd_skip_vars.insert(framework::GradVarName(fwd_param[i])); - } - - AddSkipVars(bwd_op, bwd_skip_vars); -} - -void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - const framework::ProgramDesc &program, - int block_id, - const std::vector> - &all_ops) { - // If block_id is not 0, returns - // This is because all recurrent_ops and recurrent_grad_ops in the whole - // program would be processed when block_id is 0 (i.e. when Executor::Run() - // or ParallelExecutor constructs). - - // What's more, all recurrent_ops and recurrent_grad_ops must be processed - // when block_id is zero. If not, recurrent_op may run first and erase - // variables - // used in recurrent_grad_op, and in this moment, recurrent_grad_ops may be - // not constructed yet. - if (block_id != 0) return; - - OpAndGradOpPair op_pair; - for (auto &op : all_ops) { - if (op->Type() == "recurrent") { - op_pair.first.emplace(op.get()); - } else if (op->Type() == "recurrent_grad") { - op_pair.second.emplace(op.get()); - } - } - PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(program, &op_pair); -} - -void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - const framework::ProgramDesc &program, OpAndGradOpPair *op_pair) { - // Find all ops and grad ops at all blocks - FindAllOpAndGradOp(program, op_pair, "recurrent", "recurrent_grad"); - - OpVariantSet &recurrent_ops = op_pair->first; - OpVariantSet &recurrent_grad_ops = op_pair->second; - - VLOG(2) << "Found recurrent op num: " << recurrent_ops.size() - << ", recurrent grad op num: " << recurrent_grad_ops.size(); - - if (recurrent_ops.empty()) { - return; - } - - for (auto &bwd_op : recurrent_grad_ops) { - const OpVariant *matched_fwd_op = nullptr; - for (auto &fwd_op : recurrent_ops) { - if (IsMatchedRecurrentOpAndRecurrentGradOp(fwd_op, bwd_op)) { - PADDLE_ENFORCE_EQ(matched_fwd_op, - nullptr, - phi::errors::PreconditionNotMet( - "Found multiple recurrent forward op matches " - "recurrent grad op.")); - matched_fwd_op = &fwd_op; - } - } - PADDLE_ENFORCE_NOT_NULL( - matched_fwd_op, - phi::errors::PreconditionNotMet("Cannot find matched forward op.")); - SetRecurrentOpAndRecurrentGradOpSkipVarAttr(*matched_fwd_op, bwd_op); - recurrent_ops.erase(*matched_fwd_op); - } - - for (auto &fwd_op : recurrent_ops) { - SetRecurrentForwardOpOnlySkipVarAttr(fwd_op); - } -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.h b/paddle/fluid/operators/controlflow/recurrent_op_helper.h deleted file mode 100644 index 37573cc617643..0000000000000 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/controlflow/op_variant.h" -#include "paddle/fluid/operators/recurrent_op.h" - -#include "paddle/utils/string/string_helper.h" - -namespace paddle { -namespace framework { -class ProgramDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { - -using OpVariantSet = std::unordered_set; -using OpAndGradOpPair = std::pair; - -// Set vars to skip eager deletion on input recurrent and recurrent_grad for -// preparing safe eager deletion. Input contains all recurrent and -// recurrent_grad ops at block 0 and the function will find all recurrent and -// recurrent_grad ops across blocks. -void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - const framework::ProgramDesc &program, OpAndGradOpPair *op_pair); - -// Set vars to skip eager deletion on input recurrent and recurrent_grad for -// preparing safe eager deletion. The input block_id must be 0 and caller can -// input all ops in the block. The function will find all recurrent and -// recurrent_grad ops across blocks. -void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( - const framework::ProgramDesc &program, - int block_id, - const std::vector> - &all_ops); - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc deleted file mode 100644 index 21443fc51d568..0000000000000 --- a/paddle/fluid/operators/recurrent_op.cc +++ /dev/null @@ -1,801 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/recurrent_op.h" - -namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { - -using StepScopeVar = std::vector; - -const char RecurrentBase::kInputs[] = "inputs"; // NOLINT -const char RecurrentBase::kInitialStates[] = "initial_states"; // NOLINT -const char RecurrentBase::kParameters[] = "parameters"; // NOLINT -const char RecurrentBase::kOutputs[] = "outputs"; // NOLINT -const char RecurrentBase::kStepScopes[] = "step_scopes"; // NOLINT -const char RecurrentBase::kHasStates[] = "has_states"; // NOLINT -const char RecurrentBase::kExStates[] = "ex_states"; // NOLINT -const char RecurrentBase::kStates[] = "states"; // NOLINT -const char RecurrentBase::kStepBlock[] = "sub_block"; // NOLINT -const char RecurrentBase::kReverse[] = "reverse"; // NOLINT -const char RecurrentBase::kIsTrain[] = "is_train"; // NOLINT -const char RecurrentBase::kSkipEagerDeletionVars[] = // NOLINT - "skip_eager_deletion_vars"; -#define GRAD_SUFFIX "@GRAD" -const char RecurrentBase::kInputGrads[] = "inputs" GRAD_SUFFIX; // NOLINT -const char RecurrentBase::kOutputGrads[] = "outputs" GRAD_SUFFIX; // NOLINT -const char RecurrentBase::kParamGrads[] = "parameters" GRAD_SUFFIX; // NOLINT -const char RecurrentBase::kInitStateGrads[] = // NOLINT - "initial_states" GRAD_SUFFIX; - -static void ClearStepScopes(const platform::DeviceContext &dev_ctx, - framework::Scope *parent_scope, - StepScopeVar *step_scopes) { - if (step_scopes->empty()) return; - - dev_ctx.Wait(); - - for (auto *sub_scope : *step_scopes) { - if (parent_scope->HasKid(sub_scope)) { - parent_scope->DeleteScope(sub_scope); - } - } - - step_scopes->clear(); -} - -StepScopes::StepScopes(const platform::DeviceContext &dev_ctx, - const framework::Scope &parent, - StepScopeVar *scopes, - bool is_train, - size_t seq_len, - bool is_backward) - : counter_(is_backward ? seq_len - 1 : 0UL), - scopes_(scopes), - is_train_(is_train), - is_backward_(is_backward) { - size_t num_step_scopes = is_train ? seq_len : 2; - PADDLE_ENFORCE_EQ( - is_train || !is_backward, - true, - phi::errors::PreconditionNotMet("Cannot backward when is not training")); - if (!is_backward_) { - ClearStepScopes(dev_ctx, const_cast(&parent), scopes); - scopes->reserve(static_cast(num_step_scopes)); - for (size_t i = 0; i < num_step_scopes; ++i) { - scopes->emplace_back(&parent.NewScope()); - } - } -} - -framework::Scope &StepScopes::CurScope() { return GetScope(counter_); } - -framework::Scope &StepScopes::ExScope() { - auto &scope = GetScope(is_backward_ ? counter_ + 1 : counter_ - 1); - return scope; -} - -void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx, - framework::Scope *parent_scope) { - PADDLE_ENFORCE_EQ(is_backward_, - true, - phi::errors::PreconditionNotMet( - "Cannot get backward next scope when is forward")); - if (counter_ + 2 == scopes_->size()) { - parent_scope->DeleteScope((*scopes_)[counter_ + 1]); - scopes_->pop_back(); - VLOG(3) << "Deleted scope at " << counter_ + 1; - } - --counter_; -} - -void StepScopes::ForwardNext() { - PADDLE_ENFORCE_EQ(is_backward_, - false, - phi::errors::PreconditionNotMet( - "Cannot get forward next scope when is backward")); - ++counter_; -} - -framework::Scope &StepScopes::GetScope(size_t scope_id) const { - if (!is_train_) { - scope_id %= 2; - } - PADDLE_ENFORCE_LT( - scope_id, - scopes_->size(), - phi::errors::InvalidArgument( - "Input scope_id is greater than scopes size in RecurrentOp")); - return *(*scopes_)[scope_id]; -} - -RecurrentBase::RecurrentBase(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - -// Get SequenceLength from Scope -// The sequence length is got from input tensor. The input tensor's -// dimension should be [SEQ_LEN, ..., ...]. The first of the tensor's shape -// is SEQ_LEN. The second of the tensor's shape could be the batch size or -// nested sequence length. -int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { - // Dim format SEQ_LEN, BATCH_SIZE, ... - int64_t seq_len = -1; - auto &all_inputs = Inputs(kInputs); - PADDLE_ENFORCE_EQ( - all_inputs.empty(), - false, - phi::errors::InvalidArgument("RecurrentOp gets empty input")); - for (auto &iname : all_inputs) { - auto *var = scope.FindVar(iname); - PADDLE_ENFORCE_NOT_NULL(var, - phi::errors::InvalidArgument( - "RecurrentOp finds var %s is NULL", iname)); - PADDLE_ENFORCE_EQ( - var->IsType(), - true, - phi::errors::InvalidArgument( - "RecurrentOp only accepts phi::DenseTensor as input but " - "input var %s is not phi::DenseTensor", - iname)); - auto &dim = var->Get().dims(); - if (seq_len == -1) { - seq_len = dim[0]; - } else { - PADDLE_ENFORCE_EQ(seq_len, - dim[0], - phi::errors::InvalidArgument( - "Sequence length of input %s in RecurrentOp is NOT " - "equal to sequence length of previous input", - iname)); - } - } - PADDLE_ENFORCE_GE(seq_len, - 0, - phi::errors::InvalidArgument( - "RecurrentOp gets invalid sequence length. Expected " - "seq_len >= 0. Received seq_len = %d", - seq_len)); - return seq_len; -} - -// for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars), -// map(dst_scope.Var, dst_vars)): -// dst_tensor.ShareDataWith(src_tensor) -void RecurrentBase::LinkTensor(const framework::Scope &src_scope, - const std::vector &src_vars, - framework::Scope *dst_scope, - const std::vector &dst_vars) { - LinkTensorWithCallback( - src_scope, - src_vars, - dst_scope, - dst_vars, - [&](const phi::DenseTensor &src, phi::DenseTensor *dst) { - dst->ShareDataWith(src); - }); -} - -// (seq_len, shape) -> return [seq_len] + list(shape) -phi::DDim RecurrentBase::PrependDims(size_t seq_len, const phi::DDim &src) { - auto dims = common::vectorize(src); - dims.insert(dims.begin(), static_cast(seq_len)); - return common::make_ddim(dims); -} - -RecurrentOp::RecurrentOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : RecurrentBase(type, inputs, outputs, attrs) {} - -void RecurrentOp::RunImpl(const framework::Scope &scope, - const phi::Place &place) const { - bool has_state = Attr(kHasStates); - auto seq_len = static_cast(this->GetSequenceLength(scope)); - - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - - VLOG(3) << "Static RNN input sequence length = " << seq_len; - auto reverse = Attr(kReverse); - - framework::Executor executor(place); - auto *block = Attr(kStepBlock); - - auto *program = block->Program(); - auto ctx = executor.Prepare(*program, - block->ID(), - Attr>( - kSkipEagerDeletionVars), /*skip_ref_cnt_vars*/ - true); - - StepScopes scopes = CreateStepScopes(dev_ctx, scope, seq_len); - for (size_t i = 0; i < seq_len; ++i) { - size_t seq_offset = reverse ? seq_len - i - 1 : i; - VLOG(3) << "Recurrent operate at the time step " << seq_offset; - - auto &cur_scope = scopes.CurScope(); - - // Link outside::input --> inside::input - // inside::input = outside::input[seq_offset: seq_offset+1] - LinkTensorWithCallback(scope, - Inputs(kInputs), - &cur_scope, - Inputs(kInputs), - [&seq_offset](const phi::DenseTensor &outside, - phi::DenseTensor *inside) { - inside->ShareDataWith(outside.Slice( - seq_offset, seq_offset + 1)); // NOLINT - auto dims = common::vectorize(inside->dims()); - dims.erase(dims.begin()); - inside->Resize(common::make_ddim(dims)); - }); - - if (has_state) { - if (i == 0) { - // Link initial states --> ex_states - LinkTensor(scope, - Inputs(kInitialStates), - &cur_scope, - Attr>(kExStates)); - } else { - auto &ex_scope = scopes.ExScope(); - // Link ex_scope::state --> cur_scope::ex_state - LinkTensor(ex_scope, - Attr>(kStates), - &cur_scope, - Attr>(kExStates)); - } - } - - // Link inside::output -> outside::output - // outside::output[seq_offset: seq_offset + 1] = inside::output - executor.CreateVariables( - ctx->prog_, &cur_scope, static_cast(ctx->block_id_)); - - // Linked now, execute! - executor.RunPreparedContext(ctx.get(), - &cur_scope, - false /*create_local_scope*/, - false /*create_vars*/, - true /* keep_kids */); - if (i == 0) { - LinkTensorWithCallback( - cur_scope, - Outputs(kOutputs), - scope, - Outputs(kOutputs), - [&](const phi::DenseTensor &src_tensor, - phi::DenseTensor *dst_tensor) { - // create output tensor at begin - dst_tensor->Resize(PrependDims(seq_len, src_tensor.dims())); - dst_tensor->mutable_data(place, src_tensor.dtype()); - - auto dst_out = - dst_tensor->Slice(seq_offset, seq_offset + 1); // NOLINT - // Explicit copy output since the local RNN scope can be destroyed - // early. - framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out); - }); - } else { - LinkTensorWithCallback( - cur_scope, - Outputs(kOutputs), - scope, - Outputs(kOutputs), - [&](const phi::DenseTensor &src_tensor, - phi::DenseTensor *dst_tensor) { - auto dst_out = - dst_tensor->Slice(seq_offset, seq_offset + 1); // NOLINT - framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out); - }); - } - - scopes.ForwardNext(); - } -} - -StepScopes RecurrentOp::CreateStepScopes(const platform::DeviceContext &dev_ctx, - const framework::Scope &scope, - size_t seq_len) const { - static std::mutex mutex; - std::lock_guard lock(mutex); - // TODO(baoachun) Function CreateStepScopes may lead to segmentation - // fault in multithreading in eval process. The performance drop of - // adding mutex need to be fixed. - auto *var = scope.FindVar(Output(kStepScopes)); - PADDLE_ENFORCE_NOT_NULL( - var, - phi::errors::InvalidArgument("RecurrentOp gets empty StepScopes var")); - return StepScopes(dev_ctx, - scope, - var->GetMutable(), - Attr(kIsTrain), - seq_len); -} - -RecurrentGradOp::RecurrentGradOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : RecurrentBase(type, inputs, outputs, attrs) {} - -void RecurrentGradOp::RunImpl(const framework::Scope &scope, - const phi::Place &place) const { - bool has_state = Attr(kHasStates); - const size_t seq_len = static_cast(GetSequenceLength(scope)); - - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - - StepScopes scopes = CreateStepScopes(dev_ctx, scope, seq_len); - auto reverse = Attr(kReverse); - - framework::Executor executor(place); - auto *block = Attr(kStepBlock); - auto *program = block->Program(); - auto ctx = - executor.Prepare(*program, - block->ID(), - Attr>( - kSkipEagerDeletionVars) /*skip_ref_cnt_vars*/); - - for (size_t step_id = 0; step_id < seq_len; ++step_id) { - size_t seq_offset = reverse ? step_id : seq_len - step_id - 1; - VLOG(3) << "Recurrent backward operate at the time step " << seq_offset; - auto &cur_scope = scopes.CurScope(); - - // Link outside::output_grads --> inside::output_grads - // inside::output_grad = outside::output_grad[seq_offset:seq_offset+1] - LinkTensorWithCallback( - scope, - Inputs(kOutputGrads), - &cur_scope, - Inputs(kOutputGrads), - [&](const phi::DenseTensor &outside, phi::DenseTensor *inside) { - inside->ShareDataWith( - outside.Slice(seq_offset, seq_offset + 1)); // NOLINT - auto dims = common::vectorize(inside->dims()); - dims.erase(dims.begin()); - inside->Resize(common::make_ddim(dims)); - }, - true /*is_backward*/); - auto og_set = List2Set(Inputs(kOutputGrads)); - - if (VLOG_IS_ON(10)) { - std::ostringstream sout; - std::copy(og_set.begin(), - og_set.end(), - std::ostream_iterator(sout, ",")); - VLOG(10) << " RNN output gradients = [" << sout.str() << "]"; - } - - if (has_state) { - // Link states - // if cur_scope::cur_state_grad in out_grads: - // cur_scope::cur_state_grad += ex_scope::ex_state_grad - // else: - // ex_scope::ex_state_grad --> cur_scope::cur_state_grad - if (step_id != 0) { // not at beginning - auto &ex_scope = scopes.ExScope(); - auto ex_state_grads = - GradVarLists(Attr>(kExStates)); - auto cur_state_grads = - GradVarLists(Attr>(kStates)); - - PADDLE_ENFORCE_EQ(ex_state_grads.size(), - cur_state_grads.size(), - phi::errors::InvalidArgument( - "lengths of ex_states and cur_states are not " - "equal in RecurrentGradOp")); - for (size_t i = 0; i < ex_state_grads.size(); ++i) { - auto &cur_grad = cur_state_grads[i]; - auto &ex_grad = ex_state_grads[i]; - auto &ex_grad_tensor = - ex_scope.FindVar(ex_grad)->Get(); - - VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad; - auto *cur_grad_var = cur_scope.Var(cur_grad); - phi::DenseTensor *cur_grad_tensor = - cur_grad_var->GetMutable(); - cur_grad_tensor->ShareDataWith(ex_grad_tensor); - } - } - } - - // Link inside::output -> outside::output - // outside::output[seq_offset: seq_offset + 1] = inside::output - executor.CreateVariables( - ctx->prog_, &cur_scope, static_cast(ctx->block_id_)); - if (step_id > 0) { - LinkTensorWithCallback( - scope, - Outputs(kInputGrads), - cur_scope, - GradVarLists(Inputs(kInputs)), - [&](const phi::DenseTensor &src_tensor, - phi::DenseTensor *dst_tensor) { - if (src_tensor.memory_size() == - 0) { // Inside Gradient is not created. - return; - } - phi::DenseTensor src_slice = - src_tensor.Slice(seq_offset, seq_offset + 1); // NOLINT - dst_tensor->ShareDataWith(src_slice); - }, - true /*is_backward*/); - } - - VLOG(5) << "Recurrent memory linking finished "; - // Run step block with cur_scope - executor.RunPreparedContext(ctx.get(), - &cur_scope, - false /*create_local_scope*/, - false /*create_vars*/, - true /* keep_kids */); - - VLOG(5) << "executor.Run finished "; - - auto local_var_names = LocalVarNames(cur_scope); - - // Accumulate params - // if (step == 0): - // outside::param_grad = 0.0 - // outside::param_grad += inside::param_grad - { - auto &pg_names = Outputs(kParamGrads); - auto &p_names = Inputs(kParameters); - PADDLE_ENFORCE_EQ(pg_names.size(), - p_names.size(), - phi::errors::InvalidArgument( - "Sizes of Parameters and ParamGrads are not equal " - "in RecurrentGradOp")); - - for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { - auto inside_grad_name = framework::GradVarName(p_names[param_id]); - - // If does not compute gradient of that variable inside rnn, just - // continue - if (local_var_names.find(inside_grad_name) == local_var_names.end()) { - continue; - } - - // zero gradient variable in step 0 - if (step_id == 0) { - auto &inside_tensor = - cur_scope.FindVar(inside_grad_name)->Get(); - framework::AttributeMap attrs; - attrs["dtype"] = - framework::TransToProtoVarType(inside_tensor.dtype()); - attrs["shape"] = common::vectorize(inside_tensor.dims()); - attrs["value"] = 0.0f; - - auto zero_op = - framework::OpRegistry::CreateOp("fill_constant", - framework::VariableNameMap{}, - {{"Out", {pg_names[param_id]}}}, - attrs); - zero_op->Run(scope, place); - } - - auto new_inside_name = cur_scope.Rename(inside_grad_name); - - // sum gradient - auto sum_op = framework::OpRegistry::CreateOp( - "sum", - {{"X", {pg_names[param_id], new_inside_name}}}, - {{"Out", {pg_names[param_id]}}}, - framework::AttributeMap{{"use_mkldnn", {false}}}); - sum_op->Run(cur_scope, place); - - cur_scope.Rename(new_inside_name, inside_grad_name); - } - } - VLOG(5) << "Accumulate Parameter finished "; - - // Copy input gradient from inside to outside - // outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad - if (step_id == 0) { - LinkTensorWithCallback( - cur_scope, - GradVarLists(Inputs(kInputs)), - scope, - Outputs(kInputGrads), - [&](const phi::DenseTensor &inside, phi::DenseTensor *outside) { - if (inside.memory_size() == 0) { // IG is not created. - return; - } - // Alloc outside memory - outside->Resize(PrependDims(seq_len, inside.dims())); - outside->mutable_data(place, inside.dtype()); - - auto dst = outside->Slice(seq_offset, seq_offset + 1); // NOLINT - framework::TensorCopy(inside, place, dev_ctx, &dst); - }, - true /*is_backward*/); - } - VLOG(5) << "Link outside gradient finished "; - - if (has_state) { - if (step_id + 1 == seq_len) { // at_end - // copy initialize states gradient from inside to outside - LinkTensorWithCallback( - cur_scope, - GradVarLists(Attr>(kExStates)), - scope, - Outputs(kInitStateGrads), - [&](const phi::DenseTensor &inside, phi::DenseTensor *outside) { - outside->Resize(inside.dims()); - outside->mutable_data(place, inside.dtype()); - framework::TensorCopy(inside, place, dev_ctx, outside); - }, - true /*is_backward*/); - VLOG(5) << "Link initialize state gradient finished "; - } - } - scopes.BackwardNext(dev_ctx, const_cast(&scope)); - } - // Delete the scope of StepScopes - auto *var = scope.FindVar(Input(kStepScopes)); - PADDLE_ENFORCE_NOT_NULL(var, - phi::errors::InvalidArgument( - "StepScopes var is empty in RecurrentGradOp")); - auto *step_scopes = var->GetMutable(); - ClearStepScopes(dev_ctx, const_cast(&scope), step_scopes); -} - -StepScopes RecurrentGradOp::CreateStepScopes( - const platform::DeviceContext &dev_ctx, - const framework::Scope &scope, - size_t seq_len) const { - auto *var = scope.FindVar(Input(kStepScopes)); - PADDLE_ENFORCE_NOT_NULL(var, - phi::errors::InvalidArgument( - "StepScopes var is empty in RecurrentGradOp")); - return StepScopes(dev_ctx, - scope, - var->GetMutable(), - Attr(kIsTrain), - seq_len, - true /*is_backward*/); -} - -std::unordered_set RecurrentGradOp::List2Set( - const std::vector &list) const { - std::unordered_set local_var_name_set; - local_var_name_set.reserve(list.size()); - for (auto &each : list) { - local_var_name_set.insert(each); - } - return local_var_name_set; -} - -std::unordered_set RecurrentGradOp::LocalVarNames( - const framework::Scope &scope) const { - return this->List2Set(scope.LocalVarNames()); -} - -std::vector RecurrentGradOp::GradVarLists( - const std::vector &var_names) { - std::vector retv; - retv.reserve(var_names.size()); - std::transform(var_names.begin(), - var_names.end(), - std::back_inserter(retv), - framework::GradVarName); - return retv; -} - -class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput(RecurrentBase::kInputs, "rnn inputs").AsDuplicable(); - AddInput(RecurrentBase::kInitialStates, "rnn initial states") - .AsDuplicable(); - AddInput(RecurrentBase::kParameters, - "Parameters are used by step block as its input. However, the " - "input is not a sequence tensor. Every time step, each operator " - "in step block just use the parameter directly.") - .AsDuplicable(); - AddOutput(RecurrentBase::kOutputs, - "The output sequence of RNN. The sequence length must be same.") - .AsDuplicable(); - AddOutput(RecurrentBase::kStepScopes, - "StepScopes contain all local variables in each time step."); - AddAttr(RecurrentBase::kHasStates, "Whether has states.") - .SetDefault(false); - AddAttr>(RecurrentBase::kExStates, - string::Sprintf( - R"DOC(The ex-state variable names. -The ex-state means the state value in the ex-timestep or the previous time step -[%s, %s, %s] must be the same order)DOC", - RecurrentBase::kExStates, - RecurrentBase::kStates, - RecurrentBase::kInitStateGrads)); - AddAttr>( - RecurrentBase::kStates, - string::Sprintf( - "The state variable names. [%s, %s, %s] must be the same order", - RecurrentBase::kExStates, - RecurrentBase::kStates, - RecurrentBase::kInitStateGrads)); - AddAttr(RecurrentBase::kStepBlock, - "The step block inside RNN"); - AddAttr(RecurrentBase::kReverse, - R"DOC(Calculate RNN reversely or not. -By default reverse=False - -Assume the input data is [A, B, C, D] - -if reverse is False: - the computation of RNN is like - A B C D - | | | | - v v v v - rnn -----> rnn -----> rnn ----> rnn - | | | | - v v v v - o o o o - -if reverse is True - the computation of RNN is like - A B C D - | | | | - v v v v - rnn <----- rnn <----- rnn <---- rnn - | | | | - v v v v - o o o o -)DOC") - .SetDefault(false); - AddAttr(RecurrentBase::kIsTrain, "").SetDefault(true); - AddAttr>(RecurrentBase::kSkipEagerDeletionVars, - "Vars that would skip eager deletion." - "Users should not set this manually.") - .SetDefault(std::vector()); - - AddComment(R"DOC( -Static Length Recurrent Operator. - -The static length recurrent operator can only operate on fixed size sequence -data, i.e. in each mini-batch, the sequence length of all inputs are the same. - -)DOC"); - } -}; - -template -class RecurrentGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr grad) const override { - grad->SetType("recurrent_grad"); - for (auto &input_param : this->InputNames()) { - grad->SetInput(input_param, this->Input(input_param)); - grad->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param, false)); - } - - for (auto &output_param : this->OutputNames()) { - if (output_param == RecurrentBase::kStepScopes) { - grad->SetInput(output_param, this->Output(output_param)); - grad->SetInput(framework::GradVarName(output_param), - this->Output(output_param)); - } else { - grad->SetInput(output_param, this->Output(output_param)); - grad->SetInput(framework::GradVarName(output_param), - this->OutputGrad(output_param)); - } - } - grad->SetAttrMap(this->Attrs()); - grad->SetBlockAttr(RecurrentBase::kStepBlock, this->grad_block_[0]); - } -}; - -class RecurrentGradOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - std::vector output{RecurrentBase::kOutputs}; - - // In some case the kInitialStates is empty. - // If the kInitialStates is empty, all the states should be empty. - if (!ctx->HasInputs(RecurrentBase::kInitialStates)) { - PADDLE_ENFORCE_EQ( - ctx->Attrs() - .Get>(RecurrentBase::kExStates) - .size(), - 0, - phi::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kExStates)); - PADDLE_ENFORCE_EQ( - ctx->Attrs() - .Get>(RecurrentBase::kStates) - .size(), - 0, - phi::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kStates)); - } - - PADDLE_ENFORCE_EQ( - ctx->HasInputs(RecurrentBase::kInputs), - true, - phi::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kInputs)); - PADDLE_ENFORCE_EQ( - ctx->HasInputs(RecurrentBase::kOutputs), - true, - phi::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kOutputs)); - - // In some case the kInitialStates is empty. - if (ctx->HasInputs(RecurrentBase::kInitialStates) && - ctx->HasOutputs( - framework::GradVarName(RecurrentBase::kInitialStates))) { - ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInitialStates), - ctx->GetInputsDim(RecurrentBase::kInitialStates)); - } - - PADDLE_ENFORCE_EQ( - ctx->HasOutputs(framework::GradVarName(RecurrentBase::kInputs), - /*allow_null=*/true), - true, - phi::errors::InvalidArgument( - "The output of(%s) should not be empty.", - framework::GradVarName(RecurrentBase::kInputs))); - ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInputs), - ctx->GetInputsDim(RecurrentBase::kInputs)); - - // In some case the kParameters is empty. - if (ctx->HasInputs(RecurrentBase::kParameters)) { - PADDLE_ENFORCE_EQ( - ctx->HasOutputs(framework::GradVarName(RecurrentBase::kParameters)), - true, - phi::errors::InvalidArgument( - "The output of(%s) should not be empty.", - framework::GradVarName(RecurrentBase::kParameters))); - ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kParameters), - ctx->GetInputsDim(RecurrentBase::kParameters)); - } - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR( - recurrent, - paddle::operators::RecurrentOp, - paddle::operators::RecurrentOpProtoMaker, - paddle::operators::RecurrentGradOpMaker); -REGISTER_OPERATOR(recurrent_grad, - paddle::operators::RecurrentGradOp, - paddle::operators::RecurrentGradOpShapeInference); diff --git a/paddle/fluid/operators/recurrent_op.h b/paddle/fluid/operators/recurrent_op.h deleted file mode 100644 index a95a4a0712b20..0000000000000 --- a/paddle/fluid/operators/recurrent_op.h +++ /dev/null @@ -1,260 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -// StepScopes manages the scopes inside Recurrent Op. -// -// if is_train = False, then -// there are two scopes for the RNN and just support forward -// else -// the len(scopes) == seq_len -// -// if is_backward = True, then -// reversely access scopes, delete useless ex-scope -// else -// access scopes from beginning to end -class StepScopes { - public: - StepScopes(const platform::DeviceContext &dev_ctx, - const framework::Scope &parent, - std::vector *scopes, - bool is_train, - size_t seq_len, - bool is_backward = false); - - // Get the current scope - framework::Scope &CurScope(); - - // Get the ex-scope, which is the scope in previous time step - framework::Scope &ExScope(); - - // Move to next time step when forwarding - void ForwardNext(); - - // Delete ex-scope after using it, then move to next time step when - // backwarding - void BackwardNext(const platform::DeviceContext &dev_ctx, - framework::Scope *parent_scope); - - private: - framework::Scope &GetScope(size_t scope_id) const; - - size_t counter_; - std::vector *scopes_; - bool is_train_; - bool is_backward_; -}; - -// Base class for RecurrentOp/RecurrentGradOp -// Some common protected functions for RecurrentOp/RecurrentGradOp -class RecurrentBase : public framework::OperatorBase { - public: - static const char kInputs[]; - static const char kInitialStates[]; - static const char kParameters[]; - static const char kOutputs[]; - static const char kStepScopes[]; - static const char kHasStates[]; - static const char kExStates[]; - static const char kStates[]; - static const char kStepBlock[]; - static const char kReverse[]; - static const char kIsTrain[]; - static const char kSkipEagerDeletionVars[]; - static const char kInputGrads[]; - static const char kOutputGrads[]; - static const char kParamGrads[]; - static const char kInitStateGrads[]; - - RecurrentBase(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs); - - protected: - // Get SequenceLength from Scope - // The sequence length is got from input tensor. The input tensor's - // dimension should be [SEQ_LEN, ..., ...]. The first of the tensor's shape - // is SEQ_LEN. The second of the tensor's shape could be the batch size or - // nested sequence length. - int64_t GetSequenceLength(const framework::Scope &scope) const; - - // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars), - // map(dst_scope.Var, dst_vars)): - // dst_tensor.ShareDataWith(src_tensor) - static void LinkTensor(const framework::Scope &src_scope, - const std::vector &src_vars, - framework::Scope *dst_scope, - const std::vector &dst_vars); - - // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars), - // map(dst_scope.Var, dst_vars)): - // callback(src_tensor, &dst_tensor) - template - static void LinkTensorWithCallback(const framework::Scope &src_scope, - const std::vector &src_vars, - framework::Scope *dst_scope, - const std::vector &dst_vars, - Callback callback, - bool is_backward = false) { - PADDLE_ENFORCE_EQ(src_vars.size(), - dst_vars.size(), - phi::errors::InvalidArgument( - "Sizes of source vars and destination vars are not " - "equal in LinkTensor.")); - for (size_t i = 0; i < dst_vars.size(); ++i) { - VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i]; - AccessTensor(src_scope, - src_vars[i], - dst_scope, - dst_vars[i], - callback, - is_backward); - } - } - - // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars), - // map(dst_scope.FindVar, dst_vars)): - // callback(src_tensor, &dst_tensor) - template - static void LinkTensorWithCallback(const framework::Scope &src_scope, - const std::vector &src_vars, - const framework::Scope &dst_scope, - const std::vector &dst_vars, - Callback callback, - bool is_backward = false) { - PADDLE_ENFORCE_EQ(src_vars.size(), - dst_vars.size(), - phi::errors::InvalidArgument( - "Sizes of source vars and destination vars are not " - "equal in LinkTensor.")); - for (size_t i = 0; i < dst_vars.size(); ++i) { - VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i]; - AccessTensor(src_scope, - src_vars[i], - dst_scope, - dst_vars[i], - callback, - is_backward); - } - } - - // (seq_len, shape) -> return [seq_len] + list(shape) - static phi::DDim PrependDims(size_t seq_len, const phi::DDim &src); - - private: - template - static void AccessTensor(const framework::Scope &src_scope, - const std::string &src_var_name, - framework::Scope *dst_scope, - const std::string &dst_var_name, - Callback callback, - bool is_backward = false) { - auto *src_var = src_scope.FindVar(src_var_name); - if (is_backward && src_var == nullptr) { - return; - } - PADDLE_ENFORCE_NOT_NULL( - src_var, - phi::errors::NotFound("Source variable %s is not found.", - src_var_name)); - auto &src_tensor = src_var->Get(); - - auto *dst_var = dst_scope->Var(dst_var_name); - auto *dst_tensor = dst_var->GetMutable(); - callback(src_tensor, dst_tensor); - } - - template - static void AccessTensor(const framework::Scope &src_scope, - const std::string &src_var_name, - const framework::Scope &dst_scope, - const std::string &dst_var_name, - Callback callback, - bool is_backward = false) { - auto *dst_var = dst_scope.FindVar(dst_var_name); - if (is_backward && dst_var == nullptr) { - return; - } - auto *src_var = src_scope.FindVar(src_var_name); - PADDLE_ENFORCE_NOT_NULL( - src_var, - phi::errors::NotFound("Source variable %s is not found.", - src_var_name)); - auto &src_tensor = src_var->Get(); - PADDLE_ENFORCE_NOT_NULL( - dst_var, - phi::errors::NotFound("Destination variable %s is not found.", - src_var_name)); - auto *dst_tensor = dst_var->GetMutable(); - callback(src_tensor, dst_tensor); - } -}; - -class RecurrentOp : public RecurrentBase { - public: - RecurrentOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs); - - private: - void RunImpl(const framework::Scope &scope, - const phi::Place &place) const override; - - private: - StepScopes CreateStepScopes(const platform::DeviceContext &dev_ctx, - const framework::Scope &scope, - size_t seq_len) const; -}; - -class RecurrentGradOp : public RecurrentBase { - public: - RecurrentGradOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs); - - private: - void RunImpl(const framework::Scope &scope, - const phi::Place &place) const override; - - StepScopes CreateStepScopes(const platform::DeviceContext &dev_ctx, - const framework::Scope &scope, - size_t seq_len) const; - - std::unordered_set List2Set( - const std::vector &list) const; - - std::unordered_set LocalVarNames( - const framework::Scope &scope) const; - - static std::vector GradVarLists( - const std::vector &var_names); -}; - -} // namespace operators -} // namespace paddle diff --git a/test/cpp/fluid/framework/CMakeLists.txt b/test/cpp/fluid/framework/CMakeLists.txt index de3b99610d1f5..8fd806bc18570 100644 --- a/test/cpp/fluid/framework/CMakeLists.txt +++ b/test/cpp/fluid/framework/CMakeLists.txt @@ -198,7 +198,7 @@ endif() cc_test( prune_test SRCS prune_test.cc - DEPS op_info prune recurrent_op device_context) + DEPS op_info prune device_context) cc_test( var_type_inference_test SRCS var_type_inference_test.cc