From eff17757dc976ebc3cc50e0544da1ee33db74942 Mon Sep 17 00:00:00 2001
From: co63oc <co63oc@users.noreply.github.com>
Date: Fri, 24 May 2024 11:45:42 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=206th=20Fundable=20Projects?=
 =?UTF-8?q?=203=20No.300=E3=80=91recurrent=20(#64545)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paddle/fluid/framework/CMakeLists.txt         |  24 +-
 paddle/fluid/framework/executor.cc            |   3 -
 paddle/fluid/framework/executor_gc_helper.cc  |   3 -
 .../ir/memory_optimize_pass/CMakeLists.txt    |   5 -
 .../eager_deletion_pass.cc                    |   5 -
 .../recurrent_op_eager_deletion_pass.cc       | 114 ---
 .../recurrent_op_eager_deletion_pass.h        |  43 -
 .../interpreter/interpreter_util.cc           |   3 -
 paddle/fluid/operators/CMakeLists.txt         |   8 +-
 .../operators/controlflow/CMakeLists.txt      |   4 -
 .../controlflow/recurrent_op_helper.cc        | 291 -------
 .../controlflow/recurrent_op_helper.h         |  59 --
 paddle/fluid/operators/recurrent_op.cc        | 801 ------------------
 paddle/fluid/operators/recurrent_op.h         | 260 ------
 test/cpp/fluid/framework/CMakeLists.txt       |   2 +-
 15 files changed, 14 insertions(+), 1611 deletions(-)
 delete mode 100644 paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc
 delete mode 100644 paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h
 delete mode 100644 paddle/fluid/operators/controlflow/recurrent_op_helper.cc
 delete mode 100644 paddle/fluid/operators/controlflow/recurrent_op_helper.h
 delete mode 100644 paddle/fluid/operators/recurrent_op.cc
 delete mode 100644 paddle/fluid/operators/recurrent_op.h

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 39e1a47d1d4ae..041339fe597c3 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -550,7 +550,6 @@ cc_library(
   executor_gc_helper
   SRCS executor_gc_helper.cc
   DEPS while_op_helper
-       recurrent_op_helper
        conditional_block_op_helper
        pylayer_op_helper
        scope
@@ -585,7 +584,6 @@ if(WITH_DISTRIBUTE)
            device_worker_factory.cc
            data_set.cc
       DEPS fleet_wrapper
-           recurrent_op_helper
            op_registry
            device_context
            scope
@@ -660,8 +658,7 @@ if(WITH_DISTRIBUTE)
            heter_section_worker.cc
            device_worker_factory.cc
            data_set.cc
-      DEPS recurrent_op_helper
-           op_registry
+      DEPS op_registry
            device_context
            scope
            framework_proto
@@ -736,8 +733,7 @@ if(WITH_DISTRIBUTE)
            section_worker.cc
            device_worker_factory.cc
            data_set.cc
-      DEPS recurrent_op_helper
-           op_registry
+      DEPS op_registry
            device_context
            scope
            framework_proto
@@ -796,8 +792,7 @@ elseif(WITH_PSLIB)
          section_worker.cc
          device_worker_factory.cc
          data_set.cc
-    DEPS recurrent_op_helper
-         op_registry
+    DEPS op_registry
          device_context
          scope
          framework_proto
@@ -843,8 +838,7 @@ else()
          section_worker.cc
          device_worker_factory.cc
          data_set.cc
-    DEPS recurrent_op_helper
-         op_registry
+    DEPS op_registry
          device_context
          scope
          framework_proto
@@ -867,8 +861,14 @@ else()
 endif()
 
 target_link_libraries(
-  executor while_op_helper executor_gc_helper recurrent_op_helper
-  conditional_block_op_helper pylayer_op_helper)
+  executor
+  while_op_helper
+  executor_gc_helper
+  static_prim_api
+  static_utils
+  get_expected_kernel_func
+  conditional_block_op_helper
+  pylayer_op_helper)
 
 cc_library(
   parallel_executor
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index 0ee48f24e6cbc..bd4530f906fac 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -20,7 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/trainer_desc.pb.h"
 #include "paddle/fluid/framework/trainer_factory.h"
 #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
-#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
 #include "paddle/fluid/operators/controlflow/while_op_helper.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -54,8 +53,6 @@ void ExecutorPrepareContext::PrepareUnusedVars(
         prog_, static_cast<int>(block_id_), ops_);
     operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
         prog_, static_cast<int>(block_id_), ops_);
-    operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-        prog_, static_cast<int>(block_id_), ops_);
   }
 
   force_disable_gc_ = force_disable_gc;
diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc
index 9866045fdd623..58728005e3268 100644
--- a/paddle/fluid/framework/executor_gc_helper.cc
+++ b/paddle/fluid/framework/executor_gc_helper.cc
@@ -25,7 +25,6 @@
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
 #include "paddle/fluid/operators/controlflow/pylayer_op_helper.h"
-#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
 #include "paddle/fluid/operators/controlflow/while_op_helper.h"
 #include "paddle/fluid/platform/enforce.h"
 
@@ -271,8 +270,6 @@ GetEagerDeletionCleanVarsForPartial(const ProgramDesc &origin_program,
         program, 0, global_block_ops);
     operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
         program, 0, global_block_ops);
-    operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-        program, 0, global_block_ops);
   }
 
   // find the skip vars on each block
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
index f5c4f9d419cae..94e59cc998ef2 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
@@ -14,10 +14,6 @@ cc_library(
   while_op_eager_deletion_pass
   SRCS while_op_eager_deletion_pass.cc
   DEPS while_op_helper graph_helper pass)
-cc_library(
-  recurrent_op_eager_deletion_pass
-  SRCS recurrent_op_eager_deletion_pass.cc
-  DEPS recurrent_op_helper graph_helper pass)
 cc_library(
   reference_count_pass_helper
   SRCS reference_count_pass_helper.cc
@@ -34,7 +30,6 @@ set(EAGER_DELETETION_PASS_DEPS
     conditional_block_op_eager_deletion_pass
     pylayer_op_eager_deletion_pass
     while_op_eager_deletion_pass
-    recurrent_op_eager_deletion_pass
     reference_count_pass_helper)
 
 cc_library(
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc
index 1cb6fd4b4a8b4..0df4ebd7a7e0b 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc
@@ -301,10 +301,6 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const {
   auto while_op_eager_deletion_pass =
       ir::PassRegistry::Instance().Get("while_op_eager_deletion_pass");
   while_op_eager_deletion_pass->Apply(graph);
-
-  auto recurrent_op_eager_deletion_pass =
-      ir::PassRegistry::Instance().Get("recurrent_op_eager_deletion_pass");
-  recurrent_op_eager_deletion_pass->Apply(graph);
 }
 
 }  // namespace ir
@@ -320,4 +316,3 @@ REGISTER_PASS(eager_deletion_pass, paddle::framework::ir::EagerDeletionPass)
 USE_PASS(conditional_block_op_eager_deletion_pass);
 USE_PASS(pylayer_op_eager_deletion_pass);
 USE_PASS(while_op_eager_deletion_pass);
-USE_PASS(recurrent_op_eager_deletion_pass);
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc
deleted file mode 100644
index 5431e62fe4220..0000000000000
--- a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h"
-
-#include <unordered_map>
-#include <vector>
-
-#include "paddle/fluid/framework/details/computation_op_handle.h"
-#include "paddle/fluid/framework/details/multi_devices_helper.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/utils/string/string_helper.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-using paddle::operators::OpAndGradOpPair;
-using paddle::operators::OpVariantSet;
-
-void RecurrentOpEagerDeletionPass::ApplyImpl(Graph *graph) const {
-  // Find all recurrent_op and recurrent_grad_op in graph
-  // Note the graph only contains ops and block 0
-  std::unordered_map<size_t, OpAndGradOpPair> target_ops =
-      DeviceIdToRecurrentAndRecurrentGradOp(*graph);
-
-  if (graph->IsConstructedByPartialProgram()) {
-    PADDLE_ENFORCE_LE(target_ops.size(),
-                      1,
-                      platform::errors::InvalidArgument(
-                          "Unsupported multi devices if graph is constructed "
-                          "with partial program."));
-    size_t scope_idx = 0;
-    auto &recur_ops = target_ops[scope_idx].first;
-    auto &recur_grad_ops = target_ops[scope_idx].second;
-
-    auto all_ops = graph->OriginProgram().Block(0).AllOps();
-    if (recur_ops.empty()) {
-      operators::AppendOpVariantByOpName(
-          all_ops, std::string("recurrent"), &recur_ops);
-    } else if (recur_grad_ops.empty()) {
-      operators::AppendOpVariantByOpName(
-          all_ops, std::string("recurrent_grad"), &recur_grad_ops);
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "One of recur_ops or recur_grad_ops should be empty."));
-    }
-  }
-
-  for (auto &entry : target_ops) {
-    // Prepare safe eager deletion on different devices because the garbage
-    // collection may be different across devices
-    OpAndGradOpPair &op_pair = entry.second;
-    PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-        graph->OriginProgram(), &op_pair);
-  }
-
-  auto all_ops = ir::FilterByNodeWrapper<details::OpHandleBase>(*graph);
-  for (auto op_hander : all_ops) {
-    auto *compute_op = dynamic_cast<details::ComputationOpHandle *>(op_hander);
-    if (compute_op == nullptr) continue;
-    if (compute_op->Name() == "recurrent" ||
-        compute_op->Name() == "recurrent_grad") {
-      ir::Node *op_node = op_hander->Node();
-      auto *op_base = compute_op->GetOp();
-      if (op_base->Attrs().count("skip_eager_deletion_vars")) {
-        op_node->Op()->SetAttr("skip_eager_deletion_vars",
-                               op_base->Attrs().at("skip_eager_deletion_vars"));
-      }
-    }
-  }
-}
-
-// Returns a std::unordered_map mapping from the device id to recurrent op and
-// grad op pair
-std::unordered_map<size_t, OpAndGradOpPair>
-RecurrentOpEagerDeletionPass::DeviceIdToRecurrentAndRecurrentGradOp(
-    const Graph &graph) const {
-  std::unordered_map<size_t, OpAndGradOpPair> ret;
-  std::vector<details::OpHandleBase *> all_ops =
-      FilterByNodeWrapper<details::OpHandleBase>(graph);
-
-  for (auto *op : all_ops) {
-    auto compute_op = dynamic_cast<details::ComputationOpHandle *>(op);
-    if (compute_op == nullptr) continue;
-
-    if (compute_op->Name() == "recurrent") {
-      // GetScopeIdx() returns device/place id
-      ret[compute_op->GetScopeIdx()].first.emplace(compute_op->GetOp());
-    } else if (compute_op->Name() == "recurrent_grad") {
-      // GetScopeIdx() returns device/place id
-      ret[compute_op->GetScopeIdx()].second.emplace(compute_op->GetOp());
-    }
-  }
-  return ret;
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-REGISTER_PASS(recurrent_op_eager_deletion_pass,
-              paddle::framework::ir::RecurrentOpEagerDeletionPass);
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h
deleted file mode 100644
index 9c39a9faf23ae..0000000000000
--- a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <unordered_map>
-
-#include "paddle/fluid/framework/details/computation_op_handle.h"
-#include "paddle/fluid/framework/details/multi_devices_helper.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/operators/controlflow/op_variant.h"
-#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-// Pass class set skip eager deletion vars for recurrent ops
-class RecurrentOpEagerDeletionPass : public Pass {
- protected:
-  void ApplyImpl(Graph *graph) const override;
-
- private:
-  // Returns a std::unordered_map mapping from the device id to recurrent op and
-  // grad op pair
-  std::unordered_map<size_t, paddle::operators::OpAndGradOpPair>
-  DeviceIdToRecurrentAndRecurrentGradOp(const Graph &graph) const;
-};
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 5c1d872078a42..52516d69794c8 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -29,7 +29,6 @@
 #include "paddle/fluid/memory/stats.h"
 #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
 #include "paddle/fluid/operators/controlflow/pylayer_op_helper.h"
-#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
 #include "paddle/fluid/operators/controlflow/while_op_helper.h"
 #include "paddle/fluid/operators/ops_extra_info.h"
 #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
@@ -604,8 +603,6 @@ void BuildOpFuncList(const platform::Place& place,
         main_program, block.ID(), ops_unique);
     operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
         main_program, block.ID(), ops_unique);
-    operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-        main_program, block.ID(), ops_unique);
   }
 
 #ifdef PADDLE_WITH_DNNL
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 6fbc11df5cd3c..4714f3a2eb446 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -83,7 +83,7 @@ endif()
 set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi common phi_utils static_prim_api get_expected_kernel_func)
 
 register_operators(EXCLUDES py_func_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op
-        recurrent_op save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils)
+        save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils)
 
 op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS})
 op_library(run_program_op DEPS executor_cache ${OP_HEADER_DEPS})
@@ -109,12 +109,6 @@ if (WITH_GPU OR WITH_ROCM)
 endif()
 
 op_library(lstm_op DEPS ${OP_HEADER_DEPS})
-if (WITH_ROCM)
-    op_library(recurrent_op DEPS executor ${OP_HEADER_DEPS})
-else()
-    op_library(recurrent_op DEPS ${OP_HEADER_DEPS})
-endif()
-
 
 set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
 
diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt b/paddle/fluid/operators/controlflow/CMakeLists.txt
index ca32f31df1ff7..811542cbdf373 100644
--- a/paddle/fluid/operators/controlflow/CMakeLists.txt
+++ b/paddle/fluid/operators/controlflow/CMakeLists.txt
@@ -25,10 +25,6 @@ cc_library(
   pylayer_op_helper
   SRCS pylayer_op_helper.cc
   DEPS op_variant operator pylayer_op)
-cc_library(
-  recurrent_op_helper
-  SRCS recurrent_op_helper.cc
-  DEPS recurrent_op op_variant operator)
 cc_library(
   while_op_helper
   SRCS while_op_helper.cc
diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc
deleted file mode 100644
index e290fa3e016bd..0000000000000
--- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
-
-#include <string>
-
-namespace paddle {
-namespace framework {
-class BlockDesc;
-class ProgramDesc;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace operators {
-
-static bool IsMatchedRecurrentOpAndRecurrentGradOp(const OpVariant &fwd_op,
-                                                   const OpVariant &grad_op) {
-  return fwd_op.Inputs().at(RecurrentBase::kInputs) ==
-             grad_op.Inputs().at(RecurrentBase::kInputs) &&
-         fwd_op.Outputs().at(RecurrentBase::kOutputs) ==
-             grad_op.Inputs().at(RecurrentBase::kOutputs);
-}
-
-// Returns whether the variable is skippable in forward recurrent op
-// The variable is skippable in recurrent_op when the variable used in
-// recurrent_grad is not from grad_block.
-static bool IsSkippableVar(const std::string &name,
-                           framework::BlockDesc *grad_block) {
-  return name != framework::kEmptyVarName && !grad_block->HasVar(name);
-}
-
-static void ClearSkipVars(const OpVariant &op) {
-  auto &attrs = const_cast<framework::AttributeMap &>(op.Attrs());
-  std::vector<std::string> &attr_skip_vars = PADDLE_GET(
-      std::vector<std::string>, attrs[RecurrentBase::kSkipEagerDeletionVars]);
-  attr_skip_vars.clear();
-}
-
-// Add skip vars into op's attribute
-template <class Container>
-static void AddSkipVars(const OpVariant &op, const Container &skip_vars) {
-  auto &attrs = const_cast<framework::AttributeMap &>(op.Attrs());
-  VLOG(2) << "Prepare to add " << skip_vars.size()
-          << " skip var(s): " << paddle::string::join_strings(skip_vars, ' ');
-  std::vector<std::string> &attr_skip_vars = PADDLE_GET(
-      std::vector<std::string>, attrs[RecurrentBase::kSkipEagerDeletionVars]);
-  attr_skip_vars.insert(
-      attr_skip_vars.end(), skip_vars.cbegin(), skip_vars.cend());
-}
-
-// Find all ops and grad ops with given type name. The ops and grad ops
-// may locate in different blocks so we should traverse all blocks in the
-// program and find them out
-static void FindAllOpAndGradOp(const framework::ProgramDesc &program,
-                               OpAndGradOpPair *op_and_grad_op,
-                               const std::string &type_name,
-                               const std::string &backward_type_name) {
-  OpVariantSet &ops = op_and_grad_op->first;
-  OpVariantSet &grad_ops = op_and_grad_op->second;
-
-  PADDLE_ENFORCE_GE(
-      ops.size(),
-      grad_ops.size(),
-      phi::errors::InvalidArgument(
-          "There are more grad ops than forward ops in the graph or program, "
-          "the number of ops is %d and the number of grad_ops is %d.",
-          ops.size(),
-          grad_ops.size()));
-
-  for (size_t i = 1; i < program.Size(); ++i) {
-    auto &block = program.Block(i);
-    for (size_t j = 0; j < block.OpSize(); ++j) {
-      auto *op = block.Op(static_cast<int>(j));
-      if (op->Type() == type_name) {
-        ops.emplace(op);
-      } else if (op->Type() == backward_type_name) {
-        grad_ops.emplace(op);
-      }
-    }
-  }
-
-  PADDLE_ENFORCE_GE(
-      ops.size(),
-      grad_ops.size(),
-      phi::errors::InvalidArgument(
-          "There are more grad ops than forward ops in the graph or program, "
-          "the number of ops is %d and the number of grad_ops is %d.",
-          ops.size(),
-          grad_ops.size()));
-}
-
-// Returns GradVarName of input var names
-static std::vector<std::string> GradVarLists(
-    const std::vector<std::string> &var_names) {
-  std::vector<std::string> retv;
-  retv.reserve(var_names.size());
-  std::transform(var_names.begin(),
-                 var_names.end(),
-                 std::back_inserter(retv),
-                 framework::GradVarName);
-  return retv;
-}
-
-// Add memory vars in recurrent op as skip vars.
-static void AddOpMemVarsAsSkip(const OpVariant &op, bool set_grad_mem_vars) {
-  bool has_state = op.Attr<bool>(RecurrentBase::kHasStates);
-  if (has_state) {
-    std::unordered_set<std::string> skip_vars;
-
-    auto &mem_vars = op.Attr<std::vector<std::string>>(RecurrentBase::kStates);
-    skip_vars.insert(mem_vars.begin(), mem_vars.end());
-
-    auto &pre_mem_vars =
-        op.Attr<std::vector<std::string>>(RecurrentBase::kExStates);
-    skip_vars.insert(pre_mem_vars.begin(), pre_mem_vars.end());
-
-    if (set_grad_mem_vars) {
-      auto mem_grad_vars = GradVarLists(mem_vars);
-      skip_vars.insert(mem_grad_vars.begin(), mem_grad_vars.end());
-      auto pre_mem_grad_vars = GradVarLists(pre_mem_vars);
-      skip_vars.insert(pre_mem_grad_vars.begin(), pre_mem_grad_vars.end());
-    }
-    AddSkipVars(op, skip_vars);
-  }
-}
-
-// Set outputs and memory vars of the input forward op as skip vars
-static void SetRecurrentForwardOpOnlySkipVarAttr(const OpVariant &fwd_op) {
-  ClearSkipVars(fwd_op);
-
-  AddOpMemVarsAsSkip(fwd_op, /* set_grad_mem_vars = */ false);
-  auto &output_vars = fwd_op.Outputs().at(RecurrentBase::kOutputs);
-  AddSkipVars(fwd_op, output_vars);
-}
-
-// Set skip vars of matched recurrent op and recurrent_grad op
-static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr(
-    const OpVariant &fwd_op, const OpVariant &bwd_op) {
-  // Find all skippable variables in forward recurrent_op
-  ClearSkipVars(fwd_op);
-  AddOpMemVarsAsSkip(fwd_op, /* set_grad_mem_vars = */ false);
-
-  auto *grad_block =
-      bwd_op.Attr<framework::BlockDesc *>(RecurrentBase::kStepBlock);
-  std::unordered_set<std::string> fwd_skip_vars;
-  for (auto *op_desc : grad_block->AllOps()) {
-    for (auto &in_arg_name : op_desc->InputArgumentNames()) {
-      if (IsSkippableVar(in_arg_name, grad_block)) {
-        fwd_skip_vars.insert(in_arg_name);
-      }
-    }
-    for (auto &out_arg_name : op_desc->OutputArgumentNames()) {
-      if (IsSkippableVar(out_arg_name, grad_block)) {
-        fwd_skip_vars.insert(out_arg_name);
-      }
-    }
-  }
-  AddSkipVars(fwd_op, fwd_skip_vars);
-
-  // Find all skippable variables in recurrent_grad_op
-  // The skippable variables are those which would be used across time steps
-  ClearSkipVars(bwd_op);
-  AddOpMemVarsAsSkip(bwd_op, /* set_grad_mem_vars = */ true);
-  std::unordered_set<std::string> bwd_skip_vars;
-
-  auto &fwd_input = fwd_op.Inputs().at(RecurrentBase::kInputs);
-  auto &in_grads =
-      bwd_op.Outputs().at(framework::GradVarName(RecurrentBase::kInputs));
-
-  PADDLE_ENFORCE_EQ(
-      fwd_input.size(),
-      in_grads.size(),
-      phi::errors::PreconditionNotMet(
-          "Backward input gradient number does not match forward "
-          "input number. The number of forward input number is %d and the "
-          "number of backward input gradient number is %d.",
-          fwd_input.size(),
-          in_grads.size()));
-  for (size_t i = 0; i < in_grads.size(); ++i) {
-    if (in_grads[i] == framework::kEmptyVarName) {
-      continue;
-    }
-    bwd_skip_vars.insert(in_grads[i]);
-    bwd_skip_vars.insert(framework::GradVarName(fwd_input[i]));
-  }
-
-  auto &fwd_param = fwd_op.Inputs().at(RecurrentBase::kParameters);
-  auto &param_grads =
-      bwd_op.Outputs().at(framework::GradVarName(RecurrentBase::kParameters));
-  PADDLE_ENFORCE_EQ(
-      fwd_param.size(),
-      param_grads.size(),
-      phi::errors::PreconditionNotMet(
-          "Backward parameter gradient number does not match "
-          "forward parameter number. The number of forward parameter number is "
-          "%d and the number of backward parameter gradient is %d.",
-          fwd_param.size(),
-          param_grads.size()));
-  for (size_t i = 0; i < fwd_param.size(); ++i) {
-    if (param_grads[i] == framework::kEmptyVarName) {
-      continue;
-    }
-    bwd_skip_vars.insert(param_grads[i]);
-    bwd_skip_vars.insert(framework::GradVarName(fwd_param[i]));
-  }
-
-  AddSkipVars(bwd_op, bwd_skip_vars);
-}
-
-void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-    const framework::ProgramDesc &program,
-    int block_id,
-    const std::vector<std::unique_ptr<paddle::framework::OperatorBase>>
-        &all_ops) {
-  // If block_id is not 0, returns
-  // This is because all recurrent_ops and recurrent_grad_ops in the whole
-  // program would be processed when block_id is 0 (i.e. when Executor::Run()
-  // or ParallelExecutor constructs).
-
-  // What's more, all recurrent_ops and recurrent_grad_ops must be processed
-  // when block_id is zero. If not, recurrent_op may run first and erase
-  // variables
-  // used in recurrent_grad_op, and in this moment, recurrent_grad_ops may be
-  // not constructed yet.
-  if (block_id != 0) return;
-
-  OpAndGradOpPair op_pair;
-  for (auto &op : all_ops) {
-    if (op->Type() == "recurrent") {
-      op_pair.first.emplace(op.get());
-    } else if (op->Type() == "recurrent_grad") {
-      op_pair.second.emplace(op.get());
-    }
-  }
-  PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(program, &op_pair);
-}
-
-void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-    const framework::ProgramDesc &program, OpAndGradOpPair *op_pair) {
-  // Find all ops and grad ops at all blocks
-  FindAllOpAndGradOp(program, op_pair, "recurrent", "recurrent_grad");
-
-  OpVariantSet &recurrent_ops = op_pair->first;
-  OpVariantSet &recurrent_grad_ops = op_pair->second;
-
-  VLOG(2) << "Found recurrent op num: " << recurrent_ops.size()
-          << ", recurrent grad op num: " << recurrent_grad_ops.size();
-
-  if (recurrent_ops.empty()) {
-    return;
-  }
-
-  for (auto &bwd_op : recurrent_grad_ops) {
-    const OpVariant *matched_fwd_op = nullptr;
-    for (auto &fwd_op : recurrent_ops) {
-      if (IsMatchedRecurrentOpAndRecurrentGradOp(fwd_op, bwd_op)) {
-        PADDLE_ENFORCE_EQ(matched_fwd_op,
-                          nullptr,
-                          phi::errors::PreconditionNotMet(
-                              "Found multiple recurrent forward op matches "
-                              "recurrent grad op."));
-        matched_fwd_op = &fwd_op;
-      }
-    }
-    PADDLE_ENFORCE_NOT_NULL(
-        matched_fwd_op,
-        phi::errors::PreconditionNotMet("Cannot find matched forward op."));
-    SetRecurrentOpAndRecurrentGradOpSkipVarAttr(*matched_fwd_op, bwd_op);
-    recurrent_ops.erase(*matched_fwd_op);
-  }
-
-  for (auto &fwd_op : recurrent_ops) {
-    SetRecurrentForwardOpOnlySkipVarAttr(fwd_op);
-  }
-}
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.h b/paddle/fluid/operators/controlflow/recurrent_op_helper.h
deleted file mode 100644
index 37573cc617643..0000000000000
--- a/paddle/fluid/operators/controlflow/recurrent_op_helper.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/controlflow/op_variant.h"
-#include "paddle/fluid/operators/recurrent_op.h"
-
-#include "paddle/utils/string/string_helper.h"
-
-namespace paddle {
-namespace framework {
-class ProgramDesc;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace operators {
-
-using OpVariantSet = std::unordered_set<OpVariant, OpVariant::Hasher>;
-using OpAndGradOpPair = std::pair<OpVariantSet, OpVariantSet>;
-
-// Set vars to skip eager deletion on input recurrent and recurrent_grad for
-// preparing safe eager deletion. Input contains all recurrent and
-// recurrent_grad ops at block 0 and the function will find all recurrent and
-// recurrent_grad ops across blocks.
-void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-    const framework::ProgramDesc &program, OpAndGradOpPair *op_pair);
-
-// Set vars to skip eager deletion on input recurrent and recurrent_grad for
-// preparing safe eager deletion. The input block_id must be 0 and caller can
-// input all ops in the block. The function will find all recurrent and
-// recurrent_grad ops across blocks.
-void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
-    const framework::ProgramDesc &program,
-    int block_id,
-    const std::vector<std::unique_ptr<paddle::framework::OperatorBase>>
-        &all_ops);
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc
deleted file mode 100644
index 21443fc51d568..0000000000000
--- a/paddle/fluid/operators/recurrent_op.cc
+++ /dev/null
@@ -1,801 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/recurrent_op.h"
-
-namespace phi {
-class DenseTensor;
-}  // namespace phi
-
-namespace paddle {
-namespace framework {
-class InferShapeContext;
-class OpDesc;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace operators {
-
-using StepScopeVar = std::vector<framework::Scope *>;
-
-const char RecurrentBase::kInputs[] = "inputs";                 // NOLINT
-const char RecurrentBase::kInitialStates[] = "initial_states";  // NOLINT
-const char RecurrentBase::kParameters[] = "parameters";         // NOLINT
-const char RecurrentBase::kOutputs[] = "outputs";               // NOLINT
-const char RecurrentBase::kStepScopes[] = "step_scopes";        // NOLINT
-const char RecurrentBase::kHasStates[] = "has_states";          // NOLINT
-const char RecurrentBase::kExStates[] = "ex_states";            // NOLINT
-const char RecurrentBase::kStates[] = "states";                 // NOLINT
-const char RecurrentBase::kStepBlock[] = "sub_block";           // NOLINT
-const char RecurrentBase::kReverse[] = "reverse";               // NOLINT
-const char RecurrentBase::kIsTrain[] = "is_train";              // NOLINT
-const char RecurrentBase::kSkipEagerDeletionVars[] =            // NOLINT
-    "skip_eager_deletion_vars";
-#define GRAD_SUFFIX "@GRAD"
-const char RecurrentBase::kInputGrads[] = "inputs" GRAD_SUFFIX;      // NOLINT
-const char RecurrentBase::kOutputGrads[] = "outputs" GRAD_SUFFIX;    // NOLINT
-const char RecurrentBase::kParamGrads[] = "parameters" GRAD_SUFFIX;  // NOLINT
-const char RecurrentBase::kInitStateGrads[] =                        // NOLINT
-    "initial_states" GRAD_SUFFIX;
-
-static void ClearStepScopes(const platform::DeviceContext &dev_ctx,
-                            framework::Scope *parent_scope,
-                            StepScopeVar *step_scopes) {
-  if (step_scopes->empty()) return;
-
-  dev_ctx.Wait();
-
-  for (auto *sub_scope : *step_scopes) {
-    if (parent_scope->HasKid(sub_scope)) {
-      parent_scope->DeleteScope(sub_scope);
-    }
-  }
-
-  step_scopes->clear();
-}
-
-StepScopes::StepScopes(const platform::DeviceContext &dev_ctx,
-                       const framework::Scope &parent,
-                       StepScopeVar *scopes,
-                       bool is_train,
-                       size_t seq_len,
-                       bool is_backward)
-    : counter_(is_backward ? seq_len - 1 : 0UL),
-      scopes_(scopes),
-      is_train_(is_train),
-      is_backward_(is_backward) {
-  size_t num_step_scopes = is_train ? seq_len : 2;
-  PADDLE_ENFORCE_EQ(
-      is_train || !is_backward,
-      true,
-      phi::errors::PreconditionNotMet("Cannot backward when is not training"));
-  if (!is_backward_) {
-    ClearStepScopes(dev_ctx, const_cast<framework::Scope *>(&parent), scopes);
-    scopes->reserve(static_cast<size_t>(num_step_scopes));
-    for (size_t i = 0; i < num_step_scopes; ++i) {
-      scopes->emplace_back(&parent.NewScope());
-    }
-  }
-}
-
-framework::Scope &StepScopes::CurScope() { return GetScope(counter_); }
-
-framework::Scope &StepScopes::ExScope() {
-  auto &scope = GetScope(is_backward_ ? counter_ + 1 : counter_ - 1);
-  return scope;
-}
-
-void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx,
-                              framework::Scope *parent_scope) {
-  PADDLE_ENFORCE_EQ(is_backward_,
-                    true,
-                    phi::errors::PreconditionNotMet(
-                        "Cannot get backward next scope when is forward"));
-  if (counter_ + 2 == scopes_->size()) {
-    parent_scope->DeleteScope((*scopes_)[counter_ + 1]);
-    scopes_->pop_back();
-    VLOG(3) << "Deleted scope at " << counter_ + 1;
-  }
-  --counter_;
-}
-
-void StepScopes::ForwardNext() {
-  PADDLE_ENFORCE_EQ(is_backward_,
-                    false,
-                    phi::errors::PreconditionNotMet(
-                        "Cannot get forward next scope when is backward"));
-  ++counter_;
-}
-
-framework::Scope &StepScopes::GetScope(size_t scope_id) const {
-  if (!is_train_) {
-    scope_id %= 2;
-  }
-  PADDLE_ENFORCE_LT(
-      scope_id,
-      scopes_->size(),
-      phi::errors::InvalidArgument(
-          "Input scope_id is greater than scopes size in RecurrentOp"));
-  return *(*scopes_)[scope_id];
-}
-
-RecurrentBase::RecurrentBase(const std::string &type,
-                             const framework::VariableNameMap &inputs,
-                             const framework::VariableNameMap &outputs,
-                             const framework::AttributeMap &attrs)
-    : OperatorBase(type, inputs, outputs, attrs) {}
-
-// Get SequenceLength from Scope
-//   The sequence length is got from input tensor. The input tensor's
-//   dimension should be [SEQ_LEN, ..., ...]. The first of the tensor's shape
-//   is SEQ_LEN. The second of the tensor's shape could be the batch size or
-//   nested sequence length.
-int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const {
-  // Dim format SEQ_LEN, BATCH_SIZE, ...
-  int64_t seq_len = -1;
-  auto &all_inputs = Inputs(kInputs);
-  PADDLE_ENFORCE_EQ(
-      all_inputs.empty(),
-      false,
-      phi::errors::InvalidArgument("RecurrentOp gets empty input"));
-  for (auto &iname : all_inputs) {
-    auto *var = scope.FindVar(iname);
-    PADDLE_ENFORCE_NOT_NULL(var,
-                            phi::errors::InvalidArgument(
-                                "RecurrentOp finds var %s is NULL", iname));
-    PADDLE_ENFORCE_EQ(
-        var->IsType<phi::DenseTensor>(),
-        true,
-        phi::errors::InvalidArgument(
-            "RecurrentOp only accepts phi::DenseTensor as input but "
-            "input var %s is not phi::DenseTensor",
-            iname));
-    auto &dim = var->Get<phi::DenseTensor>().dims();
-    if (seq_len == -1) {
-      seq_len = dim[0];
-    } else {
-      PADDLE_ENFORCE_EQ(seq_len,
-                        dim[0],
-                        phi::errors::InvalidArgument(
-                            "Sequence length of input %s in RecurrentOp is NOT "
-                            "equal to sequence length of previous input",
-                            iname));
-    }
-  }
-  PADDLE_ENFORCE_GE(seq_len,
-                    0,
-                    phi::errors::InvalidArgument(
-                        "RecurrentOp gets invalid sequence length. Expected "
-                        "seq_len >= 0. Received seq_len = %d",
-                        seq_len));
-  return seq_len;
-}
-
-// for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars),
-//                                   map(dst_scope.Var, dst_vars)):
-//   dst_tensor.ShareDataWith(src_tensor)
-void RecurrentBase::LinkTensor(const framework::Scope &src_scope,
-                               const std::vector<std::string> &src_vars,
-                               framework::Scope *dst_scope,
-                               const std::vector<std::string> &dst_vars) {
-  LinkTensorWithCallback(
-      src_scope,
-      src_vars,
-      dst_scope,
-      dst_vars,
-      [&](const phi::DenseTensor &src, phi::DenseTensor *dst) {
-        dst->ShareDataWith(src);
-      });
-}
-
-// (seq_len, shape) -> return [seq_len] + list(shape)
-phi::DDim RecurrentBase::PrependDims(size_t seq_len, const phi::DDim &src) {
-  auto dims = common::vectorize(src);
-  dims.insert(dims.begin(), static_cast<int64_t>(seq_len));
-  return common::make_ddim(dims);
-}
-
-RecurrentOp::RecurrentOp(const std::string &type,
-                         const framework::VariableNameMap &inputs,
-                         const framework::VariableNameMap &outputs,
-                         const framework::AttributeMap &attrs)
-    : RecurrentBase(type, inputs, outputs, attrs) {}
-
-void RecurrentOp::RunImpl(const framework::Scope &scope,
-                          const phi::Place &place) const {
-  bool has_state = Attr<bool>(kHasStates);
-  auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope));
-
-  // get device context from pool
-  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-  auto &dev_ctx = *pool.Get(place);
-
-  VLOG(3) << "Static RNN input sequence length = " << seq_len;
-  auto reverse = Attr<bool>(kReverse);
-
-  framework::Executor executor(place);
-  auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-
-  auto *program = block->Program();
-  auto ctx = executor.Prepare(*program,
-                              block->ID(),
-                              Attr<std::vector<std::string>>(
-                                  kSkipEagerDeletionVars), /*skip_ref_cnt_vars*/
-                              true);
-
-  StepScopes scopes = CreateStepScopes(dev_ctx, scope, seq_len);
-  for (size_t i = 0; i < seq_len; ++i) {
-    size_t seq_offset = reverse ? seq_len - i - 1 : i;
-    VLOG(3) << "Recurrent operate at the time step " << seq_offset;
-
-    auto &cur_scope = scopes.CurScope();
-
-    // Link outside::input --> inside::input
-    //   inside::input = outside::input[seq_offset: seq_offset+1]
-    LinkTensorWithCallback(scope,
-                           Inputs(kInputs),
-                           &cur_scope,
-                           Inputs(kInputs),
-                           [&seq_offset](const phi::DenseTensor &outside,
-                                         phi::DenseTensor *inside) {
-                             inside->ShareDataWith(outside.Slice(
-                                 seq_offset, seq_offset + 1));  // NOLINT
-                             auto dims = common::vectorize(inside->dims());
-                             dims.erase(dims.begin());
-                             inside->Resize(common::make_ddim(dims));
-                           });
-
-    if (has_state) {
-      if (i == 0) {
-        // Link initial states  --> ex_states
-        LinkTensor(scope,
-                   Inputs(kInitialStates),
-                   &cur_scope,
-                   Attr<std::vector<std::string>>(kExStates));
-      } else {
-        auto &ex_scope = scopes.ExScope();
-        // Link ex_scope::state --> cur_scope::ex_state
-        LinkTensor(ex_scope,
-                   Attr<std::vector<std::string>>(kStates),
-                   &cur_scope,
-                   Attr<std::vector<std::string>>(kExStates));
-      }
-    }
-
-    // Link inside::output -> outside::output
-    //   outside::output[seq_offset: seq_offset + 1] = inside::output
-    executor.CreateVariables(
-        ctx->prog_, &cur_scope, static_cast<int>(ctx->block_id_));
-
-    // Linked now, execute!
-    executor.RunPreparedContext(ctx.get(),
-                                &cur_scope,
-                                false /*create_local_scope*/,
-                                false /*create_vars*/,
-                                true /* keep_kids */);
-    if (i == 0) {
-      LinkTensorWithCallback(
-          cur_scope,
-          Outputs(kOutputs),
-          scope,
-          Outputs(kOutputs),
-          [&](const phi::DenseTensor &src_tensor,
-              phi::DenseTensor *dst_tensor) {
-            // create output tensor at begin
-            dst_tensor->Resize(PrependDims(seq_len, src_tensor.dims()));
-            dst_tensor->mutable_data(place, src_tensor.dtype());
-
-            auto dst_out =
-                dst_tensor->Slice(seq_offset, seq_offset + 1);  // NOLINT
-            // Explicit copy output since the local RNN scope can be destroyed
-            // early.
-            framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out);
-          });
-    } else {
-      LinkTensorWithCallback(
-          cur_scope,
-          Outputs(kOutputs),
-          scope,
-          Outputs(kOutputs),
-          [&](const phi::DenseTensor &src_tensor,
-              phi::DenseTensor *dst_tensor) {
-            auto dst_out =
-                dst_tensor->Slice(seq_offset, seq_offset + 1);  // NOLINT
-            framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out);
-          });
-    }
-
-    scopes.ForwardNext();
-  }
-}
-
-StepScopes RecurrentOp::CreateStepScopes(const platform::DeviceContext &dev_ctx,
-                                         const framework::Scope &scope,
-                                         size_t seq_len) const {
-  static std::mutex mutex;
-  std::lock_guard<std::mutex> lock(mutex);
-  // TODO(baoachun) Function CreateStepScopes may lead to segmentation
-  // fault in multithreading in eval process. The performance drop of
-  // adding mutex need to be fixed.
-  auto *var = scope.FindVar(Output(kStepScopes));
-  PADDLE_ENFORCE_NOT_NULL(
-      var,
-      phi::errors::InvalidArgument("RecurrentOp gets empty StepScopes var"));
-  return StepScopes(dev_ctx,
-                    scope,
-                    var->GetMutable<StepScopeVar>(),
-                    Attr<bool>(kIsTrain),
-                    seq_len);
-}
-
-RecurrentGradOp::RecurrentGradOp(const std::string &type,
-                                 const framework::VariableNameMap &inputs,
-                                 const framework::VariableNameMap &outputs,
-                                 const framework::AttributeMap &attrs)
-    : RecurrentBase(type, inputs, outputs, attrs) {}
-
-void RecurrentGradOp::RunImpl(const framework::Scope &scope,
-                              const phi::Place &place) const {
-  bool has_state = Attr<bool>(kHasStates);
-  const size_t seq_len = static_cast<size_t>(GetSequenceLength(scope));
-
-  // get device context from pool
-  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-  auto &dev_ctx = *pool.Get(place);
-
-  StepScopes scopes = CreateStepScopes(dev_ctx, scope, seq_len);
-  auto reverse = Attr<bool>(kReverse);
-
-  framework::Executor executor(place);
-  auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-  auto *program = block->Program();
-  auto ctx =
-      executor.Prepare(*program,
-                       block->ID(),
-                       Attr<std::vector<std::string>>(
-                           kSkipEagerDeletionVars) /*skip_ref_cnt_vars*/);
-
-  for (size_t step_id = 0; step_id < seq_len; ++step_id) {
-    size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
-    VLOG(3) << "Recurrent backward operate at the time step " << seq_offset;
-    auto &cur_scope = scopes.CurScope();
-
-    // Link outside::output_grads --> inside::output_grads
-    //   inside::output_grad = outside::output_grad[seq_offset:seq_offset+1]
-    LinkTensorWithCallback(
-        scope,
-        Inputs(kOutputGrads),
-        &cur_scope,
-        Inputs(kOutputGrads),
-        [&](const phi::DenseTensor &outside, phi::DenseTensor *inside) {
-          inside->ShareDataWith(
-              outside.Slice(seq_offset, seq_offset + 1));  // NOLINT
-          auto dims = common::vectorize(inside->dims());
-          dims.erase(dims.begin());
-          inside->Resize(common::make_ddim(dims));
-        },
-        true /*is_backward*/);
-    auto og_set = List2Set(Inputs(kOutputGrads));
-
-    if (VLOG_IS_ON(10)) {
-      std::ostringstream sout;
-      std::copy(og_set.begin(),
-                og_set.end(),
-                std::ostream_iterator<std::string>(sout, ","));
-      VLOG(10) << " RNN output gradients = [" << sout.str() << "]";
-    }
-
-    if (has_state) {
-      // Link states
-      //   if cur_scope::cur_state_grad in out_grads:
-      //     cur_scope::cur_state_grad += ex_scope::ex_state_grad
-      //   else:
-      //     ex_scope::ex_state_grad --> cur_scope::cur_state_grad
-      if (step_id != 0) {  // not at beginning
-        auto &ex_scope = scopes.ExScope();
-        auto ex_state_grads =
-            GradVarLists(Attr<std::vector<std::string>>(kExStates));
-        auto cur_state_grads =
-            GradVarLists(Attr<std::vector<std::string>>(kStates));
-
-        PADDLE_ENFORCE_EQ(ex_state_grads.size(),
-                          cur_state_grads.size(),
-                          phi::errors::InvalidArgument(
-                              "lengths of ex_states and cur_states are not "
-                              "equal in RecurrentGradOp"));
-        for (size_t i = 0; i < ex_state_grads.size(); ++i) {
-          auto &cur_grad = cur_state_grads[i];
-          auto &ex_grad = ex_state_grads[i];
-          auto &ex_grad_tensor =
-              ex_scope.FindVar(ex_grad)->Get<phi::DenseTensor>();
-
-          VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad;
-          auto *cur_grad_var = cur_scope.Var(cur_grad);
-          phi::DenseTensor *cur_grad_tensor =
-              cur_grad_var->GetMutable<phi::DenseTensor>();
-          cur_grad_tensor->ShareDataWith(ex_grad_tensor);
-        }
-      }
-    }
-
-    // Link inside::output -> outside::output
-    //   outside::output[seq_offset: seq_offset + 1] = inside::output
-    executor.CreateVariables(
-        ctx->prog_, &cur_scope, static_cast<int>(ctx->block_id_));
-    if (step_id > 0) {
-      LinkTensorWithCallback(
-          scope,
-          Outputs(kInputGrads),
-          cur_scope,
-          GradVarLists(Inputs(kInputs)),
-          [&](const phi::DenseTensor &src_tensor,
-              phi::DenseTensor *dst_tensor) {
-            if (src_tensor.memory_size() ==
-                0) {  // Inside Gradient is not created.
-              return;
-            }
-            phi::DenseTensor src_slice =
-                src_tensor.Slice(seq_offset, seq_offset + 1);  // NOLINT
-            dst_tensor->ShareDataWith(src_slice);
-          },
-          true /*is_backward*/);
-    }
-
-    VLOG(5) << "Recurrent memory linking finished ";
-    // Run step block with cur_scope
-    executor.RunPreparedContext(ctx.get(),
-                                &cur_scope,
-                                false /*create_local_scope*/,
-                                false /*create_vars*/,
-                                true /* keep_kids */);
-
-    VLOG(5) << "executor.Run finished ";
-
-    auto local_var_names = LocalVarNames(cur_scope);
-
-    // Accumulate params
-    //   if (step == 0):
-    //      outside::param_grad = 0.0
-    //   outside::param_grad += inside::param_grad
-    {
-      auto &pg_names = Outputs(kParamGrads);
-      auto &p_names = Inputs(kParameters);
-      PADDLE_ENFORCE_EQ(pg_names.size(),
-                        p_names.size(),
-                        phi::errors::InvalidArgument(
-                            "Sizes of Parameters and ParamGrads are not equal "
-                            "in RecurrentGradOp"));
-
-      for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
-        auto inside_grad_name = framework::GradVarName(p_names[param_id]);
-
-        // If does not compute gradient of that variable inside rnn, just
-        // continue
-        if (local_var_names.find(inside_grad_name) == local_var_names.end()) {
-          continue;
-        }
-
-        // zero gradient variable in step 0
-        if (step_id == 0) {
-          auto &inside_tensor =
-              cur_scope.FindVar(inside_grad_name)->Get<phi::DenseTensor>();
-          framework::AttributeMap attrs;
-          attrs["dtype"] =
-              framework::TransToProtoVarType(inside_tensor.dtype());
-          attrs["shape"] = common::vectorize<int>(inside_tensor.dims());
-          attrs["value"] = 0.0f;
-
-          auto zero_op =
-              framework::OpRegistry::CreateOp("fill_constant",
-                                              framework::VariableNameMap{},
-                                              {{"Out", {pg_names[param_id]}}},
-                                              attrs);
-          zero_op->Run(scope, place);
-        }
-
-        auto new_inside_name = cur_scope.Rename(inside_grad_name);
-
-        // sum gradient
-        auto sum_op = framework::OpRegistry::CreateOp(
-            "sum",
-            {{"X", {pg_names[param_id], new_inside_name}}},
-            {{"Out", {pg_names[param_id]}}},
-            framework::AttributeMap{{"use_mkldnn", {false}}});
-        sum_op->Run(cur_scope, place);
-
-        cur_scope.Rename(new_inside_name, inside_grad_name);
-      }
-    }
-    VLOG(5) << "Accumulate Parameter finished ";
-
-    // Copy input gradient from inside to outside
-    //   outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad
-    if (step_id == 0) {
-      LinkTensorWithCallback(
-          cur_scope,
-          GradVarLists(Inputs(kInputs)),
-          scope,
-          Outputs(kInputGrads),
-          [&](const phi::DenseTensor &inside, phi::DenseTensor *outside) {
-            if (inside.memory_size() == 0) {  // IG is not created.
-              return;
-            }
-            // Alloc outside memory
-            outside->Resize(PrependDims(seq_len, inside.dims()));
-            outside->mutable_data(place, inside.dtype());
-
-            auto dst = outside->Slice(seq_offset, seq_offset + 1);  // NOLINT
-            framework::TensorCopy(inside, place, dev_ctx, &dst);
-          },
-          true /*is_backward*/);
-    }
-    VLOG(5) << "Link outside gradient finished ";
-
-    if (has_state) {
-      if (step_id + 1 == seq_len) {  // at_end
-        // copy initialize states gradient from inside to outside
-        LinkTensorWithCallback(
-            cur_scope,
-            GradVarLists(Attr<std::vector<std::string>>(kExStates)),
-            scope,
-            Outputs(kInitStateGrads),
-            [&](const phi::DenseTensor &inside, phi::DenseTensor *outside) {
-              outside->Resize(inside.dims());
-              outside->mutable_data(place, inside.dtype());
-              framework::TensorCopy(inside, place, dev_ctx, outside);
-            },
-            true /*is_backward*/);
-        VLOG(5) << "Link initialize state gradient finished ";
-      }
-    }
-    scopes.BackwardNext(dev_ctx, const_cast<framework::Scope *>(&scope));
-  }
-  // Delete the scope of StepScopes
-  auto *var = scope.FindVar(Input(kStepScopes));
-  PADDLE_ENFORCE_NOT_NULL(var,
-                          phi::errors::InvalidArgument(
-                              "StepScopes var is empty in RecurrentGradOp"));
-  auto *step_scopes = var->GetMutable<StepScopeVar>();
-  ClearStepScopes(dev_ctx, const_cast<framework::Scope *>(&scope), step_scopes);
-}
-
-StepScopes RecurrentGradOp::CreateStepScopes(
-    const platform::DeviceContext &dev_ctx,
-    const framework::Scope &scope,
-    size_t seq_len) const {
-  auto *var = scope.FindVar(Input(kStepScopes));
-  PADDLE_ENFORCE_NOT_NULL(var,
-                          phi::errors::InvalidArgument(
-                              "StepScopes var is empty in RecurrentGradOp"));
-  return StepScopes(dev_ctx,
-                    scope,
-                    var->GetMutable<StepScopeVar>(),
-                    Attr<bool>(kIsTrain),
-                    seq_len,
-                    true /*is_backward*/);
-}
-
-std::unordered_set<std::string> RecurrentGradOp::List2Set(
-    const std::vector<std::string> &list) const {
-  std::unordered_set<std::string> local_var_name_set;
-  local_var_name_set.reserve(list.size());
-  for (auto &each : list) {
-    local_var_name_set.insert(each);
-  }
-  return local_var_name_set;
-}
-
-std::unordered_set<std::string> RecurrentGradOp::LocalVarNames(
-    const framework::Scope &scope) const {
-  return this->List2Set(scope.LocalVarNames());
-}
-
-std::vector<std::string> RecurrentGradOp::GradVarLists(
-    const std::vector<std::string> &var_names) {
-  std::vector<std::string> retv;
-  retv.reserve(var_names.size());
-  std::transform(var_names.begin(),
-                 var_names.end(),
-                 std::back_inserter(retv),
-                 framework::GradVarName);
-  return retv;
-}
-
-class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(RecurrentBase::kInputs, "rnn inputs").AsDuplicable();
-    AddInput(RecurrentBase::kInitialStates, "rnn initial states")
-        .AsDuplicable();
-    AddInput(RecurrentBase::kParameters,
-             "Parameters are used by step block as its input. However, the "
-             "input is not a sequence tensor. Every time step, each operator "
-             "in step block just use the parameter directly.")
-        .AsDuplicable();
-    AddOutput(RecurrentBase::kOutputs,
-              "The output sequence of RNN. The sequence length must be same.")
-        .AsDuplicable();
-    AddOutput(RecurrentBase::kStepScopes,
-              "StepScopes contain all local variables in each time step.");
-    AddAttr<bool>(RecurrentBase::kHasStates, "Whether has states.")
-        .SetDefault(false);
-    AddAttr<std::vector<std::string>>(RecurrentBase::kExStates,
-                                      string::Sprintf(
-                                          R"DOC(The ex-state variable names.
-The ex-state means the state value in the ex-timestep or the previous time step
-[%s, %s, %s] must be the same order)DOC",
-                                          RecurrentBase::kExStates,
-                                          RecurrentBase::kStates,
-                                          RecurrentBase::kInitStateGrads));
-    AddAttr<std::vector<std::string>>(
-        RecurrentBase::kStates,
-        string::Sprintf(
-            "The state variable names. [%s, %s, %s] must be the same order",
-            RecurrentBase::kExStates,
-            RecurrentBase::kStates,
-            RecurrentBase::kInitStateGrads));
-    AddAttr<framework::BlockDesc *>(RecurrentBase::kStepBlock,
-                                    "The step block inside RNN");
-    AddAttr<bool>(RecurrentBase::kReverse,
-                  R"DOC(Calculate RNN reversely or not.
-By default reverse=False
-
-Assume the input data is [A, B, C, D]
-
-if reverse is False:
-  the computation of RNN is like
-      A          B          C         D
-      |          |          |         |
-      v          v          v         v
-     rnn -----> rnn -----> rnn ----> rnn
-      |          |          |         |
-      v          v          v         v
-      o          o          o         o
-
-if reverse is True
-  the computation of RNN is like
-      A          B          C         D
-      |          |          |         |
-      v          v          v         v
-     rnn <----- rnn <----- rnn <---- rnn
-      |          |          |         |
-      v          v          v         v
-      o          o          o         o
-)DOC")
-        .SetDefault(false);
-    AddAttr<bool>(RecurrentBase::kIsTrain, "").SetDefault(true);
-    AddAttr<std::vector<std::string>>(RecurrentBase::kSkipEagerDeletionVars,
-                                      "Vars that would skip eager deletion."
-                                      "Users should not set this manually.")
-        .SetDefault(std::vector<std::string>());
-
-    AddComment(R"DOC(
-Static Length Recurrent Operator.
-
-The static length recurrent operator can only operate on fixed size sequence
-data, i.e. in each mini-batch, the sequence length of all inputs are the same.
-
-)DOC");
-  }
-};
-
-template <typename T>
-class RecurrentGradOpMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> grad) const override {
-    grad->SetType("recurrent_grad");
-    for (auto &input_param : this->InputNames()) {
-      grad->SetInput(input_param, this->Input(input_param));
-      grad->SetOutput(framework::GradVarName(input_param),
-                      this->InputGrad(input_param, false));
-    }
-
-    for (auto &output_param : this->OutputNames()) {
-      if (output_param == RecurrentBase::kStepScopes) {
-        grad->SetInput(output_param, this->Output(output_param));
-        grad->SetInput(framework::GradVarName(output_param),
-                       this->Output(output_param));
-      } else {
-        grad->SetInput(output_param, this->Output(output_param));
-        grad->SetInput(framework::GradVarName(output_param),
-                       this->OutputGrad(output_param));
-      }
-    }
-    grad->SetAttrMap(this->Attrs());
-    grad->SetBlockAttr(RecurrentBase::kStepBlock, this->grad_block_[0]);
-  }
-};
-
-class RecurrentGradOpShapeInference : public framework::InferShapeBase {
- public:
-  void operator()(framework::InferShapeContext *ctx) const override {
-    std::vector<std::string> output{RecurrentBase::kOutputs};
-
-    // In some case the kInitialStates is empty.
-    // If the kInitialStates is empty, all the states should be empty.
-    if (!ctx->HasInputs(RecurrentBase::kInitialStates)) {
-      PADDLE_ENFORCE_EQ(
-          ctx->Attrs()
-              .Get<std::vector<std::string>>(RecurrentBase::kExStates)
-              .size(),
-          0,
-          phi::errors::InvalidArgument("The Attr(%s) should be empty.",
-                                       RecurrentBase::kExStates));
-      PADDLE_ENFORCE_EQ(
-          ctx->Attrs()
-              .Get<std::vector<std::string>>(RecurrentBase::kStates)
-              .size(),
-          0,
-          phi::errors::InvalidArgument("The Attr(%s) should be empty.",
-                                       RecurrentBase::kStates));
-    }
-
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInputs(RecurrentBase::kInputs),
-        true,
-        phi::errors::InvalidArgument("The input(%s) should not be empty.",
-                                     RecurrentBase::kInputs));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInputs(RecurrentBase::kOutputs),
-        true,
-        phi::errors::InvalidArgument("The input(%s) should not be empty.",
-                                     RecurrentBase::kOutputs));
-
-    // In some case the kInitialStates is empty.
-    if (ctx->HasInputs(RecurrentBase::kInitialStates) &&
-        ctx->HasOutputs(
-            framework::GradVarName(RecurrentBase::kInitialStates))) {
-      ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInitialStates),
-                         ctx->GetInputsDim(RecurrentBase::kInitialStates));
-    }
-
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutputs(framework::GradVarName(RecurrentBase::kInputs),
-                        /*allow_null=*/true),
-        true,
-        phi::errors::InvalidArgument(
-            "The output of(%s) should not be empty.",
-            framework::GradVarName(RecurrentBase::kInputs)));
-    ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInputs),
-                       ctx->GetInputsDim(RecurrentBase::kInputs));
-
-    // In some case the kParameters is empty.
-    if (ctx->HasInputs(RecurrentBase::kParameters)) {
-      PADDLE_ENFORCE_EQ(
-          ctx->HasOutputs(framework::GradVarName(RecurrentBase::kParameters)),
-          true,
-          phi::errors::InvalidArgument(
-              "The output of(%s) should not be empty.",
-              framework::GradVarName(RecurrentBase::kParameters)));
-      ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kParameters),
-                         ctx->GetInputsDim(RecurrentBase::kParameters));
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-REGISTER_OPERATOR(
-    recurrent,
-    paddle::operators::RecurrentOp,
-    paddle::operators::RecurrentOpProtoMaker,
-    paddle::operators::RecurrentGradOpMaker<paddle::framework::OpDesc>);
-REGISTER_OPERATOR(recurrent_grad,
-                  paddle::operators::RecurrentGradOp,
-                  paddle::operators::RecurrentGradOpShapeInference);
diff --git a/paddle/fluid/operators/recurrent_op.h b/paddle/fluid/operators/recurrent_op.h
deleted file mode 100644
index a95a4a0712b20..0000000000000
--- a/paddle/fluid/operators/recurrent_op.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include "paddle/fluid/framework/executor.h"
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-// StepScopes manages the scopes inside Recurrent Op.
-//
-// if is_train = False, then
-//   there are two scopes for the RNN and just support forward
-// else
-//   the len(scopes) == seq_len
-//
-// if is_backward = True, then
-//   reversely access scopes, delete useless ex-scope
-// else
-//   access scopes from beginning to end
-class StepScopes {
- public:
-  StepScopes(const platform::DeviceContext &dev_ctx,
-             const framework::Scope &parent,
-             std::vector<framework::Scope *> *scopes,
-             bool is_train,
-             size_t seq_len,
-             bool is_backward = false);
-
-  // Get the current scope
-  framework::Scope &CurScope();
-
-  // Get the ex-scope, which is the scope in previous time step
-  framework::Scope &ExScope();
-
-  // Move to next time step when forwarding
-  void ForwardNext();
-
-  // Delete ex-scope after using it, then move to next time step when
-  // backwarding
-  void BackwardNext(const platform::DeviceContext &dev_ctx,
-                    framework::Scope *parent_scope);
-
- private:
-  framework::Scope &GetScope(size_t scope_id) const;
-
-  size_t counter_;
-  std::vector<framework::Scope *> *scopes_;
-  bool is_train_;
-  bool is_backward_;
-};
-
-// Base class for RecurrentOp/RecurrentGradOp
-//    Some common protected functions for RecurrentOp/RecurrentGradOp
-class RecurrentBase : public framework::OperatorBase {
- public:
-  static const char kInputs[];
-  static const char kInitialStates[];
-  static const char kParameters[];
-  static const char kOutputs[];
-  static const char kStepScopes[];
-  static const char kHasStates[];
-  static const char kExStates[];
-  static const char kStates[];
-  static const char kStepBlock[];
-  static const char kReverse[];
-  static const char kIsTrain[];
-  static const char kSkipEagerDeletionVars[];
-  static const char kInputGrads[];
-  static const char kOutputGrads[];
-  static const char kParamGrads[];
-  static const char kInitStateGrads[];
-
-  RecurrentBase(const std::string &type,
-                const framework::VariableNameMap &inputs,
-                const framework::VariableNameMap &outputs,
-                const framework::AttributeMap &attrs);
-
- protected:
-  // Get SequenceLength from Scope
-  //   The sequence length is got from input tensor. The input tensor's
-  //   dimension should be [SEQ_LEN, ..., ...]. The first of the tensor's shape
-  //   is SEQ_LEN. The second of the tensor's shape could be the batch size or
-  //   nested sequence length.
-  int64_t GetSequenceLength(const framework::Scope &scope) const;
-
-  // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars),
-  //                                   map(dst_scope.Var, dst_vars)):
-  //   dst_tensor.ShareDataWith(src_tensor)
-  static void LinkTensor(const framework::Scope &src_scope,
-                         const std::vector<std::string> &src_vars,
-                         framework::Scope *dst_scope,
-                         const std::vector<std::string> &dst_vars);
-
-  // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars),
-  //                                   map(dst_scope.Var, dst_vars)):
-  //   callback(src_tensor, &dst_tensor)
-  template <typename Callback>
-  static void LinkTensorWithCallback(const framework::Scope &src_scope,
-                                     const std::vector<std::string> &src_vars,
-                                     framework::Scope *dst_scope,
-                                     const std::vector<std::string> &dst_vars,
-                                     Callback callback,
-                                     bool is_backward = false) {
-    PADDLE_ENFORCE_EQ(src_vars.size(),
-                      dst_vars.size(),
-                      phi::errors::InvalidArgument(
-                          "Sizes of source vars and destination vars are not "
-                          "equal in LinkTensor."));
-    for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
-      AccessTensor(src_scope,
-                   src_vars[i],
-                   dst_scope,
-                   dst_vars[i],
-                   callback,
-                   is_backward);
-    }
-  }
-
-  // for src_tensor, dst_tensor in zip(map(src_scope.FindVar, src_vars),
-  //                                   map(dst_scope.FindVar, dst_vars)):
-  //   callback(src_tensor, &dst_tensor)
-  template <typename Callback>
-  static void LinkTensorWithCallback(const framework::Scope &src_scope,
-                                     const std::vector<std::string> &src_vars,
-                                     const framework::Scope &dst_scope,
-                                     const std::vector<std::string> &dst_vars,
-                                     Callback callback,
-                                     bool is_backward = false) {
-    PADDLE_ENFORCE_EQ(src_vars.size(),
-                      dst_vars.size(),
-                      phi::errors::InvalidArgument(
-                          "Sizes of source vars and destination vars are not "
-                          "equal in LinkTensor."));
-    for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
-      AccessTensor(src_scope,
-                   src_vars[i],
-                   dst_scope,
-                   dst_vars[i],
-                   callback,
-                   is_backward);
-    }
-  }
-
-  // (seq_len, shape) -> return [seq_len] + list(shape)
-  static phi::DDim PrependDims(size_t seq_len, const phi::DDim &src);
-
- private:
-  template <typename Callback>
-  static void AccessTensor(const framework::Scope &src_scope,
-                           const std::string &src_var_name,
-                           framework::Scope *dst_scope,
-                           const std::string &dst_var_name,
-                           Callback callback,
-                           bool is_backward = false) {
-    auto *src_var = src_scope.FindVar(src_var_name);
-    if (is_backward && src_var == nullptr) {
-      return;
-    }
-    PADDLE_ENFORCE_NOT_NULL(
-        src_var,
-        phi::errors::NotFound("Source variable %s is not found.",
-                              src_var_name));
-    auto &src_tensor = src_var->Get<phi::DenseTensor>();
-
-    auto *dst_var = dst_scope->Var(dst_var_name);
-    auto *dst_tensor = dst_var->GetMutable<phi::DenseTensor>();
-    callback(src_tensor, dst_tensor);
-  }
-
-  template <typename Callback>
-  static void AccessTensor(const framework::Scope &src_scope,
-                           const std::string &src_var_name,
-                           const framework::Scope &dst_scope,
-                           const std::string &dst_var_name,
-                           Callback callback,
-                           bool is_backward = false) {
-    auto *dst_var = dst_scope.FindVar(dst_var_name);
-    if (is_backward && dst_var == nullptr) {
-      return;
-    }
-    auto *src_var = src_scope.FindVar(src_var_name);
-    PADDLE_ENFORCE_NOT_NULL(
-        src_var,
-        phi::errors::NotFound("Source variable %s is not found.",
-                              src_var_name));
-    auto &src_tensor = src_var->Get<phi::DenseTensor>();
-    PADDLE_ENFORCE_NOT_NULL(
-        dst_var,
-        phi::errors::NotFound("Destination variable %s is not found.",
-                              src_var_name));
-    auto *dst_tensor = dst_var->GetMutable<phi::DenseTensor>();
-    callback(src_tensor, dst_tensor);
-  }
-};
-
-class RecurrentOp : public RecurrentBase {
- public:
-  RecurrentOp(const std::string &type,
-              const framework::VariableNameMap &inputs,
-              const framework::VariableNameMap &outputs,
-              const framework::AttributeMap &attrs);
-
- private:
-  void RunImpl(const framework::Scope &scope,
-               const phi::Place &place) const override;
-
- private:
-  StepScopes CreateStepScopes(const platform::DeviceContext &dev_ctx,
-                              const framework::Scope &scope,
-                              size_t seq_len) const;
-};
-
-class RecurrentGradOp : public RecurrentBase {
- public:
-  RecurrentGradOp(const std::string &type,
-                  const framework::VariableNameMap &inputs,
-                  const framework::VariableNameMap &outputs,
-                  const framework::AttributeMap &attrs);
-
- private:
-  void RunImpl(const framework::Scope &scope,
-               const phi::Place &place) const override;
-
-  StepScopes CreateStepScopes(const platform::DeviceContext &dev_ctx,
-                              const framework::Scope &scope,
-                              size_t seq_len) const;
-
-  std::unordered_set<std::string> List2Set(
-      const std::vector<std::string> &list) const;
-
-  std::unordered_set<std::string> LocalVarNames(
-      const framework::Scope &scope) const;
-
-  static std::vector<std::string> GradVarLists(
-      const std::vector<std::string> &var_names);
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/test/cpp/fluid/framework/CMakeLists.txt b/test/cpp/fluid/framework/CMakeLists.txt
index de3b99610d1f5..8fd806bc18570 100644
--- a/test/cpp/fluid/framework/CMakeLists.txt
+++ b/test/cpp/fluid/framework/CMakeLists.txt
@@ -198,7 +198,7 @@ endif()
 cc_test(
   prune_test
   SRCS prune_test.cc
-  DEPS op_info prune recurrent_op device_context)
+  DEPS op_info prune device_context)
 cc_test(
   var_type_inference_test
   SRCS var_type_inference_test.cc