From b67480c81c7c0437a0de9364ff46910901e03399 Mon Sep 17 00:00:00 2001
From: YuanRisheng <yuanrisheng@baidu.com>
Date: Tue, 19 Oct 2021 09:26:24 +0000
Subject: [PATCH] adapt Flatten kernel for new register architecture

---
 paddle/fluid/operators/flatten_op.cc | 19 +++++++++++++++++++
 paddle/fluid/operators/flatten_op.h  | 19 +++++++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc
index 14f2e9061b742..0c67578420a72 100644
--- a/paddle/fluid/operators/flatten_op.cc
+++ b/paddle/fluid/operators/flatten_op.cc
@@ -365,6 +365,25 @@ class FlattenContiguousRangeOp : public framework::OperatorWithKernel {
 
     return out_shape;
   }
+
+  framework::KernelSignature GetExpectedPtKernelArgs(
+      const framework::ExecutionContext &ctx) const override {
+    if (ctx.HasOutput("XShape")) {  // XShape requested: use ".mid" kernel
+      return std::make_pair(
+          "flatten_contiguous_range.mid",
+          std::make_tuple(  // names of inputs, attributes, outputs
+              paddle::SmallVector<std::string>({"X"}),
+              paddle::SmallVector<std::string>({"start_axis", "stop_axis"}),
+              paddle::SmallVector<std::string>({"Out", "XShape"})));
+    } else {  // no XShape output: plain kernel, Out is the only output
+      return std::make_pair(
+          "flatten_contiguous_range",
+          std::make_tuple(  // names of inputs, attributes, outputs
+              paddle::SmallVector<std::string>({"X"}),
+              paddle::SmallVector<std::string>({"start_axis", "stop_axis"}),
+              paddle::SmallVector<std::string>({"Out"})));
+    }
+  }
 };
 
 class FlattenContiguousRangeOpMaker : public FlattenOpMaker {
diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h
index efcb0cbe2e2a8..40fd7b05d9a49 100644
--- a/paddle/fluid/operators/flatten_op.h
+++ b/paddle/fluid/operators/flatten_op.h
@@ -15,10 +15,13 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tcmpt_utils.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/tcmpt/api/include/core.h"
+#include "paddle/tcmpt/api/include/manipulation.h"
 
 namespace paddle {
 namespace operators {
@@ -122,13 +125,17 @@ class FlattenContiguousRangeKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext &context) const override {
     auto *in = context.Input<framework::LoDTensor>("X");
     auto *out = context.Output<framework::LoDTensor>("Out");
-    auto out_dims = out->dims();
-
     out->mutable_data(context.GetPlace(), in->type());
-    framework::TensorCopy(
-        *in, context.GetPlace(),
-        context.template device_context<platform::DeviceContext>(), out);
-    out->Resize(out_dims);
+    auto &start_axis = context.Attr<int>("start_axis");
+    auto &stop_axis = context.Attr<int>("stop_axis");
+    auto &dev_ctx = context.device_context<DeviceContext>();
+    auto pt_x = framework::MakeTensorImpl<pt::DenseTensor>(*in, in->place(),
+                                                           in->type());
+    auto pt_out = framework::MakeTensorImpl<pt::DenseTensor>(*out, out->place(),
+                                                             out->type());
+
+    // Hand the wrapped tensors and axis attrs to the new pt::Flatten kernel.
+    pt::Flatten<T>(dev_ctx, *pt_x.get(), start_axis, stop_axis, pt_out.get());
   }
 };