From 756c7bca26da72f2eb32c2db447705e5031ad9fd Mon Sep 17 00:00:00 2001
From: co63oc
Date: Sun, 14 Jul 2024 14:17:29 +0800
Subject: [PATCH 1/3] Fix

---
 paddle/fluid/operators/pull_sparse_op.cc | 150 -----------------------
 1 file changed, 150 deletions(-)
 delete mode 100644 paddle/fluid/operators/pull_sparse_op.cc

diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc
deleted file mode 100644
index 9462e247ad9a7..0000000000000
--- a/paddle/fluid/operators/pull_sparse_op.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/pull_sparse_op.h"
-
-#include <string>
-
-namespace paddle {
-namespace operators {
-
-class PullSparseOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(),
-                      1UL,
-                      phi::errors::InvalidArgument(
-                          "Input(Ids) of PullSparseOp can not be null"));
-    PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(),
-                      1UL,
-                      phi::errors::InvalidArgument(
-                          "Output(Out) of PullSparseOp can not be null"));
-
-    auto hidden_size =
-        static_cast<int64_t>(ctx->Attrs().Get<int>("EmbeddingDim"));
-    auto all_ids_dim = ctx->GetInputsDim("Ids");
-    const size_t n_ids = all_ids_dim.size();
-    std::vector<framework::DDim> outs_dims;
-    outs_dims.resize(n_ids);
-    for (size_t i = 0; i < n_ids; ++i) {
-      const auto ids_dims = all_ids_dim[i];
-      int ids_rank = ids_dims.size();
-      PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1],
-                        1,
-                        phi::errors::InvalidArgument(
-                            "Shape error in %lu id, the last dimension of "
-                            " the 'Ids' tensor must be 1.",
-                            i));
-      auto out_dim =
-          common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1));
-      out_dim.push_back(hidden_size);
-      outs_dims[i] = common::make_ddim(out_dim);
-    }
-    ctx->SetOutputsDim("Out", outs_dims);
-    for (size_t i = 0; i < n_ids; ++i) {
-      ctx->ShareLoD("Ids", "Out", i, i);
-    }
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace());
-  }
-};
-
-class PullSparseOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Ids",
-             "Input tensors with type int64 contains "
-             "the ids to be looked up in PSLib. "
" - "The last dimension size must be 1.") - .AsDuplicable(); - AddInput("W", "The lookup table tensors.").AsDuplicable(); - AddOutput("Out", "The lookup results tensors.").AsDuplicable(); - AddAttr("EmbeddingDim", "(int, the embedding hidden size") - .SetDefault(11); - AddAttr("TableId", "(int, the table id of this embedding") - .SetDefault(0); - AddAttr("AccessorClass", "(string, the class name of accessor") - .SetDefault(""); - AddAttr("CtrLabelName", "(string, ctr label name") - .SetDefault(""); - AddAttr("PaddingId", "(int, the padding id of this embedding") - .SetDefault(0); - AddAttr("ScaleSparseGrad", - "(bool, whether scale sparse gradient with batch size") - .SetDefault(true); - AddAttr>("InputNames", "(vector, slot names") - .SetDefault(std::vector()); - AddAttr("is_distributed", "(bool, it must be true").SetDefault(true); - AddComment(R"DOC( -Pull Sparse Operator. - -This operator is used to perform lookups on the PSLib -then concatenated into a dense tensor. - -The input Ids can carry the LoD (Level of Details) information, -or not. And the output only shares the LoD information with input Ids. - -)DOC"); - } -}; - -template -class PushSparseOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr retv) const override { - retv->SetType("push_sparse"); - retv->SetInput("Ids", this->Input("Ids")); - retv->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - retv->SetInput("W", this->Input("W")); - retv->SetOutput(framework::GradVarName("Out"), this->OutputGrad("Out")); - retv->SetAttrMap(this->Attrs()); - } -}; - -class PushSparseOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override {} - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(pull_sparse, - ops::PullSparseOp, - ops::PullSparseOpMaker, - ops::PushSparseOpMaker, - ops::PushSparseOpMaker); -REGISTER_OPERATOR(push_sparse, ops::PushSparseOp); -PD_REGISTER_STRUCT_KERNEL( - pull_sparse, CPU, ALL_LAYOUT, ops::PullSparseCPUKernel, float) {} -PD_REGISTER_STRUCT_KERNEL( - push_sparse, CPU, ALL_LAYOUT, ops::PushSparseCPUKernel, float) {} From 8bed16134c0ebd65404239caf6252d85c26abf07 Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 15 Jul 2024 14:39:27 +0800 Subject: [PATCH 2/3] Fix --- paddle/fluid/operators/pull_sparse_op.h | 85 ------------------ paddle/fluid/operators/pull_sparse_v2_op.h | 42 +++++++++ python/paddle/base/device_worker.py | 1 - .../fleet/parameter_server/pslib/__init__.py | 89 ------------------- .../pslib/optimizer_factory.py | 2 - 5 files changed, 42 insertions(+), 177 deletions(-) delete mode 100644 paddle/fluid/operators/pull_sparse_op.h diff --git a/paddle/fluid/operators/pull_sparse_op.h b/paddle/fluid/operators/pull_sparse_op.h deleted file mode 100644 index 88d0f437ff506..0000000000000 --- a/paddle/fluid/operators/pull_sparse_op.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/tensor.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-void PullSparseFunctor(const framework::ExecutionContext& ctx) {
-  auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids");
-  auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
-  uint32_t fea_dim = static_cast<uint32_t>(ctx.Attr<int>("EmbeddingDim"));
-  uint64_t padding_id = static_cast<uint64_t>(ctx.Attr<int>("PaddingId"));
-  auto table_id = static_cast<uint32_t>(ctx.Attr<int>("TableId"));
-  // note: GetInstance() is not thread-safe
-  // we assume FleetWrapper has been already initialized
-  auto fleet_ptr = framework::FleetWrapper::GetInstance();
-  fleet_ptr->PullSparseToTensorSync(
-      table_id, fea_dim, padding_id, ctx.GetPlace(), &inputs, &outputs);
-}
-
-template <typename T>
-void PushSparseFunctor(const framework::ExecutionContext& ctx) {
-  auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids");
-  auto grads = ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("Out"));
-  uint32_t fea_dim = static_cast<uint32_t>(ctx.Attr<int>("EmbeddingDim"));
-  std::string accessor = ctx.Attr<std::string>("AccessorClass");
-  bool scale_sparse = ctx.Attr<bool>("ScaleSparseGrad");
-  uint64_t padding_id = static_cast<uint64_t>(ctx.Attr<int>("PaddingId"));
-  const std::string& label_name = ctx.Attr<std::string>("CtrLabelName");
-  const framework::Scope& scope = ctx.scope();
-  auto input_names = ctx.Attr<std::vector<std::string>>("InputNames");
-  auto table_id = static_cast<uint32_t>(ctx.Attr<int>("TableId"));
-  // note: GetInstance() is not thread-safe
-  // we assume FleetWrapper has been already initialized
-  auto fleet_ptr = framework::FleetWrapper::GetInstance();
-  fleet_ptr->PushSparseFromTensorWithLabelAsync(scope,
-                                                table_id,
-                                                fea_dim,
-                                                padding_id,
-                                                scale_sparse,
-                                                accessor,
-                                                label_name,
-                                                ctx.GetPlace(),
-                                                input_names,
-                                                &inputs,
-                                                &grads);
-}
-
-template <typename T, typename DeviceContext>
-class PullSparseCPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PullSparseFunctor<T>(ctx);
-  }
-};
-
-template <typename T, typename DeviceContext>
-class PushSparseCPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PushSparseFunctor<T>(ctx);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/pull_sparse_v2_op.h b/paddle/fluid/operators/pull_sparse_v2_op.h
index 95ce718385780..3be84a52be115 100644
--- a/paddle/fluid/operators/pull_sparse_v2_op.h
+++ b/paddle/fluid/operators/pull_sparse_v2_op.h
@@ -25,6 +25,48 @@
 namespace paddle {
 namespace operators {
 
+template <typename T>
+void PullSparseFunctor(const framework::ExecutionContext& ctx) {
+  auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids");
+  auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
+  uint32_t fea_dim = static_cast<uint32_t>(ctx.Attr<int>("EmbeddingDim"));
+  uint64_t padding_id = static_cast<uint64_t>(ctx.Attr<int>("PaddingId"));
+  auto table_id = static_cast<uint32_t>(ctx.Attr<int>("TableId"));
+  // note: GetInstance() is not thread-safe
+  // we assume FleetWrapper has been already initialized
+  auto fleet_ptr = framework::FleetWrapper::GetInstance();
+  fleet_ptr->PullSparseToTensorSync(
+      table_id, fea_dim, padding_id, ctx.GetPlace(), &inputs, &outputs);
+}
+
+template <typename T>
+void PushSparseFunctor(const framework::ExecutionContext& ctx) {
+  auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids");
+  auto grads = ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("Out"));
+  uint32_t fea_dim = static_cast<uint32_t>(ctx.Attr<int>("EmbeddingDim"));
+  std::string accessor = ctx.Attr<std::string>("AccessorClass");
+  bool scale_sparse = ctx.Attr<bool>("ScaleSparseGrad");
+  uint64_t padding_id = static_cast<uint64_t>(ctx.Attr<int>("PaddingId"));
+  const std::string& label_name = ctx.Attr<std::string>("CtrLabelName");
+  const framework::Scope& scope = ctx.scope();
+  auto input_names = ctx.Attr<std::vector<std::string>>("InputNames");
+  auto table_id = static_cast<uint32_t>(ctx.Attr<int>("TableId"));
+  // note: GetInstance() is not thread-safe
+  // we assume FleetWrapper has been already initialized
+  auto fleet_ptr = framework::FleetWrapper::GetInstance();
+  fleet_ptr->PushSparseFromTensorWithLabelAsync(scope,
+                                                table_id,
+                                                fea_dim,
+                                                padding_id,
+                                                scale_sparse,
+                                                accessor,
+                                                label_name,
+                                                ctx.GetPlace(),
+                                                input_names,
+                                                &inputs,
+                                                &grads);
+}
+
 template <typename T, typename DeviceContext>
 class PullSparseV2CPUKernel : public framework::OpKernel<T> {
  public:
diff --git a/python/paddle/base/device_worker.py b/python/paddle/base/device_worker.py
index a8313efe257b7..c2cf9e5e81fd9 100644
--- a/python/paddle/base/device_worker.py
+++ b/python/paddle/base/device_worker.py
@@ -92,7 +92,6 @@ def _gen_worker_desc(self, trainer_desc):
         trainer_desc.hogwild_param.skip_ops.extend(
             [
                 "feed",
-                "push_sparse",
                 "push_sparse_v2",
                 "push_dense",
                 "distributed_push_sparse",
diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py
index 46be72c0166d7..83dd468d38b47 100644
--- a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py
+++ b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py
@@ -1045,95 +1045,6 @@ def _prepare_params(
         raise ValueError("dtype must be float32")
 
 
-def _fleet_embedding(
-    input,
-    size,
-    is_sparse=False,
-    is_distributed=False,
-    padding_idx=None,
-    param_attr=None,
-    dtype='float32',
-):
-    """
-    Add fleet embedding, this interface is not for users.
-
-    Args:
-        input (Variable|list of Variable): Input is a Tensor Variable.
-        size (list[int]): The embedding dim.
-        is_sparse (bool, optional): Whether input is sparse ids. Default is False.
-        is_distributed (bool, optional): Whether in distributed mode. Default is False.
-        padding_idx (int, optional): Padding idx of input. Default is None.
-        param_attr (ParamAttr, optional): To specify the weight parameter property. Default is None.
-        dtype (str, optional): Data type of output. Default is 'float32'.
- """ - - def _pull_sparse( - input, - size, - table_id, - accessor_class, - name="embedding", - ctr_label_name="", - padding_id=0, - dtype='float32', - scale_sparse_grad=True, - ): - helper = LayerHelper(name, **locals()) - inputs = helper.multiple_input() - outs = [helper.create_variable_for_type_inference(dtype)] - input_names = [i.name for i in inputs] - attrs = { - 'EmbeddingDim': size, - 'TableId': table_id, - 'AccessorClass': accessor_class, - 'CtrLabelName': ctr_label_name, - 'PaddingId': padding_id, - 'ScaleSparseGrad': scale_sparse_grad, - 'InputNames': input_names, - # this is only for compatible with embedding op - 'is_distributed': True, - } - # this is only for compatible with embedding op - w, _ = helper.create_or_get_global_variable( - name=name, - shape=[size], - dtype=dtype, - is_bias=False, - persistable=True, - ) - helper.append_op( - type='pull_sparse', - inputs={'Ids': inputs, 'W': w}, - outputs={'Out': outs}, - attrs=attrs, - ) - if len(outs) == 1: - return outs[0] - return outs - - # check and set params - _prepare_params( - input, size, is_sparse, is_distributed, padding_idx, param_attr, dtype - ) - name = param_attr.name - size = size[-1] - if padding_idx is None: - padding_idx = 0 - global FLEET_GLOBAL_DICT - - return _pull_sparse( - input=input, - size=size, - table_id=FLEET_GLOBAL_DICT["emb_to_table"][name], - accessor_class=FLEET_GLOBAL_DICT["emb_to_accessor"][name], - name=name, - ctr_label_name=FLEET_GLOBAL_DICT["click_name"], - padding_id=padding_idx, - dtype=dtype, - scale_sparse_grad=FLEET_GLOBAL_DICT["scale_sparse_grad"], - ) - - def _fleet_embedding_v2( input, size, diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py index 10c057a7f1a43..87936ba975fbb 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py @@ -101,14 +101,12 @@ def __init__(self, optimizer): ] self.supported_embedding_types = [ "lookup_table", - "pull_sparse", "pull_sparse_v2", "pull_box_sparse", "pull_gpups_sparse", ] self.supported_embedding_grad_types = [ "lookup_table_grad", - "push_sparse", "push_sparse_v2", ] op_maker = core.op_proto_and_checker_maker From 3562ae9bfdd211c6825cb02df6c8e1e003c8457f Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 15 Jul 2024 15:36:17 +0800 Subject: [PATCH 3/3] Fix --- paddle/fluid/operators/pull_sparse_v2_op.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/operators/pull_sparse_v2_op.h b/paddle/fluid/operators/pull_sparse_v2_op.h index 3be84a52be115..6dbf7f3bcb5e0 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.h +++ b/paddle/fluid/operators/pull_sparse_v2_op.h @@ -20,7 +20,6 @@ #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/pull_sparse_op.h" namespace paddle { namespace operators {