PaddlePaddle · luotao1 · Dec 15, 2022 · Dec 9, 2022 · Dec 9, 2022 · Dec 9, 2022
diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu
@@ -13,13 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h"
-#include "paddle/fluid/operators/math/softmax_impl.h"
+
+#include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #include "paddle/fluid/string/string_helper.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/kernels/funcs/axis_utils.h"
 #include "paddle/phi/kernels/funcs/cross_entropy.h"
+#include "paddle/phi/kernels/funcs/softmax_impl.h"
 
 namespace paddle {
 namespace operators {
@@ -129,15 +131,15 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     softmax_2d.ShareDataWith(*softmax).Resize({N, D});
     loss_2d.ShareDataWith(*loss).Resize({N, 1});
 
-    auto eigen_logits = math::EigenMatrix<T>::From(logits_2d);
-    auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
+    auto eigen_logits = phi::funcs::EigenMatrix<T>::From(logits_2d);
+    auto eigen_softmax = phi::funcs::EigenMatrix<T>::From(softmax_2d);
 
     // step 1, obtain logit_max
     phi::DenseTensor logits_max;
     logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* logits_max_buff = logits_max.mutable_data<T>(place);
 
-    auto eigen_logits_max = math::EigenMatrix<T>::From(logits_max);
+    auto eigen_logits_max = phi::funcs::EigenMatrix<T>::From(logits_max);
     Eigen::DSizes<int, 1> along_axis(1);
     eigen_logits_max.device(*dev_ctx.eigen_device()) =
         eigen_logits.maximum(along_axis);
@@ -158,7 +160,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     eigen_softmax.device(*dev_ctx.eigen_device()) =
         (eigen_logits -
          eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class))
-            .unaryExpr(math::ValueClip<T>());
+            .unaryExpr(phi::funcs::ValueClip<T>());
 
     // step 3, obtain predict target
     phi::DenseTensor predicted_logits;
@@ -217,7 +219,8 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
 
-    auto eigen_sum_exp_logits = math::EigenMatrix<T>::From(sum_exp_logits);
+    auto eigen_sum_exp_logits =
+        phi::funcs::EigenMatrix<T>::From(sum_exp_logits);
     eigen_sum_exp_logits.device(*dev_ctx.eigen_device()) =
         eigen_softmax.sum(along_axis);
 
@@ -231,8 +234,9 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
         comm->comm(),
         stream));
 
-    auto eigen_loss = math::EigenMatrix<T>::From(loss_2d);
-    auto eigen_predicted_logits = math::EigenMatrix<T>::From(predicted_logits);
+    auto eigen_loss = phi::funcs::EigenMatrix<T>::From(loss_2d);
+    auto eigen_predicted_logits =
+        phi::funcs::EigenMatrix<T>::From(predicted_logits);
 
     eigen_loss.device(*dev_ctx.eigen_device()) =
         (eigen_sum_exp_logits.log().unaryExpr(phi::funcs::TolerableValue<T>()) -
@@ -281,14 +285,14 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     softmax_2d.ShareDataWith(*softmax).Resize({N, D});
     loss_2d.ShareDataWith(*loss).Resize({N, 1});
 
-    auto eigen_logits = math::EigenMatrix<T>::From(logits_2d);
-    auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
+    auto eigen_logits = phi::funcs::EigenMatrix<T>::From(logits_2d);
+    auto eigen_softmax = phi::funcs::EigenMatrix<T>::From(softmax_2d);
 
     // step 1, obtain logit_max
     phi::DenseTensor logits_max;
     logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
 
-    auto eigen_logits_max = math::EigenMatrix<T>::From(logits_max);
+    auto eigen_logits_max = phi::funcs::EigenMatrix<T>::From(logits_max);
     Eigen::DSizes<int, 1> along_axis(1);
     eigen_logits_max.device(*dev_ctx.eigen_device()) =
         eigen_logits.maximum(along_axis);
@@ -304,7 +308,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     eigen_softmax.device(*dev_ctx.eigen_device()) =
         (eigen_logits -
          eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class))
-            .unaryExpr(math::ValueClip<T>());
+            .unaryExpr(phi::funcs::ValueClip<T>());
 
     // step 3, obtain predict target
     phi::DenseTensor predicted_logits;
@@ -357,7 +361,8 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
 
-    auto eigen_sum_exp_logits = math::EigenMatrix<T>::From(sum_exp_logits);
+    auto eigen_sum_exp_logits =
+        phi::funcs::EigenMatrix<T>::From(sum_exp_logits);
     eigen_sum_exp_logits.device(*dev_ctx.eigen_device()) =
         eigen_softmax.sum(along_axis);
 
@@ -366,8 +371,9 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     opts.reduce_op = distributed::ReduceOp::SUM;
     pg->AllReduce(in_out, in_out, opts)->Synchronize();
 
-    auto eigen_loss = math::EigenMatrix<T>::From(loss_2d);
-    auto eigen_predicted_logits = math::EigenMatrix<T>::From(predicted_logits);
+    auto eigen_loss = phi::funcs::EigenMatrix<T>::From(loss_2d);
+    auto eigen_predicted_logits =
+        phi::funcs::EigenMatrix<T>::From(predicted_logits);
 
     eigen_loss.device(*dev_ctx.eigen_device()) =
         (eigen_sum_exp_logits.log().unaryExpr(phi::funcs::TolerableValue<T>()) -

diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h
@@ -22,9 +22,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/kernels/funcs/cross_entropy.h"
+#include "paddle/phi/kernels/funcs/softmax.h"
 
 namespace paddle {
 namespace operators {

diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt
@@ -9,7 +9,7 @@ file(APPEND ${jit_file} "\#include \"paddle/fluid/operators/jit/helper.h\"\n")
 file(APPEND ${jit_file}
      "\#include \"paddle/fluid/operators/jit/registry.h\"\n\n")
 
-set(JIT_KERNEL_DEPS cpu_info cblas gflags enforce place xxhash)
+set(JIT_KERNEL_DEPS device_context cblas gflags enforce place xxhash)
 
 file(
   GLOB jit_kernel_cc_srcs

diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
@@ -32,7 +32,6 @@ math_library(maxouting)
 math_library(sequence_padding)
 math_library(sequence_pooling DEPS math_function jit_kernel_helper)
 math_library(sequence_scale)
-math_library(softmax DEPS math_function jit_kernel_helper)
 if(WITH_ASCEND_CL)
   math_library(beam_search DEPS math_function beam_search_npu)
 elseif(WITH_XPU)