[Hackathon No.48] Implement float16 data type support for the Paddle assign_value, meshgrid, kthvalue, and determinant operators #52046

Closed
wants to merge 10 commits
2 changes: 1 addition & 1 deletion paddle/phi/api/yaml/static_ops.yaml
@@ -48,7 +48,7 @@
backward : assign_grad

- op : assign_value
args : (int[] shape, DataType dtype, int[] bool_values = {}, float[] fp32_values = {}, int[] int32_values = {}, int64_t[] int64_values = {})
args : (int[] shape, DataType dtype, int[] bool_values = {}, float[] fp32_values = {}, float[] fp16_values = {}, int[] int32_values = {}, int64_t[] int64_values = {})
output : Tensor(out)
infer_meta :
func : AssignValueInferMeta
1 change: 1 addition & 0 deletions paddle/phi/kernels/assign_kernel.cc
@@ -157,6 +157,7 @@ PD_REGISTER_KERNEL(assign_value,
phi::AssignValueKernel,
bool,
int,
phi::dtype::float16,
float,
int64_t) {}
#endif
1 change: 1 addition & 0 deletions paddle/phi/kernels/gpu/determinant_grad_kernel.cu
@@ -21,5 +21,6 @@ PD_REGISTER_KERNEL(determinant_grad,
GPU,
ALL_LAYOUT,
phi::DeterminantGradKernel,
phi::dtype::float16,
float,
double) {}
9 changes: 7 additions & 2 deletions paddle/phi/kernels/gpu/determinant_kernel.cu
@@ -17,5 +17,10 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/determinant_kernel_impl.h"

PD_REGISTER_KERNEL(
determinant, GPU, ALL_LAYOUT, phi::DeterminantKernel, float, double) {}
PD_REGISTER_KERNEL(determinant,
GPU,
ALL_LAYOUT,
phi::DeterminantKernel,
phi::dtype::float16,
float,
double) {}
1 change: 1 addition & 0 deletions paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc
@@ -22,6 +22,7 @@ PD_REGISTER_KERNEL(meshgrid_grad,
GPU,
ALL_LAYOUT,
phi::MeshgridGradKernel,
phi::dtype::float16,
float,
double,
int,
1 change: 1 addition & 0 deletions paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc
@@ -22,6 +22,7 @@ PD_REGISTER_KERNEL(meshgrid,
GPU,
ALL_LAYOUT,
phi::MeshgridKernel,
phi::dtype::float16,
float,
double,
int,
32 changes: 26 additions & 6 deletions paddle/phi/kernels/impl/determinant_grad_kernel_impl.h
@@ -15,8 +15,10 @@
#pragma once

#include "glog/logging.h"
#include "paddle/phi/common/amp_type_traits.h"

#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/determinant_grad_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
@@ -26,7 +28,6 @@
#include "paddle/phi/kernels/funcs/matrix_inverse.h"
#include "paddle/phi/kernels/funcs/unsqueeze.h"
#include "paddle/phi/kernels/transpose_kernel.h"

namespace phi {
namespace detail {

@@ -113,6 +114,11 @@ void DeterminantGradKernel(const Context& dev_ctx,
return;
}

using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
auto origin_dt = std::is_same<phi::dtype::float16, T>::value
? DataType::FLOAT16
: DataType::BFLOAT16;

// The matrix is invertible
// let |A| = Determinant(A)
// Ref to https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf
@@ -123,16 +129,22 @@
DenseTensor inverse_A;
// A must be square matrices!
inverse_A.Resize(x.dims());
dev_ctx.template Alloc<T>(&inverse_A);
dev_ctx.template Alloc<MPType>(&inverse_A);

phi::funcs::MatrixInverseFunctor<Context, T> mat_inv;
mat_inv(dev_ctx, x, &inverse_A);
phi::funcs::MatrixInverseFunctor<Context, MPType> mat_inv;
if (!std::is_same<MPType, T>::value) {
mat_inv(dev_ctx,
phi::Cast<T, Context>(dev_ctx, x, DataType::FLOAT32),
&inverse_A);
} else {
mat_inv(dev_ctx, x, &inverse_A);
}

VLOG(3) << "inverse(A) dims: " << inverse_A.dims();

// Second: inverse(A).transpose(-2, -1)
DenseTensor transpose_inverse_A =
phi::TransposeLast2Dim<T>(dev_ctx, inverse_A);
phi::TransposeLast2Dim<MPType>(dev_ctx, inverse_A);

VLOG(3) << "(dA * |A|).transpose(-2, -1) dims: "
<< transpose_inverse_A.dims();
@@ -147,7 +159,15 @@
VLOG(3) << "unsqueezed(dA * |A|) dims: " << unsqueeze2.dims();

// Finally: unsqueeze(dA * |A|) * inverse(A)
auto res = phi::Multiply<T>(dev_ctx, unsqueeze2, transpose_inverse_A);
DenseTensor res;
if (!std::is_same<MPType, T>::value) {
res = phi::Multiply<T>(
dev_ctx,
unsqueeze2,
phi::Cast<MPType, Context>(dev_ctx, transpose_inverse_A, origin_dt));
} else {
res = phi::Multiply<T>(dev_ctx, unsqueeze2, transpose_inverse_A);
}

VLOG(3) << "unsqueeze(dA * |A|) * inverse(A) dims: " << res.dims();

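For reference, the identity this kernel implements, d|A|/dA = |A| * inverse(A)^T, is easy to check numerically. A minimal numpy sketch (illustration only, not part of this diff; float64 is used here for the same stability reason the kernel promotes float16 to float32 via MPTypeTrait):

import numpy as np

np.random.seed(0)
A = np.random.rand(4, 4)
det_A = np.linalg.det(A)
analytic = det_A * np.linalg.inv(A).T  # the closed form the kernel uses

# Compare against a forward finite difference on each entry of A.
eps = 1e-6
numeric = np.zeros_like(A)
for i in range(4):
    for j in range(4):
        A_eps = A.copy()
        A_eps[i, j] += eps
        numeric[i, j] = (np.linalg.det(A_eps) - det_A) / eps

assert np.allclose(analytic, numeric, atol=1e-4)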
12 changes: 11 additions & 1 deletion paddle/phi/kernels/impl/determinant_kernel_impl.h
@@ -21,6 +21,7 @@
#include <vector>

#include "glog/logging.h"
#include "paddle/phi/common/amp_type_traits.h"

#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/tensor_utils.h"
@@ -31,6 +32,13 @@ namespace detail {
template <typename T>
class EigenMatrix {};

template <>
class EigenMatrix<phi::dtype::float16> {
public:
using MatrixType =
Eigen::Matrix<phi::dtype::float16, Eigen::Dynamic, Eigen::Dynamic>;
};

template <>
class EigenMatrix<float> {
public:
@@ -74,6 +82,7 @@ struct DeterminantFunctor {
std::vector<T> input_vec;
std::vector<T> output_vec;
phi::TensorToVector(input, dev_ctx, &input_vec);
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
for (int64_t i = 0; i < batch_count; ++i) { // maybe can be parallel
auto begin_iter = input_vec.begin() + i * rank * rank;
auto end_iter = input_vec.begin() + (i + 1) * rank * rank;
@@ -85,7 +94,8 @@
matrix(i, j) = sub_vec[rank * i + j];
}
}
output_vec.push_back(matrix.determinant());
output_vec.push_back(
static_cast<T>(matrix.template cast<MPType>().determinant()));
}
phi::TensorFromVector(output_vec, dev_ctx, output);
}
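The cast to MPType before calling determinant() follows the same widen-compute-narrow pattern the new Python tests use. A rough numpy illustration (numpy's linalg routines likewise have no float16 path, which is why the tests call astype(np.float32) first):

import numpy as np

np.random.seed(0)
a = np.random.rand(10, 10).astype(np.float16)
# Widen to float32 for the computation, then narrow the result,
# mirroring matrix.template cast<MPType>().determinant() above.
det16 = np.float16(np.linalg.det(a.astype(np.float32)))
ref = np.linalg.det(a.astype(np.float64))
print(det16, ref)  # equal up to float16 rounding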
3 changes: 3 additions & 0 deletions paddle/phi/ops/compat/assign_value_sig.cc
@@ -36,6 +36,9 @@ KernelSignature AssignValueOpArgumentMapping(
} else if (dtype == /*INT64*/ 3) {
return KernelSignature(
"assign_value", {}, {"shape", "dtype", "int64_values"}, {"Out"});
} else if (dtype == /*FP16*/ 4) {
return KernelSignature(
"assign_value", {}, {"shape", "dtype", "fp16_values"}, {"Out"});
} else {
return KernelSignature("unregistered", {}, {}, {});
}
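Taken together with the yaml signature and kernel registration above, this new FP16 branch completes the plumbing for float16 assign. A minimal usage sketch (illustration only, assuming a build that includes this PR):

import numpy as np
import paddle

# A float16 numpy input now maps onto the new fp16_values attribute
# and the float16 assign_value kernel registered above.
data = np.random.rand(2, 5).astype(np.float16)
out = paddle.assign(data)
print(out.dtype)  # paddle.float16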
20 changes: 19 additions & 1 deletion python/paddle/fluid/tests/unittests/test_assign_value_op.py
@@ -19,7 +19,7 @@

import paddle
from paddle import fluid
from paddle.fluid import framework
from paddle.fluid import core, framework


def assign_value_wrapper(
@@ -72,6 +72,16 @@ def init_data(self):
self.attrs["bool_values"] = [int(v) for v in self.value.flat]


@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
Contributor:
Can this skip be removed? The unit-test framework can now automatically skip fp16 tests on devices that don't support them, and the other fp16 unit tests that follow don't add this decorator.

Contributor (Author):
Got it. You're right that the other fp16 unit tests didn't add this decorator, but this one didn't seem to skip properly and failed. I'll remove it and re-run CI to check.

class TestAssignValueOpFp16(TestAssignValueOp):
def init_data(self):
self.dtype = np.float16
self.value = np.random.random(size=(2, 5)).astype(self.dtype)
self.attrs["fp16_values"] = [float(v) for v in self.value.flat]


class TestAssignApi(unittest.TestCase):
def setUp(self):
with eager_op_test.paddle_static_guard():
@@ -128,5 +138,13 @@ def init_dtype(self):
self.dtype = "bool"


@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
Contributor:
The decorator here should not be removed, because the base class inherits from unittest.TestCase, which has no automatic skip mechanism.

Contributor (Author):
done

class TestAssignApiFp16(TestAssignApi):
def init_dtype(self):
self.dtype = np.float16


if __name__ == '__main__':
unittest.main()
19 changes: 19 additions & 0 deletions python/paddle/fluid/tests/unittests/test_determinant_op.py
@@ -50,6 +50,14 @@ def init_data(self):
self.target = np.linalg.det(self.case)


class TestDeterminantOpCase1FP16(TestDeterminantOp):
def init_data(self):
np.random.seed(0)
self.case = np.random.rand(10, 10).astype(np.float16)
self.inputs = {'Input': self.case}
self.target = np.linalg.det(self.case.astype(np.float32))


class TestDeterminantOpCase2(TestDeterminantOp):
def init_data(self):
np.random.seed(0)
@@ -59,6 +67,17 @@ def init_data(self):
self.target = np.linalg.det(self.case)


class TestDeterminantOpCase2FP16(TestDeterminantOp):
def init_data(self):
np.random.seed(0)
# not invertible matrix
self.case = np.ones([4, 2, 4, 4]).astype(np.float16)
self.inputs = {'Input': self.case}
self.target = np.linalg.det(self.case.astype(np.float32)).astype(
np.float16
)


class TestDeterminantAPI(unittest.TestCase):
def setUp(self):
np.random.seed(0)
24 changes: 20 additions & 4 deletions python/paddle/fluid/tests/unittests/test_kthvalue_op.py
@@ -40,11 +40,14 @@ def init_args(self):
self.k = 5
self.axis = -1

def init_dtype(self):
self.dtype = np.float64

def setUp(self):
self.op_type = "kthvalue"
self.python_api = paddle.kthvalue
self.dtype = np.float64
self.input_data = np.random.random((2, 1, 2, 4, 10))
self.init_dtype()
self.input_data = np.random.random((2, 1, 2, 4, 10)).astype(self.dtype)
self.init_args()
self.inputs = {'X': self.input_data}
self.attrs = {'k': self.k, 'axis': self.axis}
@@ -62,17 +65,25 @@
self.check_grad({'X'}, 'Out')


class TestKthvalueOpFp16(TestKthvalueOp):
def init_dtype(self):
self.dtype = np.float16


class TestKthvalueOpWithKeepdim(OpTest):
def init_args(self):
self.k = 2
self.axis = 1

def init_dtype(self):
self.dtype = np.float64

def setUp(self):
self.init_args()
self.init_dtype()
self.op_type = "kthvalue"
self.python_api = paddle.kthvalue
self.dtype = np.float64
self.input_data = np.random.random((1, 3, 2, 4, 10))
self.input_data = np.random.random((1, 3, 2, 4, 10)).astype(self.dtype)
self.inputs = {'X': self.input_data}
self.attrs = {'k': self.k, 'axis': self.axis, 'keepdim': True}
output, indices = cal_kthvalue(
@@ -89,6 +100,11 @@
self.check_grad({'X'}, 'Out')


class TestKthvalueOpWithKeepdimFp16(TestKthvalueOpWithKeepdim):
def init_dtype(self):
self.dtype = np.float16


class TestKthvalueOpKernels(unittest.TestCase):
def setUp(self):
self.axises = [2, -1]
8 changes: 8 additions & 0 deletions python/paddle/fluid/tests/unittests/test_meshgrid_op.py
@@ -75,6 +75,14 @@ def get_x_shape(self):
return [100, 300]


class TestMeshgridOp2Fp16(TestMeshgridOp):
def get_x_shape(self):
return [100, 300]

def get_dtype(self):
return np.float16


class TestMeshgridOp3(unittest.TestCase):
def test_api(self):
x = paddle.static.data(shape=[100], dtype='int32', name='x')
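A short usage sketch for the newly registered dtype (illustration only, assuming a device with float16 support):

import numpy as np
import paddle

x = paddle.to_tensor(np.arange(3, dtype=np.float16))
y = paddle.to_tensor(np.arange(4, dtype=np.float16))
grid_x, grid_y = paddle.meshgrid(x, y)
print(grid_x.shape, grid_x.dtype)  # [3, 4], paddle.float16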
7 changes: 5 additions & 2 deletions python/paddle/tensor/creation.py
@@ -1513,7 +1513,7 @@ def meshgrid(*args, **kwargs):

Args:
*args(Tensor|list of Tensor) : tensors (tuple(list) of tensor): the shapes of input k tensors are (N1,),
(N2,),..., (Nk,). Support data types: ``float64``, ``float32``, ``int32``, ``int64``.
(N2,),..., (Nk,). Support data types: ``float64``, ``float32``, ``float16``, ``int32``, ``int64``.
**kwargs (optional): Currently, only accept name in **kwargs
The default value is None. Normally there is no need for
user to set this property. For more information, please refer to :ref:`api_guide_Name`.
@@ -2124,6 +2124,9 @@ def convert_scalar(x):
if dtype == core.VarDesc.VarType.BOOL:
value_name = "bool_values"
values = [int(v) for v in input.flat]
elif dtype == core.VarDesc.VarType.FP16:
value_name = "fp16_values"
values = [float(v) for v in input.flat]
elif dtype == core.VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in input.flat]
@@ -2136,7 +2139,7 @@
else:
raise TypeError(
"When the type of 'input' in assign is numpy.ndarray, "
"the data type of 'input' must be bool, float32, int32 or int64, but "
"the data type of 'input' must be bool, float16, float32, int32 or int64, but "
"received %s." % convert_dtype(dtype)
)
if input.size > 1024 * 1024:
2 changes: 1 addition & 1 deletion python/paddle/tensor/linalg.py
@@ -1800,7 +1800,7 @@ def det(x, name=None):
if in_dygraph_mode():
return _C_ops.det(x)
else:
check_dtype(x.dtype, 'Input', ['float32', 'float64'], 'det')
check_dtype(x.dtype, 'Input', ['float16', 'float32', 'float64'], 'det')

input_shape = list(x.shape)
assert len(input_shape) >= 2, (
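With the relaxed check_dtype plus the GPU registrations above, float16 determinants become reachable from Python. A minimal sketch (assuming a CUDA device, since this PR registers the fp16 determinant kernels for GPU only):

import numpy as np
import paddle

paddle.set_device('gpu')
x = paddle.to_tensor(np.random.rand(3, 3).astype(np.float16))
print(paddle.linalg.det(x))  # computed through the float32 MPType path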
2 changes: 1 addition & 1 deletion python/paddle/tensor/search.py
@@ -1074,7 +1074,7 @@ def kthvalue(x, k, axis=None, keepdim=False, name=None):
Find values and indices of the k-th smallest at the axis.

Args:
x(Tensor): A N-D Tensor with type float32, float64, int32, int64.
x(Tensor): A N-D Tensor with type float16, float32, float64, int32, int64.
k(int): The k for the k-th smallest number to look for along the axis.
axis(int, optional): Axis to compute indices along. The effective range
is [-R, R), where R is x.ndim. when axis < 0, it works the same way
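A matching usage sketch for kthvalue (illustration only, assuming a device with float16 support):

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(2, 4, 10).astype(np.float16))
values, indices = paddle.kthvalue(x, k=3, axis=-1)
print(values.dtype, indices.dtype)  # paddle.float16, paddle.int64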