diff --git a/paddle/phi/api/yaml/static_ops.yaml b/paddle/phi/api/yaml/static_ops.yaml
index d1d07681ed189..803b0284a91d4 100755
--- a/paddle/phi/api/yaml/static_ops.yaml
+++ b/paddle/phi/api/yaml/static_ops.yaml
@@ -48,7 +48,7 @@
   backward : assign_grad
 
 - op : assign_value
-  args : (int[] shape, DataType dtype, int[] bool_values = {}, float[] fp32_values = {}, int[] int32_values = {}, int64_t[] int64_values = {})
+  args : (int[] shape, DataType dtype, int[] bool_values = {}, float[] fp32_values = {}, float[] fp16_values = {}, int[] int32_values = {}, int64_t[] int64_values = {})
   output : Tensor(out)
   infer_meta :
     func : AssignValueInferMeta
diff --git a/paddle/phi/kernels/assign_kernel.cc b/paddle/phi/kernels/assign_kernel.cc
index 7a6e8d392da1d..905714d3b74ff 100644
--- a/paddle/phi/kernels/assign_kernel.cc
+++ b/paddle/phi/kernels/assign_kernel.cc
@@ -157,6 +157,7 @@ PD_REGISTER_KERNEL(assign_value,
                    phi::AssignValueKernel,
                    bool,
                    int,
+                   phi::dtype::float16,
                    float,
                    int64_t) {}
 #endif
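
With the op signature and GPU kernel registration extended (the argument mapping follows below), `paddle.assign` accepts a float16 ndarray in static-graph mode. A minimal sketch of the resulting behavior, as a hypothetical check script; it assumes a CUDA build that includes this patch, mirroring the skipIf guard in the new test further down:

import numpy as np
import paddle

paddle.enable_static()
main = paddle.static.Program()
with paddle.static.program_guard(main):
    # Lowers to assign_value with the new fp16_values attribute instead
    # of raising TypeError for float16 input.
    x = paddle.assign(np.random.random((2, 5)).astype(np.float16))

exe = paddle.static.Executor(paddle.CUDAPlace(0))
(out,) = exe.run(main, fetch_list=[x])
print(out.dtype)  # float16
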
#include "paddle/phi/kernels/transpose_kernel.h" - namespace phi { namespace detail { @@ -113,6 +114,11 @@ void DeterminantGradKernel(const Context& dev_ctx, return; } + using MPType = typename phi::dtype::MPTypeTrait::Type; + auto origin_dt = std::is_same::value + ? DataType::FLOAT16 + : DataType::BFLOAT16; + // The matrix is invertible // let |A| = Determinant(A) // Ref to https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf @@ -123,16 +129,22 @@ void DeterminantGradKernel(const Context& dev_ctx, DenseTensor inverse_A; // A must be square matrices! inverse_A.Resize(x.dims()); - dev_ctx.template Alloc(&inverse_A); + dev_ctx.template Alloc(&inverse_A); - phi::funcs::MatrixInverseFunctor mat_inv; - mat_inv(dev_ctx, x, &inverse_A); + phi::funcs::MatrixInverseFunctor mat_inv; + if (!std::is_same::value) { + mat_inv(dev_ctx, + phi::Cast(dev_ctx, x, DataType::FLOAT32), + &inverse_A); + } else { + mat_inv(dev_ctx, x, &inverse_A); + } VLOG(3) << "inverse(A) dims: " << inverse_A.dims(); // Second: inverse(A).transpose(-2, -1) DenseTensor transpose_inverse_A = - phi::TransposeLast2Dim(dev_ctx, inverse_A); + phi::TransposeLast2Dim(dev_ctx, inverse_A); VLOG(3) << "(dA * |A|).transpose(-2, -1) dims: " << transpose_inverse_A.dims(); @@ -147,7 +159,15 @@ void DeterminantGradKernel(const Context& dev_ctx, VLOG(3) << "unsqueezed(dA * |A|) dims: " << unsqueeze2.dims(); // Finally: unsqueeze(dA * |A|) * inverse(A) - auto res = phi::Multiply(dev_ctx, unsqueeze2, transpose_inverse_A); + DenseTensor res; + if (!std::is_same::value) { + res = phi::Multiply( + dev_ctx, + unsqueeze2, + phi::Cast(dev_ctx, transpose_inverse_A, origin_dt)); + } else { + res = phi::Multiply(dev_ctx, unsqueeze2, transpose_inverse_A); + } VLOG(3) << "unsqueeze(dA * |A|) * inverse(A) dims: " << res.dims(); diff --git a/paddle/phi/kernels/impl/determinant_kernel_impl.h b/paddle/phi/kernels/impl/determinant_kernel_impl.h index 36e47c78c832c..3c437ad659c43 100644 --- a/paddle/phi/kernels/impl/determinant_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_kernel_impl.h @@ -21,6 +21,7 @@ #include #include "glog/logging.h" +#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_utils.h" @@ -31,6 +32,13 @@ namespace detail { template class EigenMatrix {}; +template <> +class EigenMatrix { + public: + using MatrixType = + Eigen::Matrix; +}; + template <> class EigenMatrix { public: @@ -74,6 +82,7 @@ struct DeterminantFunctor { std::vector input_vec; std::vector output_vec; phi::TensorToVector(input, dev_ctx, &input_vec); + using MPType = typename phi::dtype::MPTypeTrait::Type; for (int64_t i = 0; i < batch_count; ++i) { // maybe can be parallel auto begin_iter = input_vec.begin() + i * rank * rank; auto end_iter = input_vec.begin() + (i + 1) * rank * rank; @@ -85,7 +94,8 @@ struct DeterminantFunctor { matrix(i, j) = sub_vec[rank * i + j]; } } - output_vec.push_back(matrix.determinant()); + output_vec.push_back( + static_cast(matrix.template cast().determinant())); } phi::TensorFromVector(output_vec, dev_ctx, output); } diff --git a/paddle/phi/ops/compat/assign_value_sig.cc b/paddle/phi/ops/compat/assign_value_sig.cc index 0fa1889ccde34..d1c7b83c7b26f 100644 --- a/paddle/phi/ops/compat/assign_value_sig.cc +++ b/paddle/phi/ops/compat/assign_value_sig.cc @@ -36,6 +36,9 @@ KernelSignature AssignValueOpArgumentMapping( } else if (dtype == /*INT64*/ 3) { return KernelSignature( "assign_value", {}, {"shape", "dtype", "int64_values"}, {"Out"}); + } else if (dtype == /*FP16*/ 4) { + 
diff --git a/paddle/phi/ops/compat/assign_value_sig.cc b/paddle/phi/ops/compat/assign_value_sig.cc
index 0fa1889ccde34..d1c7b83c7b26f 100644
--- a/paddle/phi/ops/compat/assign_value_sig.cc
+++ b/paddle/phi/ops/compat/assign_value_sig.cc
@@ -36,6 +36,9 @@ KernelSignature AssignValueOpArgumentMapping(
   } else if (dtype == /*INT64*/ 3) {
     return KernelSignature(
         "assign_value", {}, {"shape", "dtype", "int64_values"}, {"Out"});
+  } else if (dtype == /*FP16*/ 4) {
+    return KernelSignature(
+        "assign_value", {}, {"shape", "dtype", "fp16_values"}, {"Out"});
   } else {
     return KernelSignature("unregistered", {}, {}, {});
   }
diff --git a/python/paddle/fluid/tests/unittests/test_assign_value_op.py b/python/paddle/fluid/tests/unittests/test_assign_value_op.py
index 243dccc242244..18cc68e4a1f81 100644
--- a/python/paddle/fluid/tests/unittests/test_assign_value_op.py
+++ b/python/paddle/fluid/tests/unittests/test_assign_value_op.py
@@ -19,7 +19,7 @@
 import paddle
 from paddle import fluid
-from paddle.fluid import framework
+from paddle.fluid import core, framework
 
 
 def assign_value_wrapper(
@@ -72,6 +72,13 @@ def init_data(self):
         self.attrs["bool_values"] = [int(v) for v in self.value.flat]
 
 
+class TestAssignValueOpFp16(TestAssignValueOp):
+    def init_data(self):
+        self.dtype = np.float16
+        self.value = np.random.random(size=(2, 5)).astype(self.dtype)
+        self.attrs["fp16_values"] = [float(v) for v in self.value.flat]
+
+
 class TestAssignApi(unittest.TestCase):
     def setUp(self):
         with eager_op_test.paddle_static_guard():
@@ -128,5 +135,13 @@ def init_dtype(self):
         self.dtype = "bool"
 
 
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestAssignApiFp16(TestAssignApi):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_determinant_op.py b/python/paddle/fluid/tests/unittests/test_determinant_op.py
index ade000cda8712..8e50f0c5552ec 100644
--- a/python/paddle/fluid/tests/unittests/test_determinant_op.py
+++ b/python/paddle/fluid/tests/unittests/test_determinant_op.py
@@ -50,6 +50,14 @@ def init_data(self):
         self.target = np.linalg.det(self.case)
 
 
+class TestDeterminantOpCase1FP16(TestDeterminantOp):
+    def init_data(self):
+        np.random.seed(0)
+        self.case = np.random.rand(10, 10).astype(np.float16)
+        self.inputs = {'Input': self.case}
+        self.target = np.linalg.det(self.case.astype(np.float32))
+
+
 class TestDeterminantOpCase2(TestDeterminantOp):
     def init_data(self):
         np.random.seed(0)
@@ -59,6 +67,17 @@ def init_data(self):
         self.target = np.linalg.det(self.case)
 
 
+class TestDeterminantOpCase2FP16(TestDeterminantOp):
+    def init_data(self):
+        np.random.seed(0)
+        # not invertible matrix
+        self.case = np.ones([4, 2, 4, 4]).astype(np.float16)
+        self.inputs = {'Input': self.case}
+        self.target = np.linalg.det(self.case.astype(np.float32)).astype(
+            np.float16
+        )
+
+
 class TestDeterminantAPI(unittest.TestCase):
     def setUp(self):
         np.random.seed(0)
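
The new determinant tests use numpy's float32 determinant as the reference, since numpy has no float16 `det`; for the non-invertible case the reference is cast back to float16. For completeness, a dygraph sketch of the user-facing call (again assuming a CUDA build that includes this patch):

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(10, 10).astype(np.float16))
d = paddle.linalg.det(x)
print(d.dtype)  # paddle.float16
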
diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py
index 0bf3d8e948097..e2fa225fd8f7e 100644
--- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py
+++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py
@@ -40,11 +40,14 @@ def init_args(self):
         self.k = 5
         self.axis = -1
 
+    def init_dtype(self):
+        self.dtype = np.float64
+
     def setUp(self):
         self.op_type = "kthvalue"
         self.python_api = paddle.kthvalue
-        self.dtype = np.float64
-        self.input_data = np.random.random((2, 1, 2, 4, 10))
+        self.init_dtype()
+        self.input_data = np.random.random((2, 1, 2, 4, 10)).astype(self.dtype)
         self.init_args()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis}
@@ -62,17 +65,25 @@ def test_check_grad(self):
         self.check_grad({'X'}, 'Out')
 
 
+class TestKthvalueOpFp16(TestKthvalueOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestKthvalueOpWithKeepdim(OpTest):
     def init_args(self):
         self.k = 2
         self.axis = 1
 
+    def init_dtype(self):
+        self.dtype = np.float64
+
     def setUp(self):
         self.init_args()
+        self.init_dtype()
         self.op_type = "kthvalue"
         self.python_api = paddle.kthvalue
-        self.dtype = np.float64
-        self.input_data = np.random.random((1, 3, 2, 4, 10))
+        self.input_data = np.random.random((1, 3, 2, 4, 10)).astype(self.dtype)
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'keepdim': True}
         output, indices = cal_kthvalue(
@@ -89,6 +100,11 @@ def test_check_grad(self):
         self.check_grad({'X'}, 'Out')
 
 
+class TestKthvalueOpWithKeepdimFp16(TestKthvalueOpWithKeepdim):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestKthvalueOpKernels(unittest.TestCase):
     def setUp(self):
         self.axises = [2, -1]
diff --git a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py
index 60af417ebc545..0039d4ee422e8 100644
--- a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py
+++ b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py
@@ -76,6 +76,14 @@ def get_x_shape(self):
         return [100, 300]
 
 
+class TestMeshgridOp2Fp16(TestMeshgridOp):
+    def get_x_shape(self):
+        return [100, 300]
+
+    def get_dtype(self):
+        return np.float16
+
+
 class TestMeshgridOp3(unittest.TestCase):
     def test_api(self):
         x = paddle.static.data(shape=[100], dtype='int32', name='x')
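
The kthvalue and meshgrid changes follow the same recipe: register float16 in the GPU kernels, thread a dtype hook through the tests, and widen the documented dtype lists below. A quick dygraph sketch of both ops on float16 input (assuming a CUDA build that includes this patch):

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(100).astype(np.float16))
y = paddle.to_tensor(np.random.rand(200).astype(np.float16))
gx, gy = paddle.meshgrid(x, y)  # both float16, shape [100, 200]

v = paddle.to_tensor(np.random.rand(2, 4, 10).astype(np.float16))
values, indices = paddle.kthvalue(v, k=3, axis=-1)  # 3rd smallest along last axis
print(gx.shape, values.dtype)
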
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index 186eda03e74d8..ee32f8e5f0fd0 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -1512,7 +1512,7 @@ def meshgrid(*args, **kwargs):
     Args:
         *args(Tensor|list of Tensor) : tensors (tuple(list) of tensor): the shapes of input k tensors are (N1,),
-            (N2,),..., (Nk,). Support data types: ``float64``, ``float32``, ``int32``, ``int64``.
+            (N2,),..., (Nk,). Support data types: ``float64``, ``float32``, ``float16``, ``int32``, ``int64``.
         **kwargs (optional): Currently, only accept name in **kwargs
             The default value is None. Normally there is no need for user to set this property.
             For more information, please refer to :ref:`api_guide_Name`.
@@ -2123,6 +2123,9 @@ def convert_scalar(x):
         if dtype == core.VarDesc.VarType.BOOL:
             value_name = "bool_values"
             values = [int(v) for v in input.flat]
+        elif dtype == core.VarDesc.VarType.FP16:
+            value_name = "fp16_values"
+            values = [float(v) for v in input.flat]
         elif dtype == core.VarDesc.VarType.FP32:
             value_name = "fp32_values"
             values = [float(v) for v in input.flat]
@@ -2135,7 +2138,7 @@ def convert_scalar(x):
         else:
             raise TypeError(
                 "When the type of 'input' in assign is numpy.ndarray, "
-                "the data type of 'input' must be bool, float32, int32 or int64, but "
+                "the data type of 'input' must be bool, float16, float32, int32 or int64, but "
                 "received %s."
                 % convert_dtype(dtype)
             )
         if input.size > 1024 * 1024:
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 3dcbc7c6ac63b..2235cf93cfb60 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -1809,7 +1809,7 @@ def det(x, name=None):
     if in_dygraph_mode():
         return _C_ops.det(x)
     else:
-        check_dtype(x.dtype, 'Input', ['float32', 'float64'], 'det')
+        check_dtype(x.dtype, 'Input', ['float16', 'float32', 'float64'], 'det')
 
         input_shape = list(x.shape)
         assert len(input_shape) >= 2, (
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index c9c1d4c35c165..9fc8e39a9ed82 100755
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -1074,7 +1074,7 @@ def kthvalue(x, k, axis=None, keepdim=False, name=None):
     Find values and indices of the k-th smallest at the axis.
 
     Args:
-        x(Tensor): A N-D Tensor with type float32, float64, int32, int64.
+        x(Tensor): A N-D Tensor with type float16, float32, float64, int32, int64.
         k(int): The k for the k-th smallest number to look for along the axis.
         axis(int, optional): Axis to compute indices along. The effective range
            is [-R, R), where R is x.ndim. when axis < 0, it works the same way