Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMP OP&Test] arange op support fp16/bf16 #51106

Merged
merged 18 commits into from
Mar 9, 2023
Merged
11 changes: 8 additions & 3 deletions paddle/phi/kernels/gpu/arange_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/range_function.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/bfloat16.h"

namespace phi {

template <typename T>
// Fills out[0..size) with the arithmetic sequence start, start+step, ...
// Accumulation is done in MPType (float for float16/bfloat16, T otherwise)
// to avoid precision loss when T is a low-precision type.
__global__ void Range(T start, T step, int64_t size, T* out) {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  // start and step are loop-invariant: cast them once up front instead of
  // re-casting on every iteration inside the loop.
  const MPType start_mp = static_cast<MPType>(start);
  const MPType step_mp = static_cast<MPType>(step);
  CUDA_KERNEL_LOOP(index, size) {
    out[index] =
        static_cast<T>(start_mp + step_mp * static_cast<MPType>(index));
  }
}

template <typename T, typename Context>
Expand All @@ -39,7 +43,8 @@ void ArangeKernel(const Context& dev_ctx,
T step_value = GetValue<T, Context>(dev_ctx, step);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

建议直接在这里拿到start、end、step的时候直接转成MPType,后续都使用MPType的值去计算


int64_t size = 0;
phi::funcs::GetSize(start_value, end_value, step_value, &size);
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
phi::funcs::GetSize(static_cast<MPType>(start_value), static_cast<MPType>(end_value), static_cast<MPType>(step_value), &size);
out->Resize(phi::make_ddim({size}));
T* out_data = dev_ctx.template Alloc<T>(out);

Expand All @@ -55,7 +60,7 @@ void ArangeKernel(const Context& dev_ctx,
} // namespace phi

PD_REGISTER_KERNEL(
arange, GPU, ALL_LAYOUT, phi::ArangeKernel, float, double, int64_t, int) {
arange, GPU, ALL_LAYOUT, phi::ArangeKernel, float, double, int64_t, int, phi::dtype::float16, phi::dtype::bfloat16) {
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND);
kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
Expand Down
35 changes: 33 additions & 2 deletions python/paddle/fluid/tests/unittests/test_arange.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里其实不用修改,但是如果改了就把2020改成2023吧

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,7 @@
import unittest

import numpy as np
from eager_op_test import OpTest
from eager_op_test import OpTest, convert_float_to_uint16

import paddle
from paddle.fluid import core
Expand Down Expand Up @@ -57,6 +57,37 @@ def init_config(self):
self.python_api = paddle.arange
self.case = (0, 5, 1)

class TestFloa16ArangeOp(TestArangeOp):
    """float16 variant of TestArangeOp; only the dtype/config differ."""

    def init_config(self):
        self.dtype = np.float16
        self.python_api = paddle.arange
        self.case = (0, 5, 1)

    def test_check_output(self):
        # Default tolerances are appropriate for fp16; no explicit atol needed.
        self.check_output()


@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA and not support the bfloat16",
)
class TestBFloat16ArangeOp(OpTest):
    """bfloat16 arange test.

    Inherits OpTest directly (not TestArangeOp) because bf16 tensors are
    stored as uint16: inputs/outputs must be converted with
    convert_float_to_uint16 in setUp, before the framework collects them.
    """

    def setUp(self):
        self.op_type = "range"
        self.init_config()
        # Inputs/outputs belong in setUp so the op-test framework sees them;
        # building them inside test_check_output is too late.
        self.inputs = {
            'Start': convert_float_to_uint16(self.start),
            'End': convert_float_to_uint16(self.end),
            'Step': convert_float_to_uint16(self.step),
        }
        self.outputs = {
            'Out': convert_float_to_uint16(
                np.arange(self.case[0], self.case[1], self.case[2])
            )
        }

    def init_config(self):
        self.dtype = np.uint16  # bf16 is carried as uint16 in numpy
        self.python_api = arange_wrapper
        self.case = (0, 1, 0.2)
        self.start = np.array([self.case[0]]).astype(np.float32)
        self.end = np.array([self.case[1]]).astype(np.float32)
        self.step = np.array([self.case[2]]).astype(np.float32)

    def test_check_output(self):
        place = core.CUDAPlace(0)
        # Default tolerances are appropriate for bf16; no explicit atol needed.
        self.check_output_with_place(place)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个后面也会改成默认值,这里可以不写


class TestInt32ArangeOp(TestArangeOp):
def init_config(self):
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/tensor/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ def arange(start=0, end=None, step=1, dtype=None, name=None):
check_dtype(
dtype,
'dtype',
['float32', 'float64', 'int32', 'int64'],
['float32', 'float64', 'int32', 'int64', 'float16', 'bfloat16'],
Copy link
Contributor

@ZzSean ZzSean Mar 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

暂不支持bfloat16,需要改成uint16

'range/arange',
)
helper = LayerHelper('range', **locals())
Expand Down