PaddlePaddle · luotao1 · Apr 27, 2023 · Feb 27, 2023 · Feb 28, 2023 · Mar 6, 2023
diff --git a/paddle/phi/kernels/funcs/maxouting.cc b/paddle/phi/kernels/funcs/maxouting.cc
@@ -108,8 +108,10 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
 }
 
 template class MaxOutGradFunctor<phi::CPUContext, float>;
+template class MaxOutGradFunctor<phi::CPUContext, phi::dtype::float16>;
 template class MaxOutGradFunctor<phi::CPUContext, double>;
 template class MaxOutFunctor<phi::CPUContext, float>;
+template class MaxOutFunctor<phi::CPUContext, phi::dtype::float16>;
 template class MaxOutFunctor<phi::CPUContext, double>;
 
 }  // namespace funcs

diff --git a/paddle/phi/kernels/funcs/maxouting.cu b/paddle/phi/kernels/funcs/maxouting.cu
@@ -175,9 +175,11 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
 }
 
 template class MaxOutGradFunctor<phi::GPUContext, float>;
+template class MaxOutGradFunctor<phi::GPUContext, phi::dtype::float16>;
 template class MaxOutGradFunctor<phi::GPUContext, double>;
 
 template class MaxOutFunctor<phi::GPUContext, float>;
+template class MaxOutFunctor<phi::GPUContext, phi::dtype::float16>;
 template class MaxOutFunctor<phi::GPUContext, double>;
 
 }  // namespace funcs

diff --git a/paddle/phi/kernels/gpu/maxout_grad_kernel.cu b/paddle/phi/kernels/gpu/maxout_grad_kernel.cu
@@ -15,5 +15,10 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/maxout_grad_kernel_impl.h"
 
-PD_REGISTER_KERNEL(
-    maxout_grad, GPU, ALL_LAYOUT, phi::MaxOutGradKernel, float, double) {}
+PD_REGISTER_KERNEL(maxout_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::MaxOutGradKernel,
+                   float,
+                   phi::dtype::float16,
+                   double) {}
diff --git a/paddle/phi/kernels/gpu/maxout_kernel.cu b/paddle/phi/kernels/gpu/maxout_kernel.cu
@@ -15,4 +15,10 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/maxout_kernel_impl.h"
 
-PD_REGISTER_KERNEL(maxout, GPU, ALL_LAYOUT, phi::MaxOutKernel, float, double) {}
+PD_REGISTER_KERNEL(maxout,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::MaxOutKernel,
+                   float,
+                   phi::dtype::float16,
+                   double) {}
diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py
@@ -136,5 +136,67 @@ def test_errors(self):
             self.assertRaises(ValueError, F.maxout, x_float32, 2, 2)
 
 
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestMaxOutOpFP16(OpTest):
+    """Checks forward output and gradient of the `maxout` op for float16.
+
+    Runs only on CUDA builds, and only on devices for which
+    `core.is_float16_supported` returns True. Subclasses vary the
+    configuration by overriding `set_attrs`.
+    """
+
+    def setUp(self):
+        self.op_type = "maxout"
+        # NOTE(review): `paddle.nn.Maxout` is the layer class; confirm that
+        # OpTest expects it here rather than `paddle.nn.functional.maxout`.
+        self.python_api = paddle.nn.Maxout
+        self.groups = 2
+        self.axis = 1
+        # Apply subclass overrides before the reference output is computed;
+        # otherwise every subclass would silently re-run the axis=1 case.
+        self.set_attrs()
+
+        input_np = np.random.uniform(-1, 1, [2, 6, 5, 4]).astype(np.float16)
+        output_np = maxout_forward_naive(input_np, self.groups, self.axis)
+        self.attrs = {'groups': self.groups, 'axis': self.axis}
+        self.inputs = {'X': input_np}
+        self.outputs = {'Out': output_np}
+
+    def test_check_output(self):
+        """Run OpTest's output check on the CUDA place with atol=1e-3."""
+        # The class-level skipIf already guarantees a CUDA build here.
+        place = core.CUDAPlace(0)
+        if core.is_float16_supported(place):
+            self.check_output_with_place(place, atol=1e-3)
+
+    def test_check_grad(self):
+        """Run OpTest's gradient check for input `X` against output `Out`."""
+        place = core.CUDAPlace(0)
+        if core.is_float16_supported(place):
+            self.check_grad_with_place(
+                place, ['X'], 'Out', max_relative_error=0.5
+            )
+
+    def set_attrs(self):
+        """Hook for subclasses to override `groups`/`axis`; no-op here."""
+        pass
+
+
+class TestMaxoutFP16Case1(TestMaxOutOpFP16):
+    """float16 maxout over the last axis, addressed as axis=-1."""
+
+    def set_attrs(self):
+        self.axis = -1
+
+
+class TestMaxoutFP16Case2(TestMaxOutOpFP16):
+    """float16 maxout over the last axis, addressed as axis=3."""
+
+    def set_attrs(self):
+        self.axis = 3
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
@@ -784,7 +784,7 @@ def maxout(x, groups, axis=1, name=None):
 
     Parameters:
         x (Tensor): The input is 4-D Tensor with shape [N, C, H, W] or [N, H, W, C], the data type
-            of input is float32 or float64.
+            of input is float16, float32 or float64.
         groups (int): The groups number of maxout. `groups` specifies the
             index of channel dimension where maxout will be performed. This must be
             a factor of number of features.