[codegen] Add multiple operands and function support when using fp16 …

…compilation (apache#4056) * overload half operators for CUDA codegen * add float16 to test_op_level1 * fix test_op_level1.py * fix lint * disable fp16 test if GPU does not support it * disable fp16 test if GPU does not support it * bypass float16 test if GPU does not support float16
anijain2305 · Oct 17, 2019 · 67aa0d0 · 67aa0d0
1 parent 959bcf7
commit 67aa0d0
Show file tree

Hide file tree

Showing 3 changed files with 251 additions and 203 deletions.
diff --git a/src/codegen/codegen_cuda.cc b/src/codegen/codegen_cuda.cc
@@ -50,6 +50,20 @@ void CodeGenCUDA::AddFunction(LoweredFunc f) {
 std::string CodeGenCUDA::Finish() {
   if (enable_fp16_) {
     decl_stream << "#include <cuda_fp16.h>\n";
+    decl_stream << "__device__ half max" \
+                    "(const half a, const half b)\n"
+                    "{\n  return __hgt(__half(a), __half(b)) ? a : b;\n}\n";
+    decl_stream << "__device__ half min(const half a, const half b)\n"
+                    "{\n  return __hlt(__half(a), __half(b)) ? a : b;\n}\n";
+    decl_stream << "__device__ half operator+" \
+                    "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hadd(a, b);\n}\n";
+    decl_stream << "__device__ half operator<=" \
+                   "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hlt(a, b);\n}\n";
+    decl_stream << "__device__ half operator*" \
+                    "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hmul(a, b);\n}\n";
   }
 
   if (enable_int8_) {