diff --git a/src/target/source/codegen_cuda.cc b/src/target/source/codegen_cuda.cc
index 2239cef92060..d96e0cbc1679 100644
--- a/src/target/source/codegen_cuda.cc
+++ b/src/target/source/codegen_cuda.cc
@@ -1197,8 +1197,10 @@ inline void PrintConst(const FloatImmNode* op, std::ostream& os, CodeGenCUDA* p)
       break;
     }
     case 16: {
-      os << "__float2half_rn";
-      os << '(' << std::scientific << op->value << 'f' << ')';
+      os << "__float2half_rn" << '(';
+      FloatImm const_f32 = FloatImm(DataType::Float(32), op->value);
+      PrintConst(const_f32.get(), os, p);
+      os << ')';
       break;
     }
     default:
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py
index 2fe40ae2f88e..400f7dcf0b42 100644
--- a/tests/python/relay/test_op_level3.py
+++ b/tests/python/relay/test_op_level3.py
@@ -1344,7 +1344,7 @@ def verify_gather_nd(xshape, yshape, y_data, batch_dims=0, indices_dtype="int32"
     verify_gather_nd((2, 2, 2), (2, 2, 1), [[[1], [0]], [[0], [1]]], 1, indices_dtype="uint32")


-def _verify_infiniteness_ops(relay_op, ref_op):
+def _verify_infiniteness_ops(relay_op, ref_op, target="llvm", dev=None):
     for dtype in ["float32", "float16", "float16", "int32", "int16"]:
         shape = (2, 8, 8)
         x = relay.var("x", relay.TensorType(shape, dtype))
@@ -1359,17 +1359,25 @@ def _verify_infiniteness_ops(relay_op, ref_op):
         ] = np.infty
         data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.nan

-        op_res = create_executor().evaluate(y, {x: data})
+        op_res = create_executor(target=target, device=dev).evaluate(y, {x: data})
         ref_res = ref_op(data)
         np.testing.assert_allclose(op_res.numpy(), ref_res, rtol=0.01)


+@tvm.testing.requires_gpu
 def test_isfinite():
-    _verify_infiniteness_ops(relay.isfinite, np.isfinite)
+    for target, dev in tvm.testing.enabled_targets():
+        if target not in ["llvm", "cuda"]:
+            continue
+        _verify_infiniteness_ops(relay.isfinite, np.isfinite, target=target, dev=dev)


+@tvm.testing.requires_gpu
 def test_isinf():
-    _verify_infiniteness_ops(relay.isinf, np.isinf)
+    for target, dev in tvm.testing.enabled_targets():
+        if target not in ["llvm", "cuda"]:
+            continue
+        _verify_infiniteness_ops(relay.isinf, np.isinf, target=target, dev=dev)


 def test_unravel_index(target, dev, executor_kind):
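
A note on the codegen change above: with the removed lines, the float16 value was formatted via `std::scientific`, so a non-finite immediate such as +inf printed as `inf` followed by the `f` suffix, yielding a token that is not a valid CUDA literal. Delegating to `PrintConst` on a float32 `FloatImm` reuses the existing float32 constant printing, which handles the non-finite cases. A minimal sketch of the emitted CUDA for a float16 infinity, assuming the float32 path emits the `CUDART_INF_F` macro (the `math_constants.h` route used elsewhere in `codegen_cuda.cc`; the variable name is illustrative):

    // before the patch: stream prints "inf", then the 'f' suffix -> does not compile
    half c = __float2half_rn(inff);
    // after the patch: the float32 PrintConst path emits a valid constant
    half c = __float2half_rn(CUDART_INF_F);

This is also what the test changes exercise: `test_isfinite`/`test_isinf` now run on CUDA (when a GPU is available) as well as llvm, so the non-finite float16 constants actually flow through this CUDA printer.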