From 8e10079c3c8baf607c01800b5e19102c32637cfc Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 13 May 2019 22:03:12 +0000 Subject: [PATCH] fp16 conversion --- python/tvm/relay/build_module.py | 2 +- tests/python/relay/test_cpp_build_module.py | 35 +++++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index e03de63749356..d0ad78fee67f8 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -155,7 +155,7 @@ def build(self, func, target=None, target_host=None, params=None): Host compilation target, if target is device. When TVM compiles device specific program such as CUDA, we also need host(CPU) side code to interact with the driver - setup the dimensions and parameters correctly. + to set up the dimensions and parameters correctly. target_host is used to specify the host side codegen target. By default, llvm is used if it is enabled, otherwise a stackvm intepreter is used. 
diff --git a/tests/python/relay/test_cpp_build_module.py b/tests/python/relay/test_cpp_build_module.py
index 4d221f04f31c1..67eee0c8c651d 100644
--- a/tests/python/relay/test_cpp_build_module.py
+++ b/tests/python/relay/test_cpp_build_module.py
@@ -20,9 +20,6 @@
 from tvm import relay
 from tvm.contrib.nvcc import have_fp16
 
-from tvm._ffi.function import _init_api
-_init_api("tvm.relay.build_module")
-
 
 def test_basic_build():
     tgt = "llvm"
@@ -99,6 +96,45 @@ def test_fp16_build():
                                atol=1e-5, rtol=1e-5)
 
 
+def test_fp16_conversion():
+    """Check float32 <-> float16 casts built with relay.build against numpy."""
+    def check_conversion(tgt, ctx):
+        # Skip instead of failing when the backend or device is unavailable.
+        if not tvm.module.enabled(tgt) or not ctx.exist:
+            print("skip because {} is not enabled or has no device.".format(tgt))
+            return
+        if tgt == "cuda" and not have_fp16(ctx.compute_version):
+            print("skip because gpu does not support fp16")
+            return
+
+        n = 10
+
+        for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
+            x = relay.var("x", relay.TensorType((n,), src))
+            y = x.astype(dst)
+            func = relay.Function([x], y)
+
+            # init input
+            X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
+
+            # build; "x" is fed at run time, so there are no params to bind
+            with relay.build_config(opt_level=1):
+                g_json, mmod, _ = relay.build(func, tgt)
+
+            # test
+            rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
+            rt.set_input("x", X)
+            rt.run()
+            out = rt.get_output(0)
+
+            np.testing.assert_allclose(out.asnumpy(), X.asnumpy().astype(dst),
+                                       atol=1e-5, rtol=1e-5)
+
+    for target, ctx in [('llvm', tvm.cpu()), ('cuda', tvm.gpu())]:
+        check_conversion(target, ctx)
+
+
 if __name__ == "__main__":
     test_basic_build()
     test_fp16_build()
+    test_fp16_conversion()