diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 87ed9026ba11..ab9950a9f31d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -34,3 +34,4 @@ List of Contributors - To contributors: please add your name to the list. - [Qiao Zhang](https://github.com/zhangqiaorjc) - [Jian Weng](https://github.com/were) +- [Masahiro Masuda](https://github.com/masahi) diff --git a/topi/tests/python/test_topi_broadcast.py b/topi/tests/python/test_topi_broadcast.py index 029c12ec4081..e993b5324176 100644 --- a/topi/tests/python/test_topi_broadcast.py +++ b/topi/tests/python/test_topi_broadcast.py @@ -13,7 +13,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="broadcast_to") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) out_npy = np.broadcast_to(data_npy, out_shape) @@ -27,6 +27,7 @@ def check_device(device): check_device("opencl") check_device("cuda") check_device("metal") + check_device("rocm") def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"): @@ -52,7 +53,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ) lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype) rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype) @@ -81,7 +82,7 @@ def check_device(device): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") def test_broadcast_to(): verify_broadcast_to_ele((1,), (10,)) diff --git a/topi/tests/python/test_topi_conv2d_hwcn.py b/topi/tests/python/test_topi_conv2d_hwcn.py index 94421969f79f..d40a702abf7e 100644 --- a/topi/tests/python/test_topi_conv2d_hwcn.py +++ b/topi/tests/python/test_topi_conv2d_hwcn.py @@ -34,14 +34,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) w = tvm.nd.array(w_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx) with tvm.build_config(auto_unroll_max_step=32, auto_unroll_min_depth=0, - unroll_explicit=False): + unroll_explicit=device == 'rocm'): func1 = tvm.build(s1, [A, W, B], device) func2 = tvm.build(s2, [A, W, C], device) func1(a, w, b) @@ -49,7 +49,7 @@ def check_device(device): np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py index bf9a144c7630..d2fc7cd5b2a3 100644 --- a/topi/tests/python/test_topi_conv2d_nchw.py +++ b/topi/tests/python/test_topi_conv2d_nchw.py @@ -35,14 +35,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) w = tvm.nd.array(w_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx) with tvm.build_config(auto_unroll_max_step=32, auto_unroll_min_depth=0, - unroll_explicit=False): + unroll_explicit=device == 'rocm'): func1 = tvm.build(s1, [A, W, B], device) func2 = tvm.build(s2, [A, W, C], device) func1(a, w, b) @@ -50,7 +50,7 @@ def check_device(device): np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) diff --git a/topi/tests/python/test_topi_dense.py b/topi/tests/python/test_topi_dense.py index fe0b5d3918ae..94dcb14af4d2 100644 --- a/topi/tests/python/test_topi_dense.py +++ b/topi/tests/python/test_topi_dense.py @@ -33,7 +33,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) @@ -42,7 +42,7 @@ def check_device(device): f(a, b, c, d) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) def test_dense(): diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py index 92fdac865a81..b0f09c55ca4e 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d.py +++ b/topi/tests/python/test_topi_depthwise_conv2d.py @@ -87,7 +87,8 @@ def get_ref_data(): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") + def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding): in_width = in_height filter_channel = in_channel @@ -170,7 +171,7 @@ def get_ref_data(): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") def test_depthwise_conv2d(): print("testing nchw") diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py index 7dac05fb6abc..616c8e57795d 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py @@ -83,7 +83,7 @@ def get_ref_data(): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") def test_topi_depthwise_conv2d_backward_input_nhwc(): verify_depthwise_conv2d_back_input(16, 256, 56, 1, 3, 1, 1) diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py index 1a8bfe9d7566..4a4d2552ccfd 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py @@ -76,7 +76,7 @@ def get_ref_data(): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") def test_topi_depthwise_conv2d_backward_weight_nhwc(): verify_depthwise_conv2d_back_weight(16, 256, 56, 1, 3, 1, 1) diff --git a/topi/tests/python/test_topi_pooling.py b/topi/tests/python/test_topi_pooling.py index 20d40901aa12..88b5a769093c 100644 --- a/topi/tests/python/test_topi_pooling.py +++ b/topi/tests/python/test_topi_pooling.py @@ -36,14 +36,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) f(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) def test_pool(): @@ -70,14 +70,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) f = tvm.build(s, [A, B], device) f(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) def test_global_pool(): diff --git a/topi/tests/python/test_topi_reduce.py b/topi/tests/python/test_topi_reduce.py index c3b89c975740..7922090ec112 100644 --- a/topi/tests/python/test_topi_reduce.py +++ b/topi/tests/python/test_topi_reduce.py @@ -50,7 +50,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="sum") # Test in_npy = np.random.uniform(size=in_shape).astype(np.float32) @@ -76,7 +76,7 @@ def check_device(device): check_device("opencl") check_device("cuda") check_device("metal") - + check_device("rocm") def test_reduce_map(): verify_reduce_map_ele(in_shape=(128, 24, 128, 24), diff --git a/topi/tests/python/test_topi_relu.py b/topi/tests/python/test_topi_relu.py index 2b0f11dceafc..3307100043d1 100644 --- a/topi/tests/python/test_topi_relu.py +++ b/topi/tests/python/test_topi_relu.py @@ -17,14 +17,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) foo = tvm.build(s, [A, B], device, name="relu") foo(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) diff --git a/topi/tests/python/test_topi_softmax.py b/topi/tests/python/test_topi_softmax.py index cef5762295e5..7359c6fa56f0 100644 --- a/topi/tests/python/test_topi_softmax.py +++ b/topi/tests/python/test_topi_softmax.py @@ -21,14 +21,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) foo = tvm.build(s, [A, B], device, name="softmax") foo(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) def test_softmax(): @@ -52,14 +52,14 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) foo = tvm.build(s, [A, B], device, name="log_softmax") foo(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - for device in ['cuda', 'opencl', 'metal']: + for device in ['cuda', 'opencl', 'metal', 'rocm']: check_device(device) def test_log_softmax(): diff --git a/topi/tests/python/test_topi_transform.py b/topi/tests/python/test_topi_transform.py index 1113856fbfdb..623c8c6185ab 100644 --- a/topi/tests/python/test_topi_transform.py +++ b/topi/tests/python/test_topi_transform.py @@ -11,7 +11,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="expand_dims") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) out_npy = data_npy.reshape(out_shape) @@ -23,6 +23,7 @@ def check_device(device): check_device("opencl") check_device("cuda") check_device("metal") + check_device("rocm") def verify_tranpose(in_shape, axes): @@ -33,7 +34,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="tranpose") data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype) out_npy = data_npy.transpose(axes) @@ -45,7 +46,7 @@ def check_device(device): check_device("cuda") check_device("opencl") check_device("metal") - + check_device("rocm") def verify_reshape(src_shape, dst_shape): A = tvm.placeholder(shape=src_shape, name="A") @@ -55,7 +56,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="reshape") data_npy = np.random.normal(size=src_shape).astype(A.dtype) out_npy = np.reshape(data_npy, newshape=dst_shape) @@ -67,7 +68,7 @@ def check_device(device): check_device("cuda") check_device("opencl") check_device("metal") - + check_device("rocm") def verify_squeeze(src_shape, axis): A = tvm.placeholder(shape=src_shape, name="A") @@ -77,7 +78,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A, B], device, name="squeeze") data_npy = np.random.normal(size=src_shape).astype(A.dtype) out_npy = np.squeeze(data_npy, axis=axis) @@ -93,7 +94,7 @@ def check_device(device): check_device("cuda") check_device("opencl") check_device("metal") - + check_device("rocm") def verify_concatenate(shapes, axis): tensor_l = [] @@ -105,7 +106,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate") data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes] out_npy = np.concatenate(data_npys, axis=axis) @@ -117,7 +118,7 @@ def check_device(device): check_device("cuda") check_device("opencl") check_device("metal") - + check_device("rocm") def verify_split(src_shape, indices_or_sections, axis): A = tvm.placeholder(shape=src_shape, name="A") @@ -127,7 +128,7 @@ def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return - ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + ctx = tvm.context(device, 0) foo = tvm.build(s, [A] + tensor_l, device, name="split") data_npy = np.random.normal(size=src_shape).astype(A.dtype) out_npys = np.split(data_npy, indices_or_sections, axis=axis) @@ -140,7 +141,8 @@ def check_device(device): check_device("cuda") check_device("opencl") check_device("metal") - + check_device("rocm") + def test_expand_dims(): verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2) verify_expand_dims((3, 10), (1, 3, 10), -3, 1)