From fecc59409b2fb29e8a182490d05ee50f4bab53ae Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Fri, 13 Oct 2017 22:46:58 +0900
Subject: [PATCH 1/2] add masahi to contributors

---
 CONTRIBUTORS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 87ed9026ba11..ab9950a9f31d 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -34,3 +34,4 @@ List of Contributors
   - To contributors: please add your name to the list.
 - [Qiao Zhang](https://github.com/zhangqiaorjc)
 - [Jian Weng](https://github.com/were)
+- [Masahiro Masuda](https://github.com/masahi)

From c672e5f504d9602e9e99bc585e3c02ca21e4edd4 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Fri, 13 Oct 2017 00:34:06 +0900
Subject: [PATCH 2/2] enable rocm target in topi tests

---
 topi/tests/python/test_topi_broadcast.py      |  7 +++---
 topi/tests/python/test_topi_conv2d_hwcn.py    |  6 ++---
 topi/tests/python/test_topi_conv2d_nchw.py    |  6 ++---
 topi/tests/python/test_topi_dense.py          |  4 ++--
 .../python/test_topi_depthwise_conv2d.py      |  5 ++--
 .../test_topi_depthwise_conv2d_back_input.py  |  2 +-
 .../test_topi_depthwise_conv2d_back_weight.py |  2 +-
 topi/tests/python/test_topi_pooling.py        |  8 +++----
 topi/tests/python/test_topi_reduce.py         |  4 ++--
 topi/tests/python/test_topi_relu.py           |  4 ++--
 topi/tests/python/test_topi_softmax.py        |  8 +++----
 topi/tests/python/test_topi_transform.py      | 24 ++++++++++---------
 12 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/topi/tests/python/test_topi_broadcast.py b/topi/tests/python/test_topi_broadcast.py
index 029c12ec4081..e993b5324176 100644
--- a/topi/tests/python/test_topi_broadcast.py
+++ b/topi/tests/python/test_topi_broadcast.py
@@ -13,7 +13,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="broadcast_to")
         data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
         out_npy = np.broadcast_to(data_npy, out_shape)
@@ -27,6 +27,7 @@ def check_device(device):
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
+    check_device("rocm")
 
 
 def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"):
@@ -52,7 +53,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ)
         lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
         rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
@@ -81,7 +82,7 @@ def check_device(device):
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
 
 def test_broadcast_to():
     verify_broadcast_to_ele((1,), (10,))
diff --git a/topi/tests/python/test_topi_conv2d_hwcn.py b/topi/tests/python/test_topi_conv2d_hwcn.py
index 94421969f79f..d40a702abf7e 100644
--- a/topi/tests/python/test_topi_conv2d_hwcn.py
+++ b/topi/tests/python/test_topi_conv2d_hwcn.py
@@ -34,14 +34,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         w = tvm.nd.array(w_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
         with tvm.build_config(auto_unroll_max_step=32,
                               auto_unroll_min_depth=0,
-                              unroll_explicit=False):
+                              unroll_explicit=(device == 'rocm')):
             func1 = tvm.build(s1, [A, W, B], device)
             func2 = tvm.build(s2, [A, W, C], device)
             func1(a, w, b)
@@ -49,7 +49,7 @@ def check_device(device):
             np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
             np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 
diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py
index bf9a144c7630..d2fc7cd5b2a3 100644
--- a/topi/tests/python/test_topi_conv2d_nchw.py
+++ b/topi/tests/python/test_topi_conv2d_nchw.py
@@ -35,14 +35,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         w = tvm.nd.array(w_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
         with tvm.build_config(auto_unroll_max_step=32,
                               auto_unroll_min_depth=0,
-                              unroll_explicit=False):
+                              unroll_explicit=(device == 'rocm')):
             func1 = tvm.build(s1, [A, W, B], device)
             func2 = tvm.build(s2, [A, W, C], device)
             func1(a, w, b)
@@ -50,7 +50,7 @@ def check_device(device):
             np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
             np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 
diff --git a/topi/tests/python/test_topi_dense.py b/topi/tests/python/test_topi_dense.py
index fe0b5d3918ae..94dcb14af4d2 100644
--- a/topi/tests/python/test_topi_dense.py
+++ b/topi/tests/python/test_topi_dense.py
@@ -33,7 +33,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(b_np, ctx)
         c = tvm.nd.array(c_np, ctx)
@@ -42,7 +42,7 @@ def check_device(device):
         f(a, b, c, d)
         np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 def test_dense():
diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py
index 92fdac865a81..b0f09c55ca4e 100644
--- a/topi/tests/python/test_topi_depthwise_conv2d.py
+++ b/topi/tests/python/test_topi_depthwise_conv2d.py
@@ -87,7 +87,8 @@ def get_ref_data():
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
+
 def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding):
     in_width = in_height
     filter_channel = in_channel
@@ -170,7 +171,7 @@ def get_ref_data():
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
 
 def test_depthwise_conv2d():
     print("testing nchw")
diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py
index 7dac05fb6abc..616c8e57795d 100644
--- a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py
+++ b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py
@@ -83,7 +83,7 @@ def get_ref_data():
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
 
 def test_topi_depthwise_conv2d_backward_input_nhwc():
     verify_depthwise_conv2d_back_input(16, 256, 56, 1, 3, 1, 1)
diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
index 1a8bfe9d7566..4a4d2552ccfd 100644
--- a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
+++ b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
@@ -76,7 +76,7 @@ def get_ref_data():
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
 
 def test_topi_depthwise_conv2d_backward_weight_nhwc():
     verify_depthwise_conv2d_back_weight(16, 256, 56, 1, 3, 1, 1)
diff --git a/topi/tests/python/test_topi_pooling.py b/topi/tests/python/test_topi_pooling.py
index 20d40901aa12..88b5a769093c 100644
--- a/topi/tests/python/test_topi_pooling.py
+++ b/topi/tests/python/test_topi_pooling.py
@@ -36,14 +36,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
         f = tvm.build(s, [A, B], device)
         f(a, b)
         np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 def test_pool():
@@ -70,14 +70,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         f = tvm.build(s, [A, B], device)
         f(a, b)
         np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 def test_global_pool():
diff --git a/topi/tests/python/test_topi_reduce.py b/topi/tests/python/test_topi_reduce.py
index c3b89c975740..7922090ec112 100644
--- a/topi/tests/python/test_topi_reduce.py
+++ b/topi/tests/python/test_topi_reduce.py
@@ -50,7 +50,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="sum")
         # Test
         in_npy = np.random.uniform(size=in_shape).astype(np.float32)
@@ -76,7 +76,7 @@ def check_device(device):
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
-
+    check_device("rocm")
 
 def test_reduce_map():
     verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
diff --git a/topi/tests/python/test_topi_relu.py b/topi/tests/python/test_topi_relu.py
index 2b0f11dceafc..3307100043d1 100644
--- a/topi/tests/python/test_topi_relu.py
+++ b/topi/tests/python/test_topi_relu.py
@@ -17,14 +17,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         foo = tvm.build(s, [A, B], device, name="relu")
         foo(a, b)
         np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 
diff --git a/topi/tests/python/test_topi_softmax.py b/topi/tests/python/test_topi_softmax.py
index cef5762295e5..7359c6fa56f0 100644
--- a/topi/tests/python/test_topi_softmax.py
+++ b/topi/tests/python/test_topi_softmax.py
@@ -21,14 +21,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         foo = tvm.build(s, [A, B], device, name="softmax")
         foo(a, b)
         np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 def test_softmax():
@@ -52,14 +52,14 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         foo = tvm.build(s, [A, B], device, name="log_softmax")
         foo(a, b)
         np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['cuda', 'opencl', 'metal']:
+    for device in ['cuda', 'opencl', 'metal', 'rocm']:
         check_device(device)
 
 def test_log_softmax():
diff --git a/topi/tests/python/test_topi_transform.py b/topi/tests/python/test_topi_transform.py
index 1113856fbfdb..623c8c6185ab 100644
--- a/topi/tests/python/test_topi_transform.py
+++ b/topi/tests/python/test_topi_transform.py
@@ -11,7 +11,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="expand_dims")
         data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
         out_npy = data_npy.reshape(out_shape)
@@ -23,6 +23,7 @@ def check_device(device):
     check_device("opencl")
     check_device("cuda")
     check_device("metal")
+    check_device("rocm")
 
 
 def verify_tranpose(in_shape, axes):
@@ -33,7 +34,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="tranpose")
         data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype)
         out_npy = data_npy.transpose(axes)
@@ -45,7 +46,7 @@ def check_device(device):
     check_device("cuda")
     check_device("opencl")
     check_device("metal")
-
+    check_device("rocm")
 
 def verify_reshape(src_shape, dst_shape):
     A = tvm.placeholder(shape=src_shape, name="A")
@@ -55,7 +56,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="reshape")
         data_npy = np.random.normal(size=src_shape).astype(A.dtype)
         out_npy = np.reshape(data_npy, newshape=dst_shape)
@@ -67,7 +68,7 @@ def check_device(device):
     check_device("cuda")
     check_device("opencl")
     check_device("metal")
-
+    check_device("rocm")
 
 def verify_squeeze(src_shape, axis):
     A = tvm.placeholder(shape=src_shape, name="A")
@@ -77,7 +78,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A, B], device, name="squeeze")
         data_npy = np.random.normal(size=src_shape).astype(A.dtype)
         out_npy = np.squeeze(data_npy, axis=axis)
@@ -93,7 +94,7 @@ def check_device(device):
     check_device("cuda")
     check_device("opencl")
     check_device("metal")
-
+    check_device("rocm")
 
 def verify_concatenate(shapes, axis):
     tensor_l = []
@@ -105,7 +106,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate")
         data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
         out_npy = np.concatenate(data_npys, axis=axis)
@@ -117,7 +118,7 @@ def check_device(device):
     check_device("cuda")
     check_device("opencl")
     check_device("metal")
-
+    check_device("rocm")
 
 def verify_split(src_shape, indices_or_sections, axis):
     A = tvm.placeholder(shape=src_shape, name="A")
@@ -127,7 +128,7 @@ def check_device(device):
         if not tvm.module.enabled(device):
             print("Skip because %s is not enabled" % device)
             return
-        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
+        ctx = tvm.context(device, 0)
         foo = tvm.build(s, [A] + tensor_l, device, name="split")
         data_npy = np.random.normal(size=src_shape).astype(A.dtype)
         out_npys = np.split(data_npy, indices_or_sections, axis=axis)
@@ -140,7 +141,8 @@ def check_device(device):
     check_device("cuda")
     check_device("opencl")
     check_device("metal")
-
+    check_device("rocm")
+
 def test_expand_dims():
     verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
     verify_expand_dims((3, 10), (1, 3, 10), -3, 1)