[TOPI][CUDA] Fix Winograd Kernel Size Support (#4276)
* fix_winograd_cuda_kernel_size

* add unit test
comaniac authored and vinx13 committed Nov 8, 2019
1 parent 5bcd331 commit 76b7967
Showing 2 changed files with 75 additions and 3 deletions.
72 changes: 72 additions & 0 deletions tests/python/relay/test_op_level2.py
@@ -18,6 +18,7 @@
"""
import numpy as np
import tvm
from tvm import autotvm
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import ctx_list
@@ -174,6 +175,76 @@ def run_test_conv2d(dtype, out_dtype, scale, dshape, kshape,
    run_test_conv2d("float32", "float32", 1, dshape, kshape,
                    padding=(1, 1), channels=10, kernel_size=(3, 3), dilation=(3, 3))

def test_conv2d_winograd():
    class WinogradFallback(autotvm.FallbackContext):
        def _query_inside(self, target, workload):
            key = (target, workload)
            if key in self.memory:
                return self.memory[key]
            cfg = autotvm.task.space.FallbackConfigEntity()
            cfg.template_key = 'winograd'
            cfg.is_fallback = False
            cfg['tile_b'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
            cfg['tile_y'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
            cfg['tile_x'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
            cfg['tile_rc'] = autotvm.task.space.SplitEntity([-1, 1])
            cfg['auto_unroll_max_setp'] = autotvm.task.space.OtherOptionEntity(1500)
            cfg['unroll_explicit'] = autotvm.task.space.OtherOptionEntity(1)
            self.memory[key] = cfg
            return cfg

    def run_test_conv2d_cuda(dtype, out_dtype, scale, dshape, kshape,
                             padding=(1, 1),
                             groups=1,
                             dilation=(1, 1),
                             **attrs):

        x = relay.var("x", shape=dshape, dtype=dtype)
        w = relay.var("w", shape=kshape, dtype=dtype)
        y = relay.nn.conv2d(x, w,
                            padding=padding,
                            dilation=dilation,
                            groups=groups,
                            **attrs)
        func = relay.Function([x, w], y)
        mod = relay.Module()
        mod['main'] = func
        mod = relay.transform.InferType()(mod)

        data = np.random.uniform(-scale, scale, size=dshape).astype(dtype)
        kernel = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
        ref_res = topi.testing.conv2d_nchw_python(
            data.astype(out_dtype), kernel.astype(out_dtype), 1, padding,
            groups=groups)

        with WinogradFallback(), relay.build_config(opt_level=3):
            for target, ctx in ctx_list():
                if target != 'cuda':
                    continue
                params = {'w': tvm.nd.array(kernel)}
                graph, lib, params = relay.build_module.build(mod, target=target, params=params)
                module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
                module.set_input('x', tvm.nd.array(data))
                module.set_input(**params)
                module.run()
                op_res1 = module.get_output(0)
                tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-3, atol=1e-3)

    # normal winograd: stride 1, padding 1, kernel 3x3
    dshape = (1, 80, 73, 73)
    kshape = (192, 80, 3, 3)
    run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
                         padding=(1, 1), channels=192, kernel_size=(3, 3))
    # extended winograd: stride 1, padding N, kernel 3x3
    run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
                         padding=(0, 0), channels=192, kernel_size=(3, 3))
    run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
                         padding=(2, 2), channels=192, kernel_size=(3, 3))
    # extended winograd: stride 1, padding N, kernel NxN
    kshape = (192, 80, 7, 7)
    run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
                         padding=(2, 2), channels=192, kernel_size=(7, 7))


def test_conv2d_transpose_infer_type():
    # symbolic in batch dimension
@@ -702,6 +773,7 @@ def test_bitpack_infer_type():
    test_conv2d_transpose_infer_type()
    test_conv2d_transpose_run()
    test_conv2d_run()
    test_conv2d_winograd()
    test_bitserial_conv2d_infer_type()
    test_batch_flatten()
    test_upsampling()
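For reference, the spatial output size of each new test case follows the usual convolution arithmetic out = (in + 2 * pad - kernel) // stride + 1, with stride 1 throughout. The snippet below is only an illustrative check of those numbers; it is not part of the patch, and the helper name is made up for this note.

def out_size(in_size, pad, kernel, stride=1):
    # standard convolution output-size formula
    return (in_size + 2 * pad - kernel) // stride + 1

assert out_size(73, 1, 3) == 73   # normal winograd: 3x3 kernel, padding 1
assert out_size(73, 0, 3) == 71   # extended winograd: 3x3 kernel, padding 0
assert out_size(73, 2, 3) == 75   # extended winograd: 3x3 kernel, padding 2
assert out_size(73, 2, 7) == 71   # extended winograd: 7x7 kernel, padding 2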
6 changes: 3 additions & 3 deletions topi/python/topi/cuda/conv2d_winograd.py
@@ -55,20 +55,20 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, layout, out_dtype
        if dilation_h != 1 or dilation_w != 1:
            kernel = dilation(kernel, (1, 1, dilation_h, dilation_w))
        CO, CI, KH, KW = get_const_tuple(kernel.shape)
+       alpha = KW + tile_size - 1
        assert HSTR == 1 and WSTR == 1 and KH == KW
    else:
        # kernel tensor is pre-transformed. this op is created by alter op layout.
        # dilation is not supported
-       _, _, CI, CO = get_const_tuple(kernel.shape)
-       KH = KW = 3
+       alpha, _, CI, CO = get_const_tuple(kernel.shape)
+       KH = KW = alpha + 1 - tile_size
        assert HSTR == 1 and WSTR == 1 and dilation_h == 1 and dilation_w == 1

    HPAD, WPAD, _, _ = nn.get_pad_tuple(padding, kernel)
    data_pad = nn.pad(data, (0, 0, HPAD, WPAD), (0, 0, HPAD, WPAD), name="data_pad")

    r = KW
    m = tile_size
-   alpha = m + r - 1
    A, B, G = winograd_transform_matrices(m, r, out_dtype)

    H = (H + 2 * HPAD - KH) // HSTR + 1
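The core of the fix is the Winograd relation alpha = m + r - 1, where m is the output tile size and r the kernel size: the pre-transformed kernel produced by alter-op-layout has shape (alpha, alpha, CI, CO), so the kernel size can now be recovered as r = alpha + 1 - m instead of being hard-coded to 3. A minimal illustrative sketch of that relation (the helper below is not part of the patch):

def kernel_size_from_packed(kernel_shape, tile_size):
    # pre-transformed kernel layout: (alpha, alpha, CI, CO)
    alpha = kernel_shape[0]
    return alpha + 1 - tile_size   # KH == KW == r

assert kernel_size_from_packed((4, 4, 80, 192), tile_size=2) == 3   # F(2x2, 3x3)
assert kernel_size_from_packed((6, 6, 80, 192), tile_size=4) == 3   # F(4x4, 3x3)
assert kernel_size_from_packed((8, 8, 80, 192), tile_size=2) == 7   # F(2x2, 7x7)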
