From 6df148423372e079f8d5efb89128d0870412ae68 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Sat, 27 Jun 2020 00:09:02 +0000
Subject: [PATCH 1/2] fix x86 conv2d and conv2d_transpose template

---
 topi/python/topi/x86/conv2d_avx_1x1.py    |  2 ++
 topi/python/topi/x86/conv2d_avx_common.py |  2 ++
 topi/python/topi/x86/conv2d_transpose.py  | 14 ++++++++------
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py
index 978c4b9adb30..87c778ef795e 100644
--- a/topi/python/topi/x86/conv2d_avx_1x1.py
+++ b/topi/python/topi/x86/conv2d_avx_1x1.py
@@ -89,6 +89,8 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
             s[kernel_vec].vectorize(oc_block)
             parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
             s[kernel_vec].parallel(parallel_axis)
+    else:
+        oc_bn = cfg['tile_oc'].size[-1]
 
     C, O = conv_out, last
     CC = s.cache_write(C, 'global')
diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py
index a88d168194fc..e010916a4a6b 100644
--- a/topi/python/topi/x86/conv2d_avx_common.py
+++ b/topi/python/topi/x86/conv2d_avx_common.py
@@ -111,6 +111,8 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
             s[kernel_vec].vectorize(oc_block)
             parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
             s[kernel_vec].parallel(parallel_axis)
+    else:
+        oc_bn = cfg['tile_oc'].size[-1]
 
 
     # schedule 5-D NCHW[x]c conv
diff --git a/topi/python/topi/x86/conv2d_transpose.py b/topi/python/topi/x86/conv2d_transpose.py
index f90edb5e2911..d3248c45cb42 100644
--- a/topi/python/topi/x86/conv2d_transpose.py
+++ b/topi/python/topi/x86/conv2d_transpose.py
@@ -37,14 +37,16 @@ def _callback(op):
             conv_out = op.input_tensors[0]
             # retrieve data
             data_vec = conv_out.op.input_tensors[0]
-            data_pad = data_vec.op.input_tensors[0]
-            data_dilate = data_pad.op.input_tensors[0]
-            s[data_dilate].compute_inline()
-            s[data_pad].compute_inline()
+            if isinstance(data_vec, te.ComputeOp):
+                data_pad = data_vec.op.input_tensors[0]
+                data_dilate = data_pad.op.input_tensors[0]
+                s[data_dilate].compute_inline()
+                s[data_pad].compute_inline()
             # retrieve kernel
             kernel_vec = conv_out.op.input_tensors[1]
-            kernel_transform = kernel_vec.op.input_tensors[0]
-            s[kernel_transform].compute_inline()
+            if isinstance(kernel_vec, te.ComputeOp):
+                kernel_transform = kernel_vec.op.input_tensors[0]
+                s[kernel_transform].compute_inline()
 
     traverse_inline(s, outs[0].op, _callback)
     return s

From 57772927af584048941fdda3c9a5ea43a6494db0 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Tue, 30 Jun 2020 11:30:32 -0700
Subject: [PATCH 2/2] address comments

---
 topi/python/topi/x86/conv2d_avx_1x1.py    | 4 +---
 topi/python/topi/x86/conv2d_avx_common.py | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py
index 87c778ef795e..c6ed83221555 100644
--- a/topi/python/topi/x86/conv2d_avx_1x1.py
+++ b/topi/python/topi/x86/conv2d_avx_1x1.py
@@ -73,6 +73,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -84,13 +85,10 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
 
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
             parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
             s[kernel_vec].parallel(parallel_axis)
-    else:
-        oc_bn = cfg['tile_oc'].size[-1]
 
     C, O = conv_out, last
     CC = s.cache_write(C, 'global')
diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py
index e010916a4a6b..aea954fe8d9e 100644
--- a/topi/python/topi/x86/conv2d_avx_common.py
+++ b/topi/python/topi/x86/conv2d_avx_common.py
@@ -95,6 +95,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -106,13 +107,10 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
 
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
             parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
             s[kernel_vec].parallel(parallel_axis)
-    else:
-        oc_bn = cfg['tile_oc'].size[-1]
 
 
     # schedule 5-D NCHW[x]c conv