[TOPI] Support int4/int8 conv2d tensor core with HWNC layout (apache#6121)

* int4 tensorcore

* a draft for new int4 schedule

* update layout

* add inline option

* clean code

* increase search space

* fix kernel shape

* update intrinsic

* update intrinsic

* support int4/int8 hwnc layout

* remove useless code

* remove useless code

* remove useless code

* remove useless code

* fix int8 transpose

* fix assert

* add asf header

* CI

* CI

* CI

* fix bug

Co-authored-by: Leyuan Wang <[email protected]>
2 people authored and Trevor Morris committed Sep 2, 2020
1 parent 06ccdef commit 67a0a96
Showing 6 changed files with 629 additions and 2 deletions.
19 changes: 19 additions & 0 deletions python/tvm/relay/op/strategy/cuda.py
@@ -172,6 +172,25 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
                        wrap_topi_schedule(topi.cuda.schedule_conv2d_nhwc_tensorcore),
                        name="conv2d_nhwc_tensorcore.cuda",
                        plevel=20)
        elif layout == "HWNC":
            assert kernel_layout in ["HWOI", "HWOI16o16i", "HWOI8o32i", "HWOI32o16i"]
            _, _, N, in_channels = get_const_tuple(data.shape)
            pre_computed = len(kernel.shape) == 6
            if pre_computed:
                _, _, oc_chunk, _, oc_block_factor, _ = get_const_tuple(kernel.shape)
                out_channels = oc_chunk * oc_block_factor
            else:
                _, _, out_channels, _ = get_const_tuple(kernel.shape)
            if topi.cuda.is_shape_tensorcore_direct_qualified(
                    batch=N, in_channels=in_channels, num_filter=out_channels, in_dtype=data.dtype):
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.cuda.conv2d_hwnc_tensorcore),
                    wrap_topi_schedule(topi.cuda.schedule_conv2d_hwnc_tensorcore),
                    name="conv2d_hwnc_tensorcore_direct.cuda",
                    plevel=20)
            else:
                raise RuntimeError("Unsupported shape for conv2d HWNC. "
                                   "Need to satisfy tensor core schedule.")
        elif layout == "NCHW4c" and data.dtype in ["int8", "uint8"]:
            assert kernel_layout == "OIHW4o4i"
            strategy.add_implementation(
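For context, the new strategy is selected whenever a conv2d uses the HWNC data layout and its shape passes topi.cuda.is_shape_tensorcore_direct_qualified. A minimal sketch (not part of the commit) of a Relay graph that should take this path on an sm_75+ GPU; the shapes and dtypes are illustrative:

```python
import tvm
from tvm import relay

# int4 HWNC conv2d: in_channels % 32 == 0 and out_channels % 8 == 0
# are required by the int4 tensor core path added in this commit.
data = relay.var("data", shape=(56, 56, 16, 32), dtype="int4")    # HWNC
weight = relay.var("weight", shape=(3, 3, 8, 32), dtype="int4")   # HWOI
conv = relay.nn.conv2d(
    data, weight,
    kernel_size=(3, 3), channels=8, padding=(1, 1),
    data_layout="HWNC", kernel_layout="HWOI", out_dtype="int32")
mod = tvm.IRModule.from_expr(conv)
```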
1 change: 1 addition & 0 deletions python/tvm/topi/cuda/__init__.py
@@ -50,5 +50,6 @@
from .conv2d_nhwc_tensorcore import *
from .conv3d_ndhwc_tensorcore import *
from .dense_tensorcore import *
from .conv2d_hwnc_tensorcore import *
from .correlation import *
from .sparse import *
30 changes: 30 additions & 0 deletions python/tvm/topi/cuda/conv2d_alter_op.py
@@ -171,6 +171,36 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
        dispatch_ctx.update(target, new_workload, cfg)
        return relay.nn.conv2d(*inputs, **new_attrs)

    if topi_tmpl == "conv2d_HWNCnc_tensorcore.cuda":
        assert data_layout == "HWNC" and kernel_layout == "HWOI"
        assert float(tvm.gpu(0).compute_version) >= 7.5
        H, W, N, CI = get_const_tuple(data.shape)
        KH, KW, CO, _ = get_const_tuple(kernel.shape)

        if kernel.dtype in ['int4', 'uint4'] and (CI % 32 != 0 or CO % 8 != 0) or \
           kernel.dtype in ['int8', 'uint8'] and (CI % 16 != 0 or CO % 32 != 0):
            return relay.nn.conv2d(*inputs, **new_attrs)

        new_attrs["channels"] = CO
        if kernel.dtype in ['int4', 'uint4']:
            new_attrs['kernel_layout'] = 'HWOI8o32i'
            ic_block_factor = 32
            oc_block_factor = 8
        else:
            new_attrs['kernel_layout'] = 'HWOI32o16i'
            ic_block_factor = 16
            oc_block_factor = 32

        new_kernel = te.placeholder((KH, KW, CO // oc_block_factor, CI // ic_block_factor,
                                     oc_block_factor, ic_block_factor), dtype=kernel.dtype)

        new_workload = autotvm.task.args_to_workload(
            [data, new_kernel, strides, padding, dilation, out_dtype],
            "conv2d_HWNCnc_tensorcore.cuda")

        dispatch_ctx.update(target, new_workload, cfg)
        return relay.nn.conv2d(*inputs, **new_attrs)

    return None

@conv2d_legalize.register("cuda")
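The kernel_layout rewrite above packs the plain HWOI kernel into blocked form: 8×32 (oc×ic) blocks for int4 and 32×16 for int8, matching the new six-dimensional te.placeholder. A hypothetical numpy sketch (not part of the commit) of the equivalent index arithmetic for the int4 case:

```python
import numpy as np

# HWOI -> HWOI8o32i repacking; int4 values are held in an int8 array,
# since numpy has no 4-bit integer type.
KH, KW, CO, CI = 3, 3, 64, 64
kernel = np.random.randint(-8, 8, size=(KH, KW, CO, CI)).astype("int8")

oc_block_factor, ic_block_factor = 8, 32   # int8 kernels would use 32, 16
packed = (kernel
          .reshape(KH, KW, CO // oc_block_factor, oc_block_factor,
                   CI // ic_block_factor, ic_block_factor)
          .transpose(0, 1, 2, 4, 3, 5))
# Same shape as the new te.placeholder:
# (KH, KW, CO // oc_block_factor, CI // ic_block_factor, oc, ic)
assert packed.shape == (3, 3, 8, 2, 8, 32)
```

In the commit itself the repacking is performed by Relay's layout transform once kernel_layout is set; the sketch only illustrates the block arithmetic.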