Skip to content

Commit

Permalink
add ic_bn % 4 checker
Browse files (browse the repository at this point in the history)
  • Loading branch information
yzhliu committed Sep 30, 2019
1 parent 4bee735 commit ffd7ca0
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion topi/python/topi/arm_cpu/tensor_intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def dot_int8_int8_int32(int32_lanes, dtype='uint'):
intrin : TensorIntrin
The ARM uint8 TensorIntrin that can be used in tensorizing schedule
"""
- num_int8_elements = 4 # 4 uint8 elements in int32
+ num_int8_elements = 4 # 4 int8 elements in int32

data = tvm.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data')
kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel')
Expand Down
3 changes: 2 additions & 1 deletion topi/python/topi/generic/conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ def schedule_conv_NCHWc_cpu_common_int8(s, cfg, data, conv_out, last, int32_lane

ow_chunk, ow_block = s[CC].split(ow, factor=reg_n)

# Skylake and future processors have 16 vector lanes
assert oc_bn % int32_lanes == 0
assert ic_bn % 4 == 0 # 4 (u)int8 elements in (u)int32

oc_f_inner, oc_s_inner = s[CC].split(oc_block, factor=int32_lanes)

Expand Down Expand Up @@ -209,6 +209,7 @@ def schedule_conv_NCHWc_cpu_1x1_int8(s, cfg, data, conv_out, last, int32_lanes=1
kh, kw, ic_outer, ic_f_inner, ic_s_inner = s[CC].op.reduce_axis

assert oc_bn % int32_lanes == 0
assert ic_bn % 4 == 0 # 4 (u)int8 elements in (u)int32

oc_f_inner, oc_s_inner = s[CC].split(oc_block, factor=int32_lanes)

Expand Down

0 comments on commit ffd7ca0

Please sign in to comment.