From 5ef694f4a9c6db3a8a90ead91fec285a2b929d57 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2020 23:13:04 +0000 Subject: [PATCH] Make 4D output work for task extraction. --- python/tvm/contrib/util.py | 31 ++++++++++++++++++++++++++++ topi/python/topi/cuda/conv2d_int8.py | 8 ++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/python/tvm/contrib/util.py b/python/tvm/contrib/util.py index 2ebe175e8160..25d361585f96 100644 --- a/python/tvm/contrib/util.py +++ b/python/tvm/contrib/util.py @@ -166,3 +166,34 @@ def which(exec_name): if os.path.isfile(full_path) and os.access(full_path, os.X_OK): return full_path return None + + +def get_lower_ir(s): + """Get the lowered IR code of a schedule. + This is useful for debugging, since you don't have to find all inputs/outputs + for a schedule in a fused subgraph. + Parameters + ---------- + s: Schedule + Returns + ------- + ir: str + The lowered IR + """ + from tvm.te import tensor + from tvm.driver.build_module import lower + + outputs = s.outputs + + inputs = [] + def find_all(op): + if isinstance(op, tensor.PlaceholderOp): + inputs.append(op.output(0)) + else: + for x in op.input_tensors: + find_all(x.op) + + for out in outputs: + find_all(out) + + return lower(s, inputs, simple_mode=True) diff --git a/topi/python/topi/cuda/conv2d_int8.py b/topi/python/topi/cuda/conv2d_int8.py index a04f4091126a..66fc927af33d 100644 --- a/topi/python/topi/cuda/conv2d_int8.py +++ b/topi/python/topi/cuda/conv2d_int8.py @@ -217,7 +217,13 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): output = s.outputs[0].output(0) # tile and bind spatial axes - n, f, y, x, c = s[output].op.axis + if len(s[output].op.axis) == 5: + n, f, y, x, c = s[output].op.axis + else: + # For task extraction of auto-tuning, the expected output is 4D. Since auto-tuning tasks + # are created from scratch, the real auto-tuning will still happen on 5D output. 
+ n, f, y, x = s[output].op.axis + cfg.define_split("tile_n", cfg.axis(n), num_outputs=4) cfg.define_split("tile_f", cfg.axis(f), num_outputs=4) cfg.define_split("tile_y", cfg.axis(y), num_outputs=4)