From dcace1b6f43a6b24aafc219a34ed78e7a9b83e0a Mon Sep 17 00:00:00 2001 From: Menooker Date: Tue, 12 May 2020 22:36:53 +0800 Subject: [PATCH] [Relay] enable blocking format in x86 conv2d and fold scale axis (#5357) --- python/tvm/relay/op/strategy/x86.py | 10 + src/relay/op/tensor/transform.h | 2 + src/relay/transforms/fold_scale_axis.cc | 151 ++++-- .../python/relay/test_pass_fold_scale_axis.py | 484 ++++++++++++------ topi/python/topi/x86/conv2d_alter_op.py | 107 ++-- 5 files changed, 507 insertions(+), 247 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index ba0b3d20b549..fbc2ed24548b 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -18,6 +18,7 @@ # pylint: disable=invalid-name,unused-argument,wildcard-import,unused-wildcard-import import logging +import re import topi from tvm.te import SpecializedCondition from .generic import * @@ -25,6 +26,9 @@ logger = logging.getLogger('strategy') +_NCHWc_matcher = re.compile("^NCHW[0-9]+c$") +_OIHWio_matcher = re.compile("^OIHW[0-9]+i[0-9]+o$") + @schedule_injective.register("cpu") def schedule_injective_cpu(attrs, outs, target): """schedule injective ops for x86""" @@ -96,6 +100,9 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target): wrap_compute_conv2d(topi.x86.conv2d_nchw), wrap_topi_schedule(topi.x86.schedule_conv2d_nchw), name="conv2d_nchw.x86") + elif _NCHWc_matcher.match(layout): # check if layout is NCHWxc + assert _OIHWio_matcher.match(kernel_layout) # check if kernel is OIHWio + return conv2d_NCHWc_strategy_cpu(attrs, inputs, out_type, target) elif layout == "NHWC": assert kernel_layout == "HWIO" logger.warning("For x86 target, NCHW layout is recommended for conv2d.") @@ -128,6 +135,9 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target): wrap_compute_conv2d(topi.nn.depthwise_conv2d_nchw), wrap_topi_schedule(topi.generic.schedule_depthwise_conv2d_nchw), name="depthwise_conv2d_nchw.generic") + elif _NCHWc_matcher.match(layout): # check if layout is NCHWxc + assert _OIHWio_matcher.match(kernel_layout) # check if kernel is OIHWio + return depthwise_conv2d_NCHWc_strategy_cpu(attrs, inputs, out_type, target) elif layout == "NHWC": assert kernel_layout == "HWOI" logger.warning("depthwise_conv2d NHWC layout is not optimized for x86.") diff --git a/src/relay/op/tensor/transform.h b/src/relay/op/tensor/transform.h index 62433c297e8e..1d1f9c0b64ee 100644 --- a/src/relay/op/tensor/transform.h +++ b/src/relay/op/tensor/transform.h @@ -38,6 +38,8 @@ namespace tvm { namespace relay { +extern Expr MakeReshape(Expr data, Array newshape); + template bool ConcatenateRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { diff --git a/src/relay/transforms/fold_scale_axis.cc b/src/relay/transforms/fold_scale_axis.cc index 57e3d6925b20..4c8025a8d382 100644 --- a/src/relay/transforms/fold_scale_axis.cc +++ b/src/relay/transforms/fold_scale_axis.cc @@ -29,6 +29,7 @@ #include #include +#include "../op/tensor/transform.h" #include "pass_util.h" #include "pattern_util.h" @@ -39,6 +40,7 @@ namespace relay { * * Use namespace to reduce potential naming conflict. */ + namespace fold_scale_axis { using runtime::TypedPackedFunc; @@ -305,6 +307,41 @@ class ForwardPrep : private ExprVisitor { } }; +static bool IsIntInArray(const Array& axis, int v) { + for (size_t i = 0; i < axis.size(); i++) { + if (axis[i] == v) return true; + } + return false; +} + +static Expr ReshapeToMatchAxis(Expr scale, const Array& shape, + const Array& axis) { + Array arr; + for (size_t i = 0; i < shape.size(); i++) { + if (IsIntInArray(axis, i)) { + auto node = shape[i].as(); + if (!node) { + // if the shape is not a constant, use normal transform + return Expr(); + } + arr.push_back(node->value); + } else { + arr.push_back(1); + } + } + return MakeReshape(scale, std::move(arr)); +} + +// if only one axis, use expand dim. Else, use reshape +static Expr ReshapeOrExpandToMatchAxis(Expr scale, const Array& shape, + const Array& axis) { + if (axis.size() > 1) { + return ReshapeToMatchAxis(scale, shape, axis); + } else { + return ExpandBiasToMatchAxis(scale, shape.size(), axis); + } +} + //---------------------------------------------- // Per operator defs for FScaleAxisForward //---------------------------------------------- @@ -365,7 +402,10 @@ Expr AddSubForwardRewrite(const Call& ref_call, const Array& new_args, if (slhs != nullptr) { CHECK(srhs == nullptr); CHECK(MatchBroadcastToLeftAxes(tlhs, trhs, slhs->axes)); - Expr scale = ExpandBiasToMatchAxis(slhs->scale, tlhs->shape.size(), slhs->axes); + Expr scale = ReshapeOrExpandToMatchAxis(slhs->scale, tlhs->shape, slhs->axes); + if (!scale.defined()) { + return Expr(); + } Expr rhs = Divide(new_args[1], scale); rnode->value = Call(ref_call->op, {slhs->value, rhs}, ref_call->attrs, ref_call->type_args); rnode->scale = slhs->scale; @@ -373,7 +413,10 @@ Expr AddSubForwardRewrite(const Call& ref_call, const Array& new_args, } else { CHECK(srhs != nullptr); CHECK(MatchBroadcastToLeftAxes(trhs, tlhs, srhs->axes)); - Expr scale = ExpandBiasToMatchAxis(srhs->scale, trhs->shape.size(), srhs->axes); + Expr scale = ReshapeOrExpandToMatchAxis(srhs->scale, trhs->shape, srhs->axes); + if (!scale.defined()) { + return Expr(); + } Expr lhs = Divide(new_args[0], scale); rnode->value = Call(ref_call->op, {lhs, srhs->value}, ref_call->attrs, ref_call->type_args); rnode->scale = srhs->scale; @@ -445,7 +488,6 @@ Array Conv2DForwardPrep(const Call& call, const Message& out_message) { CHECK_GE(c_big_axis, 0); Message none = NullValue(); - AxesSet data_axes = NullValue(); // For now, we only support simple pattern (no folded weight/data) // More general layout can be supported under the current framework. // By using a unified layout transformation. @@ -454,12 +496,17 @@ Array Conv2DForwardPrep(const Call& call, const Message& out_message) { // only handle depthwise or full conv2d. // TODO(tvm-team) handle grouped conv by reshape + bcast bool is_depthwise_conv2d = IsDepthwiseConv2D(call, param, kernel_layout); - if (kernel_layout.IndexOf(LayoutAxis::Get('i')) < 0 && c_small_axis < 0 && - (param->groups == 1 || is_depthwise_conv2d)) { - data_axes = {c_big_axis}; - } - if (data_axes.defined()) { - return {Message(data_axes, false), none}; + if (param->groups == 1 || is_depthwise_conv2d) { + auto ko_small_axis = kernel_layout.IndexOf(LayoutAxis::Get('o')); + auto ki_small_axis = kernel_layout.IndexOf(LayoutAxis::Get('i')); + if ((ko_small_axis < 0 && ki_small_axis < 0 && c_small_axis < 0) || // simple layout + (ko_small_axis >= 0 && ki_small_axis >= 0 && c_small_axis >= 0)) { // blocked layout + Array arr{c_big_axis}; + if (c_small_axis >= 0) { + arr.push_back(c_small_axis); + } + return {Message(arr, false), none}; + } } return {none, none}; } @@ -478,12 +525,14 @@ Expr Conv2DForwardRewrite(const Call& ref_call, const Array& new_args, Layout kernel_layout(param->kernel_layout); int c_big_axis = data_layout.IndexOf(LayoutAxis::Get('C')); CHECK_GE(c_big_axis, 0); - // For now, we only support simple pattern (no folded weight/data) - // TODO(tvm-team) support general data layout - CHECK_EQ(kernel_layout.IndexOf(LayoutAxis::Get('i')), -1); - CHECK(sdata->axes.size() == 1 && c_big_axis == sdata->axes[0]->value); - int big_oc_axis = kernel_layout.IndexOf(LayoutAxis::Get('O')); - int big_ic_axis = kernel_layout.IndexOf(LayoutAxis::Get('I')); + int small_ko_axis = kernel_layout.IndexOf(LayoutAxis::Get('o')); + int small_ki_axis = kernel_layout.IndexOf(LayoutAxis::Get('i')); + int big_ki_axis = kernel_layout.IndexOf(LayoutAxis::Get('I')); + int big_ko_axis = kernel_layout.IndexOf(LayoutAxis::Get('O')); + + bool is_simple = (small_ko_axis < 0 && small_ki_axis < 0 && big_ki_axis >= 0); + bool is_blocking = (small_ko_axis >= 0 && small_ki_axis >= 0 && big_ki_axis >= 0); + CHECK(is_simple || is_blocking); // Check it must be depthwise or full conv2d. bool is_depthwise_conv2d = IsDepthwiseConv2D(ref_call, param, kernel_layout); @@ -493,11 +542,26 @@ Expr Conv2DForwardRewrite(const Call& ref_call, const Array& new_args, // match the ic_axis if (is_depthwise_conv2d) { - Expr scale = ExpandBiasToMatchAxis(sdata->scale, kernel_layout.ndim(), {big_oc_axis}); - weight = Multiply(weight, scale); + if (is_simple) { + Expr scale = ExpandBiasToMatchAxis(sdata->scale, kernel_layout.ndim(), {big_ko_axis}); + weight = Multiply(weight, scale); + } else { + weight = Multiply(weight, + ReshapeToMatchAxis(sdata->scale, weight->type_as()->shape, + {big_ko_axis, small_ko_axis})); + if (!weight.defined()) return Expr(); + } + } else { - Expr scale = ExpandBiasToMatchAxis(sdata->scale, kernel_layout.ndim(), {big_ic_axis}); - weight = Multiply(weight, scale); + if (is_simple) { + Expr scale = ExpandBiasToMatchAxis(sdata->scale, kernel_layout.ndim(), {big_ki_axis}); + weight = Multiply(weight, scale); + } else { + weight = Multiply(weight, + ReshapeToMatchAxis(sdata->scale, weight->type_as()->shape, + {big_ki_axis, small_ki_axis})); + if (!weight.defined()) return Expr(); + } } // return transformed conv2d return Call(ref_call->op, {sdata->value, weight}, ref_call->attrs, ref_call->type_args); @@ -752,14 +816,20 @@ Expr AddSubBackwardTransform(const Call& call, const Message& message, const Exp CHECK(equal(message->axes, lhs_message->axes)); Expr lhs = transformer->Transform(call->args[0], message, scale); Expr rhs = transformer->Transform(call->args[1], NullValue(), NullValue()); - Expr rhs_scale = ExpandBiasToMatchAxis(scale, tlhs->shape.size(), message->axes); + Expr rhs_scale = ReshapeOrExpandToMatchAxis(scale, tlhs->shape, message->axes); + if (!rhs_scale.defined()) { + return transformer->NormalCallTransform(call.operator->()); + } rhs = Multiply(rhs, rhs_scale); return Call(call->op, {lhs, rhs}, call->attrs, call->type_args); } else if (rhs_message.defined()) { CHECK(equal(message->axes, rhs_message->axes)); Expr lhs = transformer->Transform(call->args[0], NullValue(), NullValue()); Expr rhs = transformer->Transform(call->args[1], message, scale); - Expr lhs_scale = ExpandBiasToMatchAxis(scale, trhs->shape.size(), message->axes); + Expr lhs_scale = ReshapeOrExpandToMatchAxis(scale, trhs->shape, message->axes); + if (!lhs_scale.defined()) { + return transformer->NormalCallTransform(call.operator->()); + } lhs = Multiply(lhs, lhs_scale); return Call(call->op, {lhs, rhs}, call->attrs, call->type_args); } else { @@ -829,13 +899,19 @@ Message Conv2DBackwardPrep(const Call& call, const Array& in_messages) // only handle depthwise or full conv2d. // TODO(tvm-team) handle grouped conv by reshape + bcast bool is_depthwise_conv2d = IsDepthwiseConv2D(call, param, kernel_layout); - if (kernel_layout.IndexOf(LayoutAxis::Get('o')) < 0 && - kernel_layout.IndexOf(LayoutAxis::Get('i')) < 0 && c_small_axis < 0 && - (param->groups == 1 || is_depthwise_conv2d)) { - return Message({c_big_axis}, false); - } else { - return NullValue(); + if (param->groups == 1 || is_depthwise_conv2d) { + auto ko_small_axis = kernel_layout.IndexOf(LayoutAxis::Get('o')); + auto ki_small_axis = kernel_layout.IndexOf(LayoutAxis::Get('i')); + if ((ko_small_axis < 0 && ki_small_axis < 0 && c_small_axis < 0) || // simple layout + (ko_small_axis >= 0 && ki_small_axis >= 0 && c_small_axis >= 0)) { // blocked layout + Array arr{c_big_axis}; + if (c_small_axis >= 0) { + arr.push_back(c_small_axis); + } + return Message(arr, false); + } } + return NullValue(); } // Conv2D consumes the scale axis during transformation. @@ -852,19 +928,28 @@ Expr Conv2DBackwardTransform(const Call& call, const Message& message, const Exp CHECK_GE(c_big_axis, 0); // For now, we only support simple pattern (no folded weight/data) // TODO(tvm-team) support general data layout - CHECK_EQ(kernel_layout.IndexOf(LayoutAxis::Get('o')), -1); - CHECK_EQ(kernel_layout.IndexOf(LayoutAxis::Get('i')), -1); - CHECK(message->axes.size() == 1 && c_big_axis == message->axes[0]->value); - - int big_oc_axis = kernel_layout.IndexOf(LayoutAxis::Get('O')); + int small_ko_axis = kernel_layout.IndexOf(LayoutAxis::Get('o')); + int small_ki_axis = kernel_layout.IndexOf(LayoutAxis::Get('i')); + int big_ki_axis = kernel_layout.IndexOf(LayoutAxis::Get('I')); + int big_ko_axis = kernel_layout.IndexOf(LayoutAxis::Get('O')); // Check it must be depthwise or full conv2d. bool is_depthwise_conv2d = IsDepthwiseConv2D(call, param, kernel_layout); CHECK(param->groups == 1 || is_depthwise_conv2d); + bool is_simple = (small_ko_axis < 0 && small_ki_axis < 0 && big_ki_axis >= 0); + bool is_blocking = (small_ko_axis >= 0 && small_ki_axis >= 0 && big_ki_axis >= 0); + CHECK(is_simple || is_blocking); Expr data = transformer->Transform(call->args[0], NullValue(), NullValue()); Expr weight = transformer->Transform(call->args[1], NullValue(), NullValue()); // scale on input for deptwise. - Expr wscale = ExpandBiasToMatchAxis(scale, kernel_layout.ndim(), {big_oc_axis}); + Expr wscale; + if (is_simple) { + wscale = ExpandBiasToMatchAxis(scale, kernel_layout.ndim(), {big_ko_axis}); + } else { + wscale = ReshapeToMatchAxis(scale, weight->type_as()->shape, + {big_ko_axis, small_ko_axis}); + if (!wscale.defined()) return transformer->NormalCallTransform(call.operator->()); + } weight = Multiply(weight, wscale); return Call(call->op, {data, weight}, call->attrs, call->type_args); } diff --git a/tests/python/relay/test_pass_fold_scale_axis.py b/tests/python/relay/test_pass_fold_scale_axis.py index d7c437adcc99..8aecf3f891f3 100644 --- a/tests/python/relay/test_pass_fold_scale_axis.py +++ b/tests/python/relay/test_pass_fold_scale_axis.py @@ -35,58 +35,75 @@ def run_opt_pass(expr, opt_pass): def test_fold_fwd_simple(): """Simple testcase.""" - def before(x, conv_weight, in_bias, in_scale, channels): + def before(x, conv_weight, in_bias, in_scale, channels, blocking): args = [x, conv_weight, in_bias] - in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2) x = relay.multiply(x, in_scale) x = relay.nn.relu(x) x = relay.add(x, in_bias) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW2i{}o".format(blocking[1]) if blocking else "OIHW") return relay.Function(args, y) - def expected(x, conv_weight, in_bias, in_scale, channels): + def expected(x, conv_weight, in_bias, in_scale, in_channels, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight, in_bias] - in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2) - squeezed_scale = relay.squeeze(in_scale, axis=[1,2]) - x = relay.nn.relu(x) - in_bias = relay.divide(in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) - x = relay.add(x, in_bias) - conv_weight = relay.multiply( - conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) + if blocking: + squeezed_scale = relay.squeeze(in_scale, axis=[0,2,3]) + x = relay.nn.relu(x) + in_bias = relay.divide(in_bias, + relay.reshape(squeezed_scale, (1, in_channels // blocking[0], 1, 1, blocking[0]))) #NCHWc + x = relay.add(x, in_bias) + conv_weight = relay.multiply(conv_weight, + relay.reshape(squeezed_scale, (1, in_channels//2, 1, 1, 2, 1))) #OIHWio + else: + squeezed_scale = relay.squeeze(in_scale, axis=[1,2]) + x = relay.nn.relu(x) + in_bias = relay.divide(in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) + x = relay.add(x, in_bias) + conv_weight = relay.multiply( + conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) + y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW2i{}o".format(blocking[1]) if blocking else "OIHW") return relay.Function(args, y) - def check(shape, channels): + def check(shape, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[1] weight = relay.var("weight") - in_bias = relay.var("in_bias", shape=(in_channels,)) - in_scale = relay.const(_get_positive_scale((in_channels, 1, 1))) - y1 = before(x, weight, in_bias, in_scale, channels) + if blocking: + in_channels = shape[1] * shape[4] + in_bias = relay.var("in_bias", shape=(1, in_channels // blocking[0], 1, 1, blocking[0])) + in_scale = relay.const(_get_positive_scale((1, in_channels // blocking[0], 1, 1, blocking[0]))) + else: + in_channels = shape[1] + in_bias = relay.var("in_bias", shape=(in_channels, 1, 1)) + in_scale = relay.const(_get_positive_scale((in_channels, 1, 1))) + y1 = before(x, weight, in_bias, in_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.ForwardFoldScaleAxis()) - y1_expected = expected(x, weight, in_bias, in_scale, channels) + y1_expected = expected(x, weight, in_bias, in_scale, in_channels, channels, blocking) y1_folded = run_opt_pass(y1_folded, transform.InferType()) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 2) - + check((2, 4, 10, 10), 2, None) + check((2, 2, 10, 10, 2), 8, (2, 4)) def test_fold_fwd_dual_path(): """scale axis being consumed by two consumers""" - def before(x, conv_weight, in_bias, in_scale, channels): + def before(x, conv_weight, in_bias, in_scale, channels, blocking): args = [x, conv_weight, in_bias] x = relay.multiply(in_scale, x) x = relay.nn.relu(x) @@ -94,363 +111,474 @@ def before(x, conv_weight, in_bias, in_scale, channels): y1 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - data_layout="NHWC", - kernel_layout="HWIO", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", groups=channels, padding=(1, 1)) y2 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - data_layout="NHWC", - kernel_layout="HWIO", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", groups=channels, padding=(1, 1)) z = relay.add(y1, y2) return relay.Function(args, z) - def expected(x, conv_weight, in_bias, in_scale, channels): + def expected(x, conv_weight, in_bias, in_scale, channels, blocking): args = [x, conv_weight, in_bias] x = relay.nn.relu(x) - in_bias = relay.divide(in_bias, in_scale) + if blocking: + _in_scale = relay.reshape(in_scale, (1, 1, 1, channels//blocking[0], blocking[0])) #NHWCc + else: + _in_scale = in_scale + in_bias = relay.divide(in_bias, _in_scale) x = relay.subtract(x, in_bias) + if blocking: + _in_scale = relay.reshape(in_scale, (1, 1, 1, channels//blocking[0], 1, blocking[0])) #HWIOio y1 = relay.nn.conv2d(x, - relay.multiply(conv_weight, in_scale), + relay.multiply(conv_weight, _in_scale), channels=channels, kernel_size=(3, 3), - data_layout="NHWC", - kernel_layout="HWIO", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", groups=channels, padding=(1, 1)) + if blocking: + _in_scale = relay.reshape(in_scale, (1, 1, 1, channels//blocking[0], 1, blocking[0])) #HWIOio y2 = relay.nn.conv2d(x, - relay.multiply(conv_weight, in_scale), + relay.multiply(conv_weight, _in_scale), channels=channels, kernel_size=(3, 3), - data_layout="NHWC", - kernel_layout="HWIO", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", groups=channels, padding=(1, 1)) z = relay.add(y1, y2) return relay.Function(args, z) - def check(dshape, channels): + def check(dshape, channels, blocking): x = relay.var("x", shape=dshape) - in_channels = dshape[-1] + if blocking: + in_channels = dshape[3] * dshape[4] + wshape = (3, 3, 1, channels//blocking[1], 1, blocking[1]) # HWIOio + weight = relay.var("weight", shape=wshape) + in_bias = relay.var("in_bias", shape=(in_channels//blocking[0],blocking[0])) + in_scale = relay.const(_get_positive_scale((in_channels//blocking[0],blocking[0]))) + else: + in_channels = dshape[-1] + wshape = (3, 3, 1, channels) # HWIO + weight = relay.var("weight", shape=wshape) + in_bias = relay.var("in_bias", shape=(in_channels,)) + in_scale = relay.const(_get_positive_scale(in_channels,)) + # test depthwise assert in_channels == channels - wshape = (3, 3, 1, channels) # HWIO - weight = relay.var("weight", shape=wshape) - in_bias = relay.var("in_bias", shape=(in_channels,)) - in_scale = relay.const(_get_positive_scale(in_channels,)) - y1 = before(x, weight, in_bias, in_scale, channels) + + y1 = before(x, weight, in_bias, in_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) y1_folded = run_opt_pass(y1, transform.ForwardFoldScaleAxis()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) - y1_expected = expected(x, weight, in_bias, in_scale, channels) + y1_expected = expected(x, weight, in_bias, in_scale, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 3), 3) - + check((2, 4, 10, 3), 3, None) + check((2, 4, 10, 2, 2), 4, (2, 2)) def test_fold_fwd_fail(): """testcase where we canont fold""" - def before(x, conv_weight, in_bias, in_scale, channels): + def before(x, conv_weight, in_bias, in_scale, channels, blocking): x = relay.multiply(x, in_scale) xx = relay.nn.leaky_relu(x, alpha=0.1) y1 = relay.nn.conv2d(xx, conv_weight, channels=channels, kernel_size=(3, 3), - data_layout="NHWC", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", padding=(1, 1)) z = relay.add(y1, x) return relay.Function(relay.analysis.free_vars(z), z) - def check(shape, channels): + def check(shape, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[-1] + if blocking: + in_channels = shape[3] * shape[4] + in_bias = relay.var("in_bias", shape=(in_channels//blocking[0],blocking[0])) + in_scale = relay.const(_get_positive_scale((in_channels//blocking[0],blocking[0]))) + else: + in_channels = shape[-1] + in_bias = relay.var("in_bias", shape=(in_channels,)) + in_scale = relay.const(_get_positive_scale(size=(in_channels,))) # test depthwise assert in_channels == channels weight = relay.var("weight") - in_bias = relay.var("in_bias", shape=(in_channels,)) - in_scale = relay.const(_get_positive_scale(size=(in_channels,))) - y1 = before(x, weight, in_bias, in_scale, channels) + y1 = before(x, weight, in_bias, in_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) y1_folded = run_opt_pass(y1, transform.ForwardFoldScaleAxis()) assert tvm.ir.structural_equal(y1, y1_folded) - check((2, 11, 10, 4), 4) - + check((2, 11, 10, 4), 4, None) + check((2, 11, 10, 2, 2), 4, (2,2)) def test_fold_fwd_relu_fail(): """testcase where we canont fold because scale can not pass relu""" - def before(x, conv_weight, in_bias, in_scale, channels): + def before(x, conv_weight, in_bias, in_scale, channels, blocking): x = relay.multiply(x, in_scale) xx = relay.nn.relu(x) y1 = relay.nn.conv2d(xx, conv_weight, channels=channels, kernel_size=(3, 3), - data_layout="NHWC", + data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC", + kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO", padding=(1, 1)) z = relay.add(y1, x) return relay.Function(relay.analysis.free_vars(z), z) - def check(shape, channels, in_scale): + def check(shape, channels, blocking, in_scale): x = relay.var("x", shape=shape) - in_channels = shape[-1] - # test depthwise - assert in_channels == channels weight = relay.var("weight") - in_bias = relay.var("in_bias", shape=(in_channels,)) - y1 = before(x, weight, in_bias, in_scale, channels) + if blocking: + in_channels = shape[3] * shape[4] + in_bias = relay.var("in_bias", shape=(1, in_channels // blocking[0], 1, 1, blocking[0])) + else: + in_channels = shape[-1] + in_bias = relay.var("in_bias", shape=(in_channels,)) + + assert in_channels == channels + y1 = before(x, weight, in_bias, in_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) y1_folded = run_opt_pass(y1, transform.ForwardFoldScaleAxis()) assert tvm.ir.structural_equal(y1, y1_folded) in_scale = relay.var("in_scale", shape=(4,)) - check((2, 11, 10, 4), 4, in_scale) + check((2, 11, 10, 4), 4, None, in_scale) in_scale = relay.const(-_get_positive_scale((4,))) - check((2, 11, 10, 4), 4, in_scale) + check((2, 11, 10, 4), 4, None, in_scale) + + in_scale = relay.var("in_scale", shape=(1,1,1,2,2)) + check((2, 11, 10, 2, 2), 4, (2, 2), in_scale) + in_scale = relay.const(-_get_positive_scale((1,1,1,2,2))) + check((2, 11, 10, 2, 2), 4, (2, 2), in_scale) + + def test_fold_fwd_negative_scale(): """Testcase of folding negative scale""" - def before(x, conv_weight, in_scale, channels): + def before(x, conv_weight, in_scale, channels, blocking): args = [x, conv_weight] x = relay.multiply(x, in_scale) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW4i{}o".format(blocking[1]) if blocking else "OIHW") return relay.Function(args, y) - def expected(x, conv_weight, in_scale, channels): + def expected(x, conv_weight, in_scale, in_channels, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight] - squeezed_scale = relay.squeeze(in_scale, axis=[1,2]) - conv_weight = relay.multiply( - conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) + if blocking: + squeezed_scale = relay.squeeze(in_scale, axis=[0,2,3]) + conv_weight = relay.multiply( + conv_weight , relay.reshape(squeezed_scale, (1, in_channels//4, 1, 1, 4, 1))) + #blocking by "i" in OIHWio + else: + squeezed_scale = relay.squeeze(in_scale, axis=[1,2]) + conv_weight = relay.multiply( + conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW4i{}o".format(blocking[1]) if blocking else "OIHW") return relay.Function(args, y) - def check(shape, channels): + def check(shape, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[1] - in_scale = relay.const(-_get_positive_scale((in_channels, 1, 1))) + if blocking: + in_channels = shape[1] * shape[4] + in_scale = relay.const(-_get_positive_scale((1, shape[1], 1, 1, shape[4]))) + else: + in_channels = shape[1] + in_scale = relay.const(-_get_positive_scale((in_channels, 1, 1))) weight = relay.var("weight") - y1 = before(x, weight, in_scale, channels) + y1 = before(x, weight, in_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.ForwardFoldScaleAxis()) - y1_expected = expected(x, weight, in_scale, channels) + y1_expected = expected(x, weight, in_scale, in_channels, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 4) - + check((2, 4, 10, 10), 4, None) + check((2, 2, 10, 10, 2), 8, (2, 2)) def test_fold_bwd_simple(): """Simple testcase.""" - def before(x, conv_weight, out_bias, out_scale, channels): + def before(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): args = [x, conv_weight, out_bias] - out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) + if blocking: + out_bias = relay.reshape(out_bias, (1, channels//blocking[1], 1, 1, blocking[1])) + else: + out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y = relay.add(y, out_bias) y = relay.nn.relu(y) + if blocking: + out_scale = relay.reshape(out_scale, (1, channels//blocking[1], 1, 1, blocking[1])) y = relay.multiply(y, out_scale) return relay.Function(args, y) - def expected(x, conv_weight, out_bias, out_scale, channels): + def expected(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight, out_bias] - out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) - squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) - conv_weight = relay.multiply( - conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) + if blocking: + out_bias = relay.reshape(out_bias, (1, channels//blocking[1], 1, 1, blocking[1])) + out_scale = relay.reshape(out_scale, (1, channels//blocking[1], 1, 1, blocking[1])) + squeezed_scale = relay.squeeze(out_scale, axis=[0, 2, 3]) + conv_weight = relay.multiply( + conv_weight , relay.reshape(squeezed_scale, (channels//blocking[1], 1, 1, 1, 1, blocking[1]))) + else: + out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) + squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) + conv_weight = relay.multiply( + conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) - out_bias = relay.multiply(out_bias, + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") + if blocking: + out_bias = relay.multiply(out_bias, + relay.reshape(squeezed_scale, (1, channels//blocking[1], 1, 1, blocking[1]))) + else: + out_bias = relay.multiply(out_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)) y = relay.add(y, out_bias) y = relay.nn.relu(y) return relay.Function(args, y) - def check(shape, channels): + def check(shape, in_channels, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[1] weight = relay.var("weight") out_bias = relay.var("out_bias", shape=(channels,)) - out_scale = relay.const(_get_positive_scale((channels, 1, 1))) - - y1 = before(x, weight, out_bias, out_scale, channels) + if blocking: + out_scale = relay.const(_get_positive_scale((channels,))) + else: + out_scale = relay.const(_get_positive_scale((channels,1, 1))) + y1 = before(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) - y1_expected = expected(x, weight, out_bias, out_scale, channels) + y1_expected = expected(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 8) + check((2, 4, 10, 10), 4, 8, None) + check((2, 2, 10, 10, 16), 32, 64, (16, 16)) def test_fold_bwd_dual_path(): """Dual path testcase.""" - def before(x, conv_weight, out_bias, out_scale, channels): + def before(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): args = [x, conv_weight, out_bias] y1 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y1 = relay.nn.relu(y1) y2 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y2 = relay.nn.relu(y2) y = relay.add(y1, y2) y = relay.multiply(y, out_scale) return relay.Function(args, y) - def expected(x, conv_weight, out_bias, out_scale, channels): + def expected(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight, out_bias] - out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) + if not blocking: + out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) def fold_conv_weight(): - return relay.multiply( - conv_weight , - relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) + if blocking: + return relay.multiply( + conv_weight , + relay.reshape(squeezed_scale, (channels//blocking[1], 1, 1, 1, 1, blocking[1]))) + else: + return relay.multiply( + conv_weight , + relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) y1 = relay.nn.conv2d(x, fold_conv_weight(), channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y1 = relay.nn.relu(y1) y2 = relay.nn.conv2d(x, fold_conv_weight(), channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y2 = relay.nn.relu(y2) y = relay.add(y1, y2) return relay.Function(args, y) - def check(shape, channels): + def check(shape, in_channels, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[1] weight = relay.var("weight") - out_bias = relay.var("out_bias", shape=(channels,)) - out_scale = relay.const(_get_positive_scale((channels, 1, 1))) - - y1 = before(x, weight, out_bias, out_scale, channels) + if blocking: + out_bias = relay.var("out_bias", shape=(channels // blocking[1], 1, 1, blocking[1])) + out_scale = relay.const(_get_positive_scale((channels // blocking[1], 1, 1, blocking[1]))) + else: + out_bias = relay.var("out_bias", shape=(channels,)) + out_scale = relay.const(_get_positive_scale((channels, 1, 1))) + + y1 = before(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) - y1_expected = expected(x, weight, out_bias, out_scale, channels) + y1_expected = expected(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 8) - + check((2, 4, 10, 10), 4, 8, None) + check((2, 2, 10, 10, 2), 4, 8, (2, 2)) def test_fold_bwd_dual_consumer(): - def before(x, conv_weight, out_bias, out_scale, channels): + def before(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): args = [x, conv_weight, out_bias] y0 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y0 = relay.multiply(y0, out_scale) y0 = relay.nn.relu(y0) y1 = relay.nn.conv2d(y0, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y1 = relay.multiply(y1, out_scale) y1 = relay.nn.relu(y1) y2 = relay.nn.conv2d(y0, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y2 = relay.multiply(y2, out_scale) y2 = relay.nn.relu(y2) y = relay.add(y1, y2) return relay.Function(args, y) - def expected(x, conv_weight, out_bias, out_scale, channels): + def expected(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight, out_bias] def fold_conv_weight(): squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) - return relay.multiply( - conv_weight , - relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) + if blocking: + return relay.multiply( + conv_weight , + relay.reshape(squeezed_scale, (channels//blocking[1], 1, 1, 1, 1, blocking[1]))) + else: + return relay.multiply( + conv_weight , + relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) y0 = relay.nn.conv2d(x, fold_conv_weight(), channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y0 = relay.nn.relu(y0) y1 = relay.nn.conv2d(y0, fold_conv_weight(), channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y1 = relay.nn.relu(y1) y2 = relay.nn.conv2d(y0, fold_conv_weight(), channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y2 = relay.nn.relu(y2) y = relay.add(y1, y2) return relay.Function(args, y) - def check(shape, channels): + def check(shape, in_channels, channels, blocking): x = relay.var("x", shape=shape) - in_channels = shape[1] weight = relay.var("weight") - out_bias = relay.var("out_bias", shape=(channels,)) - out_scale = relay.const(_get_positive_scale((channels,1, 1))) - - y1 = before(x, weight, out_bias, out_scale, channels) + if blocking: + out_bias = relay.var("out_bias", shape=(channels // blocking[1], 1, 1, blocking[1])) + out_scale = relay.const(_get_positive_scale((channels // blocking[1], 1, 1, blocking[1]))) + else: + out_bias = relay.var("out_bias", shape=(channels,)) + out_scale = relay.const(_get_positive_scale((channels, 1, 1))) + + y1 = before(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) - y1_expected = expected(x, weight, out_bias, out_scale, channels) + y1_expected = expected(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 4) - + check((2, 4, 10, 10), 4, 4, None) + check((2, 2, 10, 10, 2), 4, 4, (2, 2)) def test_fold_bwd_fail(): """Dual path testcase.""" - def fail1(x, conv_weight, out_bias, out_scale, channels): + def fail1(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): args = [x, conv_weight, out_bias] - out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) y1 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y1 = relay.nn.relu(y1) y2 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), padding=(1, 1), - out_layout="CNHW") + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW", + out_layout="CNHW{}c".format(blocking[1]) if blocking else "CNHW") # fold will fail because the axis from two path # differs from each other. y2 = relay.nn.relu(y2) @@ -458,99 +586,123 @@ def fail1(x, conv_weight, out_bias, out_scale, channels): y = relay.multiply(y, out_scale) return relay.Function(args, y) - def fail2(x, conv_weight, out_bias, out_scale, channels): + def fail2(x, conv_weight, out_bias, out_scale, in_channels, channels, blocking): args = [x, conv_weight, out_bias] - out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2) y1 = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y2 = relay.nn.relu(y1) # fold will fail because y1 is referred also by y2 y1 = relay.multiply(y1, out_scale) y = relay.add(y1, y2) return relay.Function(args, y) - def check(shape, channels, fbefore): + def check(shape, in_channels, channels, blocking, fbefore): x = relay.var("x", shape=shape) - in_channels = shape[1] weight = relay.var("weight") - out_bias = relay.var("out_bias", shape=(channels,)) - out_scale = relay.const(_get_positive_scale((channels, 1, 1))) - y1 = fbefore(x, weight, out_bias, out_scale, channels) + if blocking: + out_bias = relay.var("out_bias", shape=(channels // blocking[1], 1, 1, blocking[1])) + out_scale = relay.const(_get_positive_scale((channels // blocking[1], 1, 1, blocking[1]))) + else: + out_bias = relay.var("out_bias", shape=(channels, 1, 1)) + out_scale = relay.const(_get_positive_scale((channels, 1, 1))) + y1 = fbefore(x, weight, out_bias, out_scale, in_channels, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) assert tvm.ir.structural_equal(y1_folded, y1) - check((4, 4, 10, 10), 4, fail1) - check((4, 4, 10, 10), 4, fail2) + check((4, 4, 10, 10), 4, 4, None, fail1) + check((2, 2, 10, 10, 2), 4, 4, (2, 2), fail1) + check((4, 4, 10, 10), 4, 4, None, fail2) + check((4, 2, 10, 10, 2), 4, 4, (2, 2), fail2) def test_fold_bwd_relu_fail(): """testcase where we canont fold because scale can not pass relu""" - def before(x, conv_weight, out_scale, channels): + def before(x, conv_weight, out_scale, channels, blocking): y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - data_layout="NCHW", - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y = relay.nn.relu(y) y = relay.multiply(x, out_scale) return relay.Function(relay.analysis.free_vars(y), y) - def check(shape, channels, out_scale): + def check(shape, channels, blocking, out_scale): x = relay.var("x", shape=shape) in_channels = shape[1] weight = relay.var("weight") - y1 = before(x, weight, out_scale, channels) + y1 = before(x, weight, out_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) assert tvm.ir.structural_equal(y1, y1_folded) out_scale = relay.var("in_scale", shape=(4, 1, 1)) - check((4, 4, 10, 10), 4, out_scale) + check((4, 4, 10, 10), 4, None, out_scale) out_scale = relay.const(np.random.uniform(size=(4, 1, 1), low=-1.0, high=0.0)).astype("float32") - check((4, 4, 10, 10), 4, out_scale) + check((4, 4, 10, 10), 4, None, out_scale) + + out_scale = relay.var("in_scale", shape=(1, 2, 1, 1, 2)) + check((4, 2, 10, 10, 2), 4, (2, 2), out_scale) + out_scale = relay.const(np.random.uniform(size=(1, 2, 1, 1, 2), low=-1.0, high=0.0)).astype("float32") + check((4, 2, 10, 10, 2), 4, (2, 2), out_scale) def test_fold_bwd_negative_scale(): """Testcase of folding negative scale""" - def before(x, conv_weight, out_scale, channels): + def before(x, conv_weight, out_scale, channels, blocking): args = [x, conv_weight] y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") y = relay.multiply(y, out_scale) return relay.Function(args, y) - def expected(x, conv_weight, out_scale, channels): + def expected(x, conv_weight, out_scale, channels, blocking): # use a fixed order of args so alpha equal check can pass args = [x, conv_weight] - squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) - conv_weight = relay.multiply( - conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) + if blocking: + squeezed_scale = relay.squeeze(out_scale, axis=[0,2,3]) + conv_weight = relay.multiply( + conv_weight , relay.reshape(squeezed_scale, (channels//blocking[1], 1, 1, 1, 1, blocking[1]))) + else: + squeezed_scale = relay.squeeze(out_scale, axis=[1,2]) + conv_weight = relay.multiply( + conv_weight , relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3)) y = relay.nn.conv2d(x, conv_weight, channels=channels, kernel_size=(3, 3), - padding=(1, 1)) + padding=(1, 1), + data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW", + kernel_layout="OIHW1i{}o".format(blocking[1]) if blocking else "OIHW") return relay.Function(args, y) - def check(shape, channels): + def check(shape, channels, blocking): x = relay.var("x", shape=shape) weight = relay.var("weight") - out_scale = relay.const(-_get_positive_scale((channels, 1, 1))) - y1 = before(x, weight, out_scale, channels) + if blocking: + out_scale = relay.const(-_get_positive_scale((1,channels//blocking[1], 1, 1, blocking[1]))) + else: + out_scale = relay.const(-_get_positive_scale((channels, 1, 1))) + y1 = before(x, weight, out_scale, channels, blocking) y1 = run_opt_pass(y1, transform.InferType()) type_dict = {x.name_hint:x.checked_type for x in y1.params} weight = relay.var("weight", type_dict["weight"]) y1_folded = run_opt_pass(y1, transform.BackwardFoldScaleAxis()) - y1_expected = expected(x, weight, out_scale, channels) + y1_expected = expected(x, weight, out_scale, channels, blocking) y1_expected = run_opt_pass(y1_expected, transform.InferType()) assert tvm.ir.structural_equal(y1_folded, y1_expected) - check((2, 4, 10, 10), 8) - + check((2, 4, 10, 10), 8, None) + check((2, 2, 10, 10, 2), 8, (2, 2)) if __name__ == "__main__": test_fold_fwd_simple() diff --git a/topi/python/topi/x86/conv2d_alter_op.py b/topi/python/topi/x86/conv2d_alter_op.py index 5ee691b07362..d1c607f6a3e5 100644 --- a/topi/python/topi/x86/conv2d_alter_op.py +++ b/topi/python/topi/x86/conv2d_alter_op.py @@ -19,6 +19,7 @@ import logging +import re import tvm from tvm import te from tvm import relay @@ -31,6 +32,9 @@ logger = logging.getLogger('topi') +_NCHWc_matcher = re.compile("^NCHW[0-9]+c$") +_OIHWio_matcher = re.compile("^OIHW[0-9]+i[0-9]+o$") + @conv2d_alter_layout.register("cpu") def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): target = tvm.target.Target.current(allow_none=False) @@ -64,30 +68,33 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): if topi_tmpl == "conv2d_NCHWc.x86": # we only convert conv2d_NCHW to conv2d_NCHWc for x86 - assert data_layout == "NCHW" and kernel_layout == "OIHW" - if cfg.is_fallback: - _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding, - out_dtype, False, data_layout) - batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape) - out_channel, _, kh, kw = get_const_tuple(kernel_tensor.shape) - ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] - - # update new attrs - new_attrs['channels'] = out_channel - new_attrs['data_layout'] = 'NCHW%dc' % ic_bn - # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc) - new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn) - new_attrs['out_layout'] = 'NCHW%dc' % oc_bn - - # Store altered operator's config - new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, - kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) - new_workload = autotvm.task.args_to_workload( - [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], - new_attrs["out_layout"], out_dtype], topi_tmpl) - dispatch_ctx.update(target, new_workload, cfg) + if data_layout == "NCHW" and kernel_layout == "OIHW": + if cfg.is_fallback: + _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding, + out_dtype, False, data_layout) + batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape) + out_channel, _, kh, kw = get_const_tuple(kernel_tensor.shape) + ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] + + # update new attrs + new_attrs['channels'] = out_channel + new_attrs['data_layout'] = 'NCHW%dc' % ic_bn + # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc) + new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn) + new_attrs['out_layout'] = 'NCHW%dc' % oc_bn + + # Store altered operator's config + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, + kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) + new_workload = autotvm.task.args_to_workload( + [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], + new_attrs["out_layout"], out_dtype], topi_tmpl) + dispatch_ctx.update(target, new_workload, cfg) + else: + assert _NCHWc_matcher.match(data_layout) + assert _OIHWio_matcher.match(kernel_layout) return relay.nn.contrib_conv2d_nchwc(*inputs, **new_attrs) if topi_tmpl == "conv2d_NCHWc_int8.x86": @@ -136,30 +143,34 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): return relay.nn.contrib_conv2d_nchwc(data_expr, kernel_OIHWioe, **new_attrs) if topi_tmpl == "depthwise_conv2d_NCHWc.x86": - assert data_layout == "NCHW" and kernel_layout == "OIHW" - if cfg.is_fallback: - _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding, - out_dtype, True, data_layout) - - batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape) - out_channel, channel_multiplier, kh, kw = get_const_tuple(kernel_tensor.shape) - ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] - assert channel_multiplier == 1 - - # update new attrs - new_attrs['channels'] = out_channel - new_attrs['data_layout'] = 'NCHW%dc' % ic_bn - new_attrs['kernel_layout'] = 'OIHW1i%do' % oc_bn - new_attrs['out_layout'] = 'NCHW%dc' % oc_bn - - # Store altered operator's config. - new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype) - new_workload = autotvm.task.args_to_workload( - [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], - new_attrs['out_layout'], out_dtype], topi_tmpl) - dispatch_ctx.update(target, new_workload, cfg) + if data_layout == "NCHW" and kernel_layout == "OIHW": + if cfg.is_fallback: + _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding, + out_dtype, True, data_layout) + + batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape) + out_channel, channel_multiplier, kh, kw = get_const_tuple(kernel_tensor.shape) + ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] + assert channel_multiplier == 1 + + # update new attrs + new_attrs['channels'] = out_channel + new_attrs['data_layout'] = 'NCHW%dc' % ic_bn + new_attrs['kernel_layout'] = 'OIHW1i%do' % oc_bn + new_attrs['out_layout'] = 'NCHW%dc' % oc_bn + + # Store altered operator's config. + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), + dtype=kernel_dtype) + new_workload = autotvm.task.args_to_workload( + [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], + new_attrs['out_layout'], out_dtype], topi_tmpl) + dispatch_ctx.update(target, new_workload, cfg) + else: + assert _NCHWc_matcher.match(data_layout) + assert _OIHWio_matcher.match(kernel_layout) return relay.nn.contrib_depthwise_conv2d_nchwc(*inputs, **new_attrs) return None