[BYOC][ACL] Support asymmetric per-layer quantized operators (apache#6109)

* [BYOC][ACL] Support asymmetric per-layer quantization

Adds support for asymmetric per-layer quantization in the ACL runtime. This includes support for qnn.conv2d, nn.max_pool2d and reshape. These changes are reflected in the codegen and runtime tests.

Change-Id: I8f610bd37af1e3740fd48c2d502bcc4727d9d712
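
For context, offloading follows the usual ACL BYOC flow; a minimal sketch of exercising the new support (illustrative only — it assumes the existing partition_for_arm_compute_lib helper in this module and the AArch64 target string from the ACL deploy docs):

    import tvm
    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

    def build_with_acl(mod, params):
        # Partition supported operators (now including uint8 qnn.conv2d,
        # nn.max_pool2d and reshape) into ACL regions, then build for AArch64.
        mod = partition_for_arm_compute_lib(mod)
        target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon"
        with tvm.transform.PassContext(opt_level=3):
            return relay.build(mod, target=target, params=params)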

* Address comments

Change-Id: I4f9e3e7dbf6053066927cf07c4c19ecc88572e9d

* Fix tutorial

Change-Id: I4371e9d97a120fb7776db40ffcde60f46927af4d

* Improve test infrastructure

* Doc-string for generate trials
* Output params on error

Change-Id: Ib2e2b1fcdf05cdc77f7f4fb4b46395f28c129957
lhutton1 authored and Trevor Morris committed Aug 26, 2020
1 parent 2038fd6 commit 65b4b08
Showing 12 changed files with 865 additions and 268 deletions.
27 changes: 27 additions & 0 deletions docs/deploy/arm_compute_lib.rst
@@ -121,6 +121,33 @@ networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here y
`infrastructure.py` to use the remote device you have set up.


Operator support
----------------

+---------------+--------------------------------------------------------------------------+
| Relay Node    | Remarks                                                                  |
+===============+==========================================================================+
| nn.conv2d     | fp32:                                                                    |
|               |   Simple: nn.conv2d                                                      |
|               |   Composite: nn.pad?, nn.conv2d, nn.bias_add?, nn.relu?                  |
|               |                                                                          |
|               | (only groups = 1 supported)                                              |
+---------------+--------------------------------------------------------------------------+
| qnn.conv2d    | uint8:                                                                   |
|               |   Composite: nn.pad?, qnn.conv2d, nn.bias_add?, nn.relu?, qnn.requantize |
|               |                                                                          |
|               | (only groups = 1 supported)                                              |
+---------------+--------------------------------------------------------------------------+
| nn.max_pool2d | fp32, uint8                                                              |
+---------------+--------------------------------------------------------------------------+
| reshape       | fp32, uint8                                                              |
+---------------+--------------------------------------------------------------------------+

.. note::
    A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view this
    as being a single fused operator from the viewpoint of Arm Compute Library. '?' denotes an optional operator in
    the series of operators that make up a composite operator.
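
A minimal sketch of a graph that matches the `qnn.conv2d` composite above (shapes, scales and
zero points are illustrative only):

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 14, 14, 32), dtype="uint8")
    kernel = relay.const(np.zeros((3, 3, 32, 16), dtype="uint8"))
    conv = relay.qnn.op.conv2d(
        data, kernel,
        input_zero_point=relay.const(0, "int32"),
        kernel_zero_point=relay.const(0, "int32"),
        input_scale=relay.const(0.5, "float32"),
        kernel_scale=relay.const(0.5, "float32"),
        kernel_size=(3, 3), channels=16,
        data_layout="NHWC", kernel_layout="HWIO",
        out_dtype="int32")
    out = relay.qnn.op.requantize(
        conv,
        input_scale=relay.const(0.25, "float32"),
        input_zero_point=relay.const(0, "int32"),
        output_scale=relay.const(0.5, "float32"),
        output_zero_point=relay.const(0, "int32"),
        out_dtype="uint8")
    mod = tvm.IRModule.from_expr(relay.Function([data], out))

After partitioning for Arm Compute Library, this sequence is offloaded as a single composite
function (`arm_compute_lib.qnn_conv2d` in the pattern table).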


Adding a new operator
---------------------
Adding a new operator requires changes to a series of places. This section will give a hint on
50 changes: 47 additions & 3 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -81,14 +81,41 @@ def conv_pattern():
        pattern = pattern.optional(is_op('nn.relu'))
        return pattern

    def qnn_conv_pattern():
        """Create a quantized convolution pattern.
        Returns
        -------
        pattern : dataflow_pattern.AltPattern
            Denotes the convolution pattern.
        """
        pattern = is_op('nn.pad')(wildcard()) | wildcard()
        pattern = is_op('qnn.conv2d')(
            pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
        pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
        pattern = pattern.optional(is_op('nn.relu'))
        pattern = is_op('qnn.requantize')(
            pattern, wildcard(), wildcard(), is_constant(), is_constant())
        return pattern

    def check_conv(extract):
        """Check conv pattern is supported by ACL."""
        call = extract
        while call.op.name != "nn.conv2d":
            call = call.args[0]
        return conv2d(call.attrs, call.args)

    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)]
    def check_qnn_conv(extract):
        """Check qnn conv pattern is supported by ACL."""
        if extract.attrs.out_dtype != "uint8":
            return False
        call = extract
        while call.op.name != "qnn.conv2d":
            call = call.args[0]
        return qnn_conv2d(call.attrs, call.args)

    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv),
            ('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv)]
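
The tuples above are consumed by the standard BYOC partitioning passes. A rough sketch of that
flow (illustrative only; assumes the enclosing pattern-table function is exposed as
arm_compute_lib_pattern_table, much like the partition helper in this module uses it):

    from tvm import relay, transform

    def partition_for_acl(mod):
        """Illustrative only: merge composites, annotate and partition for ACL."""
        seq = transform.Sequential([
            relay.transform.MergeComposite(arm_compute_lib_pattern_table()),
            relay.transform.AnnotateTarget("arm_compute_lib"),
            relay.transform.PartitionGraph(),
        ])
        return seq(mod)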


def _register_external_op_helper(op_name, supported=True):
@@ -115,7 +142,24 @@ def conv2d(attrs, args):
    if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "float32":
        return False
    kernel_typ = args[1].checked_type
    if kernel_typ.dtype != "float32":
    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "float32":
        return False
    return True


def qnn_conv2d(attrs, args):
    """Check if the external ACL codegen for qnn.conv2d should be used."""
    if attrs.groups != 1:
        return False
    if attrs.data_layout != "NHWC":
        return False
    if attrs.out_dtype != "int32" and attrs.out_dtype != "":
        return False
    data_typ = args[0].checked_type
    if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "uint8":
        return False
    kernel_typ = args[1].checked_type
    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "uint8":
        return False
    return True

@@ -126,6 +170,6 @@ def max_pool2d(attrs, args):
    if attrs.layout != "NHWC":
        return False
    typ = args[0].checked_type
    if typ.dtype != "float32":
    if typ.dtype not in ["float32", "uint8"]:
        return False
    return True
17 changes: 14 additions & 3 deletions python/tvm/relay/qnn/op/layout_conversions.py
@@ -20,6 +20,8 @@

from tvm.relay.op import op as reg

from ...op.strategy.generic import is_depthwise_conv2d


@reg.register_convert_op_layout("qnn.conv2d")
def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
@@ -51,11 +53,20 @@ def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
    new_attrs = dict(attrs)
    new_attrs['data_layout'] = desired_data_layout

    if desired_kernel_layout != "default":
        new_attrs['kernel_layout'] = desired_kernel_layout
        return relay.qnn.op.conv2d(*inputs, **new_attrs)

    if desired_data_layout == 'NCHW':
        if desired_kernel_layout != "default":
            new_attrs['kernel_layout'] = desired_kernel_layout
        new_attrs['kernel_layout'] = 'OIHW'
        return relay.qnn.op.conv2d(*inputs, **new_attrs)
    if desired_data_layout == 'NHWC':
        # Check for depthwise convolution.
        if is_depthwise_conv2d(inputs[0].shape, attrs['data_layout'], inputs[1].shape,
                               attrs['kernel_layout'], attrs['groups']):
            new_attrs['kernel_layout'] = 'HWOI'
        else:
            new_attrs['kernel_layout'] = 'OIHW'
            new_attrs['kernel_layout'] = 'HWIO'
        return relay.qnn.op.conv2d(*inputs, **new_attrs)

    raise ValueError('Layout %s is not yet supported' % desired_data_layout)
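
For reference, the conversion above is driven by the ConvertLayout pass; a hedged usage sketch
(mirrors the desired_layouts map used by the ACL codegen below):

    from tvm import relay, transform

    def convert_conv_layouts(mod):
        # Illustrative: request NHWC data / OHWI kernels for both fp32 and
        # quantized convolutions, matching PreProcessModule in the ACL codegen.
        desired_layouts = {"nn.conv2d": ["NHWC", "OHWI"], "qnn.conv2d": ["NHWC", "OHWI"]}
        with transform.PassContext(opt_level=3):
            return relay.transform.ConvertLayout(desired_layouts)(mod)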
94 changes: 68 additions & 26 deletions src/relay/backend/contrib/arm_compute_lib/codegen.cc
@@ -49,6 +49,18 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
public:
ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}

/*!
* \brief A series of operators that form a composite
* convolution. Supports both nn.conv2d and qnn.conv2d.
*/
struct CompositeConvNode {
const CallNode* pad = nullptr;
const CallNode* conv = nullptr;
const CallNode* bias = nullptr;
const CallNode* activation = nullptr;
const CallNode* requantize = nullptr;
};

/*!
* \brief Visit call nodes and generate appropriate JSON node.
*
@@ -68,7 +80,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
CHECK(comp.defined()) << "Arm Compute Library JSON runtime only supports composite functions.";
const std::string name = comp.value();
std::shared_ptr<JSONGraphNode> json_node;
if (name == "arm_compute_lib.conv2d") {
if (name == "arm_compute_lib.conv2d" || name == "arm_compute_lib.qnn_conv2d") {
json_node = CreateCompositeConvJSONNode(cn);
} else {
LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
@@ -78,57 +90,86 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {

private:
/*!
* \brief Create a JSON representation of a composite convolution.
* \brief Extract convolution nodes from a composite function.
*
* \param call The call to be represented.
* \return A JSON representation of a specific operator.
* \param cn The call node of the composite function.
* \return Extracted composite convolution nodes.
*/
std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
const std::string name = "nn.conv2d";
const CallNode* pad = nullptr;
const CallNode* conv = nullptr;
const CallNode* bias = nullptr;
bool has_activation = false;

// Unpack composite function
static CompositeConvNode UnpackCompositeConvolution(const CallNode* cn) {
CompositeConvNode nodes{};
const auto* fn = cn->op.as<FunctionNode>();
CHECK(fn);

// Traverse composite convolution function from child to parent
const auto* current_call = fn->body.as<CallNode>();
if (backend::IsOp(current_call, "qnn.requantize")) {
nodes.requantize = current_call;
current_call = current_call->args[0].as<CallNode>();
}
if (backend::IsOp(current_call, "nn.relu")) {
has_activation = true;
nodes.activation = current_call;
current_call = current_call->args[0].as<CallNode>();
}
if (backend::IsOp(current_call, "nn.bias_add")) {
bias = current_call;
nodes.bias = current_call;
current_call = current_call->args[0].as<CallNode>();
}
CHECK(backend::IsOp(current_call, "nn.conv2d"));
conv = current_call;
// Enforce a convolution node exists at this point during traversal
if (nodes.requantize) {
CHECK(backend::IsOp(current_call, "qnn.conv2d"));
} else {
CHECK(backend::IsOp(current_call, "nn.conv2d"));
}
nodes.conv = current_call;
if (!current_call->args.empty() && current_call->args[0]->IsInstance<CallNode>()) {
current_call = current_call->args[0].as<CallNode>();
if (backend::IsOp(current_call, "nn.pad")) {
pad = current_call;
nodes.pad = current_call;
}
}
return nodes;
}

/*!
* \brief Create a JSON representation of a composite convolution.
*
* \param cn The call to be represented.
* \return A JSON representation of a specific operator.
*/
std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
CompositeConvNode nodes = UnpackCompositeConvolution(cn);
std::string name = "nn.conv2d";

const auto* conv_attr = conv->attrs.as<Conv2DAttrs>();
const auto* conv_attr = nodes.conv->attrs.as<Conv2DAttrs>();
CHECK(conv_attr);
CHECK(conv_attr->kernel_layout == "OHWI")
<< "Kernel layout must be OHWI, has the module been pre-processed correctly?";

// Inputs must be added in the same order they appear in the relay graph.
std::vector<JSONGraphNodeEntry> inputs;
inputs.push_back(VisitExpr(cn->args[0])[0]);
inputs.push_back(VisitExpr(conv->args[1])[0]);
if (bias) {
inputs.push_back(VisitExpr(bias->args[1])[0]);
inputs.push_back(VisitExpr(nodes.conv->args[1])[0]);
if (nodes.requantize) {
name = "qnn.conv2d";
inputs.push_back(VisitExpr(nodes.conv->args[2])[0]); // input zero-point
inputs.push_back(VisitExpr(nodes.conv->args[3])[0]); // kernel zero-point
inputs.push_back(VisitExpr(nodes.conv->args[4])[0]); // input scale
inputs.push_back(VisitExpr(nodes.conv->args[5])[0]); // kernel scale
}
if (nodes.bias) {
inputs.push_back(VisitExpr(nodes.bias->args[1])[0]);
}
if (nodes.requantize) {
inputs.push_back(VisitExpr(nodes.requantize->args[3])[0]); // output scale
inputs.push_back(VisitExpr(nodes.requantize->args[4])[0]); // output zero-point
}
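// Resulting input order for qnn.conv2d: data, weight, input zero-point, kernel zero-point,
// input scale, kernel scale, (optional) bias, output scale, output zero-point.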

auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
SetCallNodeAttribute(json_node, conv);
SetCallNodeAttribute(json_node, nodes.conv);

// Override attributes
if (pad) {
const auto* pad_attr = pad->attrs.as<PadAttrs>();
if (nodes.pad) {
const auto* pad_attr = nodes.pad->attrs.as<PadAttrs>();
CHECK(pad_attr);
auto p = pad_attr->pad_width;
// Convert to TVM layout for now, conversion to ACL layout takes place in runtime.
Expand All @@ -141,7 +182,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
padding_attr.emplace_back(padding);
json_node->SetAttr("padding", padding_attr);
}
if (has_activation) {
if (nodes.activation) {
std::vector<std::string> activation_type = {"relu"};
std::vector<dmlc::any> act_attr;
act_attr.emplace_back(activation_type);
Expand All @@ -161,7 +202,8 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
*/
IRModule PreProcessModule(const IRModule& mod) {
IRModule preprocessed_module;
tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}}};
tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}},
{"qnn.conv2d", {"NHWC", "OHWI"}}};
preprocessed_module = transform::ConvertLayout(desired_layouts)(mod);
preprocessed_module = transform::FoldConstant()(preprocessed_module);
return preprocessed_module;