Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BYOC][ETHOSN] Add support for quantized convolution #6335

Merged
merged 4 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions python/tvm/relay/op/contrib/ethosn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
"""Arm(R) Ethos(TM) -N NPU supported operators."""
from enum import Enum
import tvm.ir
from ...dataflow_pattern import wildcard, is_op, is_constant
from ... import qnn as _qnn
from .register import register_pattern_table
from . import _ethosn as support


Expand All @@ -40,6 +42,30 @@ def ethosn_available():
return Available.SW_AND_HW if hw else Available.SW_ONLY


@register_pattern_table("ethos-n")
def pattern_table():
    """Build the list of composite patterns offloaded to the Ethos-N compiler.

    Returns
    -------
    list of tuple
        One (composite name, dataflow pattern, check function) entry per
        supported composite function.
    """

    def qnn_conv_pattern():
        """Match [nn.pad ->] qnn.conv2d -> nn.bias_add -> qnn.requantize."""
        # The convolution input is either a padded tensor or any expression.
        data = is_op('nn.pad')(wildcard()) | wildcard()
        conv = is_op('qnn.conv2d')(
            data, is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
        biased = is_op('nn.bias_add')(conv, is_constant())
        return is_op('qnn.requantize')(
            biased, is_constant(), is_constant(), is_constant(), is_constant())

    def check_conv2d(extract):
        """Return whether the extracted conv2d composite is supported by Ethos-N."""
        if ethosn_available():
            return support.conv2d(extract)
        return False

    return [
        ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
    ]


@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
def qnn_concatenate(attrs, args):
"""Check if a concatenate is supported by Ethos-N."""
Expand Down
43 changes: 41 additions & 2 deletions src/relay/backend/contrib/ethosn/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ bool IsEthosnOp(const Call& call, const std::string& op_name) {
}
}

bool IsEthosnFunc(const Call& call, const std::string& op_name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me that this function is not just for checking if a call is for an Ethos-N composite function. Maybe we should move it to a common place, so does IsEthosnOp.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a suggestion of a good common location? We could maybe handle this in a follow-up.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea... Let's leave them here for now.

if (call->op->IsInstance<FunctionNode>()) {
Function func = Downcast<Function>(call->op);
CHECK(func.defined());
auto name_node = func->GetAttr<String>(attr::kComposite);
return name_node.value() == op_name;
}
return false;
}

std::map<Expr, std::vector<sl::TensorInfo>> InferTensorsVisitor::Infer(const Expr& expr) {
tensor_table_.clear();
CHECK(expr->checked_type().defined());
Expand All @@ -69,7 +79,11 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
EthosnError err;
Call call = GetRef<Call>(cn);
// Determine call -> NPU mapping
if (IsEthosnOp(call, "qnn.concatenate")) {
if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) {
ConvolutionParams params;
err += EthosnAPI::QnnConv2d(cn->op.as<FunctionNode>()->body, &params);
tensor_table_[cn->args[0]] = {params.activation_info};
} else if (IsEthosnOp(call, "qnn.concatenate")) {
ConcatenateParams params;
err = EthosnAPI::Concatenate(call, &params);
tensor_table_[cn->args[0]] = params.input_infos;
Expand Down Expand Up @@ -181,7 +195,10 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
sl::TensorAndId<sl::Operand> tensor;
sl::TensorsAndId tensors;
// Determine call -> NPU mapping
if (IsEthosnOp(call, "qnn.concatenate")) {
if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) {
if ((err = MakeConvolutionLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnOp(call, "qnn.concatenate")) {
if ((err = MakeConcatenateLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnOp(call, "split")) {
Expand Down Expand Up @@ -227,6 +244,28 @@ void ConstructNetworkVisitor::VisitLeaf(const Expr& expr) {
if (!expr->IsInstance<FunctionNode>()) MixedModeVisitor::VisitLeaf(expr);
}

/*!
 * \brief Add a (depthwise) convolution for an ethos-n.qnn_conv2d call to the network.
 * \param call The call whose op is the composite function to convert.
 * \param out Receives the operand produced by the added convolution.
 * \return An empty error on success, otherwise the parameter extraction error
 * or the message of the support library's NotSupportedException.
 */
EthosnError ConstructNetworkVisitor::MakeConvolutionLayer(const Call& call,
                                                          sl::TensorAndId<sl::Operand>* out) {
  // Translate the composite function body into support library parameters.
  ConvolutionParams conv_params;
  EthosnError parse_err =
      EthosnAPI::QnnConv2d(call->op.as<FunctionNode>()->body, &conv_params);
  if (parse_err) {
    return parse_err;
  }

  // The input operand was registered when the call's first argument was visited.
  auto input = operand_table_[call->args[0]][0];
  auto weights_operand =
      AddConstant(network_, conv_params.weights_info, conv_params.raw_weights).tensor;
  auto bias_operand = AddConstant(network_, conv_params.bias_info, conv_params.raw_bias).tensor;

  try {
    if (!conv_params.is_depthwise) {
      *out = AddConvolution(network_, *input, *bias_operand, *weights_operand,
                            conv_params.conv_info);
    } else {
      *out = AddDepthwiseConvolution(network_, *input, *bias_operand, *weights_operand,
                                     conv_params.conv_info);
    }
  } catch (const sl::NotSupportedException& ex) {
    return EthosnError(ex.what());
  }
  return EthosnError();
}

EthosnError ConstructNetworkVisitor::MakeConcatenateLayer(const Call& call,
sl::TensorAndId<sl::Operand>* out) {
ConcatenateParams params;
Expand Down
1 change: 1 addition & 0 deletions src/relay/backend/contrib/ethosn/codegen_ethosn.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingP
void VisitLeaf(const Expr& expr) final;

// Make a support library operand from a Call
EthosnError MakeConvolutionLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
EthosnError MakeConcatenateLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
EthosnError MakeSplitLayer(const Call& call, sl::TensorsAndId* outs);

Expand Down
190 changes: 190 additions & 0 deletions src/relay/backend/contrib/ethosn/ethosn_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,105 @@ namespace relay {
namespace contrib {
namespace ethosn {

/*!
 * \brief Extract the support library convolution parameters from the body of
 * an ethos-n.qnn_conv2d composite function.
 *
 * The expression is expected to be
 * [nn.pad ->] qnn.conv2d -> nn.bias_add -> qnn.requantize, with the
 * requantize call outermost.
 *
 * \param expr The composite function body (the qnn.requantize call).
 * \param params Receives the convolution, activation, weights and bias
 * descriptions together with raw pointers to the weights and bias data.
 * \return The accumulated conversion errors; empty when everything converted.
 */
EthosnError EthosnAPI::QnnConv2d(const Expr& expr, ConvolutionParams* params) {
  // Walk the composite body from the outermost call inwards.
  Call requantize = Downcast<Call>(expr);
  Call bias_add = Downcast<Call>(requantize->args[0]);
  Call conv = Downcast<Call>(bias_add->args[0]);
  Call pad;
  if (conv->args[0]->IsInstance<CallNode>() &&
      Downcast<Call>(conv->args[0])->op == Op::Get("nn.pad"))
    pad = Downcast<Call>(conv->args[0]);
  const auto& conv_attr = conv->attrs.as<Conv2DAttrs>();

  // Extract the quantization params from the arguments.
  // Zero-initialised so that a failed AsConstant (which is only recorded in
  // err) never leaves an indeterminate value to be read further down.
  int input_zero_point = 0;
  int kernel_zero_point = 0;
  int output_zero_point = 0;
  float input_scale = 0.0f;
  float kernel_scale = 0.0f;
  float output_scale = 0.0f;
  EthosnError err = AsConstant<int>(conv->args[2], &input_zero_point);
  err += AsConstant<int>(conv->args[3], &kernel_zero_point);
  err += AsConstant<int>(requantize->args[4], &output_zero_point);
  err += AsConstant<float>(conv->args[4], &input_scale);
  err += AsConstant<float>(conv->args[5], &kernel_scale);
  err += AsConstant<float>(requantize->args[3], &output_scale);

  // Convert quantization params
  sl::QuantizationInfo data_q_info;
  sl::QuantizationInfo weights_q_info;
  sl::QuantizationInfo bias_q_info;
  sl::QuantizationInfo output_q_info;
  err += Tvm2Npu(input_zero_point, input_scale, &data_q_info);
  err += Tvm2Npu(kernel_zero_point, kernel_scale, &weights_q_info);
  // The bias uses a zero point of 0 and the product of the input and kernel scales.
  err += Tvm2Npu(0, data_q_info.m_Scale * weights_q_info.m_Scale, &bias_q_info);
  err += Tvm2Npu(output_zero_point, output_scale, &output_q_info);

  // Convert convolution attributes
  sl::Padding padding;
  if (pad.defined()) {
    // Record a failed conversion instead of dropping it: otherwise malformed
    // attribute padding would go unnoticed by the check below.
    err += Tvm2Npu(conv_attr->padding, &padding);
    // Don't support both standalone operator padding and attribute defined padding
    if (padding != sl::Padding({0, 0, 0, 0})) {
      err += EthosnError(
          ErrStrm() << "both op and attr padding exist, must be either op/attr only or no padding");
    }
    err += Tvm2Npu(pad->attrs.as<PadAttrs>()->pad_width, &padding);
  } else {
    err += Tvm2Npu(conv_attr->padding, &padding);
  }
  sl::Stride stride;
  err += Tvm2Npu(conv_attr->strides, &stride);
  // Dilation is not supported
  std::array<uint32_t, 4> dilation = {1, 1, 1, 1};
  // Record a failed conversion: otherwise the defaults above would make an
  // unconvertible dilation look like [1, 1].
  err += AsArray(conv_attr->dilation, &dilation);
  if (conv_attr->dilation.size() != 2 || dilation[0] != 1 || dilation[1] != 1) {
    err +=
        EthosnError(ErrStrm() << "dilation=" << conv_attr->dilation << ", dilation must = [1, 1]");
  }
  // Create convolution info
  params->conv_info = sl::ConvolutionInfo(padding, stride, output_q_info);

  // Create data info. With a standalone nn.pad the activation is the input of
  // the pad, not the (already padded) input of the convolution.
  const TensorTypeNode* data_dtype;
  if (pad.defined()) {
    data_dtype = pad->args[0]->checked_type().as<TensorTypeNode>();
  } else {
    data_dtype = conv->args[0]->checked_type().as<TensorTypeNode>();
  }
  sl::TensorShape activation_tensor_shape;
  sl::DataType activation_data_type;
  err += Tvm2Npu(data_dtype->shape, &activation_tensor_shape);
  err += Tvm2Npu(data_dtype->dtype, &activation_data_type);
  params->activation_info = sl::TensorInfo(activation_tensor_shape, activation_data_type,
                                           sl::DataFormat::NHWC, data_q_info);

  // Create weights info. The convolution is treated as depthwise when the
  // channel count is defined, equals the group count and groups != 1.
  params->is_depthwise = conv_attr->channels.defined() &&
                         tvm::tir::ExprDeepEqual()(conv_attr->channels, conv_attr->groups) &&
                         conv_attr->groups != 1;

  const auto* weights_dtype = conv->args[1]->checked_type().as<TensorTypeNode>();
  sl::TensorShape weights_tensor_shape;
  sl::DataType weights_data_type;
  sl::DataFormat weights_data_format;
  // Ignore the error here because weights don't have a batch axis
  Tvm2Npu(weights_dtype->shape, &weights_tensor_shape);
  err += Tvm2Npu(weights_dtype->dtype, &weights_data_type);
  err += Tvm2Npu(params->is_depthwise ? "HWIM" : "HWIO", &weights_data_format);
  params->weights_info =
      sl::TensorInfo(weights_tensor_shape, weights_data_type, weights_data_format, weights_q_info);
  params->raw_weights = conv->args[1].as<ConstantNode>()->data->data;

  // Create bias info. The bias length is taken from weights axis 2 for
  // depthwise (HWIM) weights and from axis 3 for HWIO weights.
  params->bias_info = sl::TensorInfo(
      {1, 1, 1, params->is_depthwise ? weights_tensor_shape[2] : weights_tensor_shape[3]},
      sl::DataType::INT32_QUANTIZED, sl::DataFormat::NHWC, bias_q_info);
  params->raw_bias = bias_add->args[1].as<ConstantNode>()->data->data;

  return err;
}

EthosnError EthosnAPI::Concatenate(const Expr& expr, ConcatenateParams* params) {
Call call = Downcast<Call>(expr);
const auto& attrs = call->attrs.as<ConcatenateAttrs>();
Expand Down Expand Up @@ -107,6 +206,60 @@ EthosnError EthosnAPI::Split(const Expr& expr, SplitParams* params) {
return err;
}

/*!
 * \brief Convert a TVM padding array of 1, 2 or 4 entries to SL padding.
 * \param padding The TVM padding values.
 * \param npu_padding Receives the padding as (top, bottom, left, right).
 * \return An empty error on success, otherwise the conversion error or an
 * error describing the unsupported array size.
 */
EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding) {
  std::array<uint32_t, 4> values;
  EthosnError convert_err = AsArray<IndexExpr, uint32_t>(padding, &values);
  if (convert_err) {
    return convert_err;
  }

  const auto count = padding.size();
  if (count == 1) {
    // A single value applies to every side.
    *npu_padding = sl::Padding(values[0], values[0], values[0], values[0]);
  } else if (count == 2) {
    // Height, width -> top, bottom, left, right
    *npu_padding = sl::Padding(values[0], values[0], values[1], values[1]);
  } else if (count == 4) {
    // Top, left, bottom, right -> top, bottom, left, right
    *npu_padding = sl::Padding(values[0], values[2], values[1], values[3]);
  } else {
    return EthosnError(ErrStrm() << "padding tuple size=" << padding.size()
                                 << ", padding tuple size must be {1, 2, 4}");
  }
  return EthosnError();
}

/*!
 * \brief Convert a two-element TVM stride array to an SL stride.
 * \param strides The TVM strides; exactly two entries are required.
 * \param npu_stride Receives the converted stride.
 * \return An empty error on success, otherwise a size or conversion error.
 */
EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& strides, sl::Stride* npu_stride) {
  const bool has_two_entries = strides.size() == 2;
  if (!has_two_entries) {
    return EthosnError(ErrStrm() << "stride size=" << strides.size() << ", stride size must = 2");
  }

  std::array<uint32_t, 4> values;
  EthosnError convert_err = AsArray<IndexExpr, uint32_t>(strides, &values);
  if (convert_err) {
    return convert_err;
  }

  // The two entries are handed to sl::Stride in reverse order
  // (presumably TVM height/width -> SL x/y; confirm against the SL headers).
  *npu_stride = sl::Stride(values[1], values[0]);
  return EthosnError();
}

/*!
 * \brief Convert a layout string to the matching SL data format.
 * \param dformat One of "NCHW", "NHWC", "HWIO" or "HWIM".
 * \param data_format Receives the SL data format; untouched on error.
 * \return An empty error on success, otherwise an error naming the bad format.
 */
EthosnError EthosnAPI::Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format) {
  if (dformat == "NCHW") {
    *data_format = sl::DataFormat::NCHW;
  } else if (dformat == "NHWC") {
    *data_format = sl::DataFormat::NHWC;
  } else if (dformat == "HWIO") {
    *data_format = sl::DataFormat::HWIO;
  } else if (dformat == "HWIM") {
    *data_format = sl::DataFormat::HWIM;
  } else {
    // Unknown layout: report it without writing to the output.
    return EthosnError(ErrStrm() << "format=" << dformat
                                 << ", format must be {NCHW, NHWC, HWIO, HWIM}");
  }
  return EthosnError();
}

EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* npu_shape) {
EthosnError err = AsArray<IndexExpr, uint32_t>(shape, npu_shape);
if (npu_shape->front() != 1) {
Expand All @@ -128,6 +281,29 @@ EthosnError EthosnAPI::Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_ty
return EthosnError(ErrStrm() << "dtype=\'" << dtype << "\', dtype must be either uint8 or int32");
}

/*!
 * \brief Build SL quantization info from a zero point and a scale.
 * \param zero_point The quantization zero point.
 * \param scale The quantization scale.
 * \param npu_qinfo Receives the constructed quantization info.
 * \return Always an empty error.
 */
EthosnError EthosnAPI::Tvm2Npu(int32_t zero_point, float scale, sl::QuantizationInfo* npu_qinfo) {
  sl::QuantizationInfo converted(zero_point, scale);
  *npu_qinfo = converted;
  return EthosnError();
}

/*!
 * \brief Convert a TVM per-axis pad_width (four (before, after) pairs) to SL padding.
 *
 * Only axes 1 and 2 are forwarded to the SL padding, as (top, bottom) and
 * (left, right). Padding on axes 0 and 3 cannot be expressed by sl::Padding,
 * so it is rejected instead of being silently dropped.
 *
 * \param padding The per-axis padding pairs; exactly four axes are required.
 * \param npu_padding Receives the padding as (top, bottom, left, right).
 * \return An empty error on success, otherwise an error describing the problem.
 */
EthosnError EthosnAPI::Tvm2Npu(const Array<Array<Integer>>& padding, sl::Padding* npu_padding) {
  if (padding.size() != 4) {
    return EthosnError(ErrStrm() << "padding tuple size=" << padding.size()
                                 << ", padding tuple size must = 4");
  }
  // Each axis must provide a (before, after) pair before it is indexed below.
  for (size_t axis = 0; axis < padding.size(); ++axis) {
    if (padding[axis].size() != 2) {
      return EthosnError(ErrStrm() << "padding pair size=" << padding[axis].size()
                                   << ", padding pair size must = 2");
    }
  }
  // sl::Padding only describes top/bottom/left/right, so non-zero padding on
  // the first or last axis would otherwise be lost.
  if (padding[0][0]->value != 0 || padding[0][1]->value != 0 || padding[3][0]->value != 0 ||
      padding[3][1]->value != 0) {
    return EthosnError(ErrStrm() << "padding on the first and last axes must = 0");
  }

  Array<IndexExpr> reduced_padding;
  reduced_padding.push_back(padding[1][0]);
  reduced_padding.push_back(padding[1][1]);
  reduced_padding.push_back(padding[2][0]);
  reduced_padding.push_back(padding[2][1]);
  std::array<uint32_t, 4> dim;
  if (EthosnError err = AsArray<IndexExpr, uint32_t>(reduced_padding, &dim)) {
    return err;
  }
  *npu_padding = sl::Padding(dim[0], dim[1], dim[2], dim[3]);
  return EthosnError();
}

// Convert an array of IntImmNodes into ValueT
// IndexT type of Array indexing variable
// ValueT type of resulting value
Expand Down Expand Up @@ -158,6 +334,20 @@ EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) {
return EthosnError();
}

// Packed function used by the pattern check: reports whether the extracted
// ethos-n.qnn_conv2d composite body can be converted and is supported by the
// support library. The support query is only made when conversion succeeded.
TVM_REGISTER_GLOBAL("relay.ethos-n.support.conv2d")
    .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
      Call call = args[0];
      ConvolutionParams params;
      auto status = EthosnAPI::QnnConv2d(call, &params);

      bool supported = false;
      if (!status) {
        if (params.is_depthwise) {
          supported = sl::IsDepthwiseConvolutionSupported(
              params.bias_info, params.weights_info, params.conv_info, params.activation_info);
        } else {
          supported = sl::IsConvolutionSupported(params.bias_info, params.weights_info,
                                                 params.conv_info, params.activation_info);
        }
      }
      *rv = supported;
    });

TVM_REGISTER_GLOBAL("relay.ethos-n.support.concatenate")
.set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
Call call = args[0];
Expand Down
22 changes: 22 additions & 0 deletions src/relay/backend/contrib/ethosn/ethosn_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ namespace ethosn {

namespace sl = ::ethosn::support_library;

/*! \brief Support library description of a convolution, filled in by EthosnAPI::QnnConv2d */
struct ConvolutionParams {
// Padding, stride and output quantization of the convolution
sl::ConvolutionInfo conv_info;
// Shape, data type, format and quantization of the input activation
sl::TensorInfo activation_info;
// Shape, data type, format and quantization of the weights
sl::TensorInfo weights_info;
// Shape, data type, format and quantization of the bias
sl::TensorInfo bias_info;
// Pointer to the data of the weights constant
// NOTE(review): presumably not owned by this struct - confirm lifetime against callers
void* raw_weights = nullptr;
// Pointer to the data of the bias constant (same ownership caveat as raw_weights)
void* raw_bias = nullptr;
// True when the convolution should be added as a depthwise convolution
bool is_depthwise = false;
};

struct ConcatenateParams {
sl::QuantizationInfo qInfo;
sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo);
Expand Down Expand Up @@ -115,6 +125,8 @@ class EthosnError {
*/
class EthosnAPI {
public:
/*! \brief Extract the Support Library convolution params from an ethos-n.qnn_conv2d func */
static EthosnError QnnConv2d(const Expr& expr, ConvolutionParams* params);
/*! \brief Extract the Support Library concatenate params from a Relay qnn.concatenate call */
static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params);
/*! \brief Extract the Support Library split params from a Relay split call */
Expand All @@ -125,6 +137,16 @@ class EthosnAPI {
static EthosnError Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* npu_shape);
/*! \brief Convert a TVM data type to a SL data type */
static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type);
/*! \brief Convert TVM 1D padding to SL padding */
static EthosnError Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding);
/*! \brief Convert TVM 1D striding to SL striding */
static EthosnError Tvm2Npu(const Array<IndexExpr>& strides, sl::Stride* npu_stride);
/*! \brief Convert TVM data format to SL data format */
static EthosnError Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format);
/*! \brief Convert TVM quantization info to SL quantization info */
static EthosnError Tvm2Npu(int32_t zero_point, float scale, sl::QuantizationInfo* npu_qinfo);
/*! \brief Convert TVM 2D padding to SL padding */
static EthosnError Tvm2Npu(const Array<Array<Integer>>& padding, sl::Padding* npu_padding);

// Convert an array of IntImmNodes into ValueT
// IndexT type of Array indexing variable
Expand Down
2 changes: 2 additions & 0 deletions tests/python/contrib/test_ethosn/infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1):
f = relay.build_module.bind_params_by_name(mod["main"], params)
mod = tvm.IRModule()
mod["main"] = f
pattern = get_pattern_table("ethos-n")
mod = relay.transform.MergeComposite(pattern)(mod)
mod = relay.transform.AnnotateTarget("ethos-n")(mod)
mod = relay.transform.MergeCompilerRegions()(mod)
mod = relay.transform.PartitionGraph()(mod)
Expand Down
Loading