Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QNN] Enable constant folding for QNN operations. #11228

Merged
merged 4 commits into from
May 13, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion include/tvm/relay/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,11 @@ TVM_DLL Pass LazyGradientInit();
/*!
* \brief Fold constant expressions.
*
 * \param fold_qnn Whether to fold constants for QNN operations.
*
* \return The pass.
*/
TVM_DLL Pass FoldConstant(bool fold_qnn = false);

/*!
* \brief Split function with huge number of arguments to smaller pieces.
Expand Down
15 changes: 11 additions & 4 deletions python/tvm/relay/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,32 +261,39 @@ def LazyGradientInit():
return _ffi_api.LazyGradientInit()


def FoldConstantExpr(expr, mod, fold_qnn=False):
    """Fold the constant expressions in a Relay program.

    Parameters
    ----------
    expr: Expr
        The expression to fold
    mod: IRModule
        The module the expr lives in (for global calls)
    fold_qnn: bool
        Whether to fold constants for QNN operations. Defaults to False so
        existing callers keep the pre-QNN behavior.

    Returns
    -------
    new_expr: Expr
        The expr after Constant Folding
    """
    # Delegates to the C++ implementation registered as
    # "relay._transform.FoldConstantExpr".
    return _ffi_api.FoldConstantExpr(expr, mod, fold_qnn)


def FoldConstant(fold_qnn=False):
    """Fold the constant expressions in a Relay program.

    Parameters
    ----------
    fold_qnn: bool
        Whether to fold constants for QNN operations. Defaults to False so
        existing callers keep the pre-QNN behavior.

    Returns
    -------
    ret : tvm.transform.Pass
        The registered pass for constant folding.
    """
    # Delegates to the C++ pass constructor registered as
    # "relay._transform.FoldConstant".
    return _ffi_api.FoldConstant(fold_qnn)


def FuseOps(fuse_opt_level=-1):
Expand Down
3 changes: 2 additions & 1 deletion src/relay/backend/interpreter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <tvm/relay/feature.h>
#include <tvm/relay/interpreter.h>
#include <tvm/relay/pattern_functor.h>
#include <tvm/relay/qnn/transform.h>
#include <tvm/relay/transform.h>
#include <tvm/runtime/container/map.h>
#include <tvm/runtime/device_api.h>
Expand Down Expand Up @@ -948,7 +949,7 @@ IRModule Prepare(IRModule mod, CompilationConfig config) {
VirtualDevice host_virtual_device = config->host_virtual_device;
// Run minimal transforms on module to establish invariants needed by interpreter.
transform::Sequential seq(
{transform::SimplifyInference(),
{transform::SimplifyInference(), qnn::transform::Legalize(),
// Figure out which devices should be used to execute.
// TODO(mbs): Should ignore all existing annotations when constant folding
transform::PlanDevices(std::move(config)),
Expand Down
27 changes: 18 additions & 9 deletions src/relay/transforms/fold_constant.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ bool IsComplexConstant(const Expr& expr) {
// or make a more powerful partial evaluator.
class ConstantFolder : public MixedModeMutator {
public:
explicit ConstantFolder(IRModule module)
explicit ConstantFolder(IRModule module, bool fold_qnn)
: module_(std::move(module)),
fold_qnn_(fold_qnn),
device_copy_op_(Op::Get("device_copy")),
shape_of_op_(Op::Get("shape_of")),
vm_shape_of_op_(Op::Get("vm.shape_of")),
Expand Down Expand Up @@ -158,8 +159,6 @@ class ConstantFolder : public MixedModeMutator {
return std::move(pre_call);
}

static auto fnoncomputational = Op::GetAttrMap<TNonComputational>("TNonComputational");

const auto* op_node = post_call->op.as<OpNode>();
if (op_node == nullptr) {
// Only evaluate primitives.
Expand All @@ -182,8 +181,15 @@ class ConstantFolder : public MixedModeMutator {
if (Optional<Expr> opt_result = EvaluateNdarraySize(pre_call)) {
return opt_result.value();
}
if ((fnoncomputational.count(op) && fnoncomputational[op]) || op == device_copy_op_ ||
op == shape_of_op_ || op == vm_shape_of_op_ || op == ndarray_size_op_) {
static auto fnoncomputational = Op::GetAttrMap<TNonComputational>("TNonComputational");
static auto qnn_canonicalize = Op::GetAttrMap<FTVMLegalize>("FTVMQnnCanonicalize");
bool is_no_qnn_canonicalized = !qnn_canonicalize.count(op);
bool is_no_computational = fnoncomputational.count(op) && fnoncomputational[op];
if (is_no_computational && (is_no_qnn_canonicalized || !fold_qnn_)) {
return std::move(post_call);
}
if (op == device_copy_op_ || op == shape_of_op_ || op == vm_shape_of_op_ ||
op == ndarray_size_op_) {
// We should think about potentially constant evaluation over these ops too.
return std::move(post_call);
}
Expand Down Expand Up @@ -387,6 +393,9 @@ class ConstantFolder : public MixedModeMutator {
// Module
IRModule module_;

// Whether to fold constants for QNN operations.
bool fold_qnn_;

// The kDLCPU device assumed to be available to the compiler. Used only when evaluating
// sub-expressions.
Device eval_cpu_dev_{kDLCPU, /*device_id=*/0};
Expand Down Expand Up @@ -417,20 +426,20 @@ TVM_REGISTER_GLOBAL("relay.analysis.check_constant").set_body_typed(IsComplexCon
* from their p.o.v. Furthermore, this function can be called before conversion to ANF so
* we must avoid all recursion.
*/
Expr FoldConstantExpr(const Expr& expr, const IRModule& mod, bool fold_qnn) {
  VLOG_CONTEXT << "FoldConstantExpr";
  VLOG(1) << "folding:" << std::endl << PrettyPrint(expr);
  // fold_qnn is threaded through to the mutator so it can decide whether
  // QNN-canonicalizable ops are eligible for folding.
  Expr result = ConstantFolder(mod, fold_qnn).VisitExpr(expr);
  VLOG(1) << "folded to:" << std::endl << PrettyPrint(result);
  return result;
}

TVM_REGISTER_GLOBAL("relay._transform.FoldConstantExpr").set_body_typed(FoldConstantExpr);

Pass FoldConstant(bool fold_qnn) {
  // Capture fold_qnn by value: the pass object may outlive this scope.
  runtime::TypedPackedFunc<Function(Function, IRModule, PassContext)> pass_func =
      [=](Function f, IRModule m, PassContext pc) {
        return Downcast<Function>(FoldConstantExpr(f, m, fold_qnn));
      };
  return CreateFunctionPass(pass_func, 2, "FoldConstant", {});
}
Expand Down
29 changes: 29 additions & 0 deletions tests/python/relay/test_pass_fold_constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,35 @@ def before():
tvm.ir.assert_structural_equal(run_infer_type(before_mod["main"]), after_mod["main"])


def test_fold_qnn_const():
    """FoldConstant leaves QNN ops alone by default and folds them when
    fold_qnn=True (dequantize of constant weights becomes an FP32 constant)."""
    qx = relay.var("x", shape=[2, 3], dtype="int8")

    def before():
        # Quantized INT8 weights
        qw = relay.const(np.array([[1, 3, 5], [2, 4, 6]], dtype="int8"), "int8")
        op = relay.op.nn.dense(
            relay.qnn.op.dequantize(qx, relay.const(2.0), relay.const(0)),
            relay.qnn.op.dequantize(qw, relay.const(2.0), relay.const(0)),
        )
        return relay.Function([qx], op)

    def expected():
        # FP32 weights: dequantize(qw, scale=2.0, zp=0) folded to qw * 2.0
        w = relay.const(np.array([[2.0, 6.0, 10.0], [4.0, 8.0, 12.0]], dtype="float32"), "float32")
        op = relay.op.nn.dense(relay.qnn.op.dequantize(qx, relay.const(2.0), relay.const(0)), w)
        return relay.Function([qx], op)

    # Nothing changed after applying FoldConstant with the default settings.
    a = run_opt_pass(before(), transform.FoldConstant())
    b = run_opt_pass(before(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)

    # With fold_qnn=True the constant dequantize is folded away.
    a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
    b = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)


def test_pass_link_params():
"""
This test checks ensures that proper executor is passed to interpreter instance
Expand Down