Skip to content

Commit

Permalink
[PASS] FoldScaleAxis (apache#55)
Browse files Browse the repository at this point in the history
* [PASS] FoldScaleAxis

* Move FoldAxis to O3

* Set unroll to 0 when ready
  • Loading branch information
tqchen committed May 26, 2018
1 parent 57e5553 commit 9f92e70
Show file tree
Hide file tree
Showing 17 changed files with 457 additions and 43 deletions.
2 changes: 2 additions & 0 deletions nnvm/include/nnvm/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class IndexedGraph {
array_view<NodeEntry> inputs;
/*! \brief control flow dependencies to the node */
array_view<uint32_t> control_deps;
/*! \brief weak reference to node */
std::weak_ptr<nnvm::Node> weak_ref;
};
/*! \return number of nodes in the graph */
inline size_t num_nodes() const {
Expand Down
10 changes: 8 additions & 2 deletions nnvm/python/nnvm/compiler/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
OPT_PASS_LEVEL = {
"SimplifyInference": 0,
"PrecomputePrune": 2,
"OpFusion": 1
"OpFusion": 1,
"FoldScaleAxis": 3
}

# List of optimization pass and level when switch on
Expand Down Expand Up @@ -144,6 +145,10 @@ def optimize(graph, shape, dtype="float32"):
if cfg.pass_enabled("SimplifyInference"):
graph = graph_attr.set_shape_inputs(graph, shape)
graph = graph.apply(["InferShape", "SimplifyInference"])

if cfg.pass_enabled("FoldScaleAxis"):
graph = graph_attr.set_shape_inputs(graph, shape)
graph = graph.apply(["InferShape", "FoldScaleAxis"])
return graph


Expand Down Expand Up @@ -291,5 +296,6 @@ def precompute_prune(graph, params):
out_names = pre_graph.json_attr("output_names")
if not pre_graph.symbol.list_output_names():
return graph, params
out_arrs = _run_graph(pre_graph, params)
with tvm.build_config(auto_unroll_max_step=0):
out_arrs = _run_graph(pre_graph, params)
return graph, dict(zip(out_names, out_arrs))
1 change: 0 additions & 1 deletion nnvm/python/nnvm/testing/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
self.factor_type = factor_type
self.magnitude = float(magnitude)


def _init_weight(self, name, arr):
shape = arr.shape
hw_scale = 1.
Expand Down
3 changes: 2 additions & 1 deletion nnvm/python/nnvm/testing/mobilenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def separable_conv_block(data, name, depthwise_channels,
# depthwise convolution + bn + relu
conv1 = sym.conv2d(data=data, channels=depthwise_channels,
groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
padding=padding, use_bias=False, layout="NCHW", name=name + "_depthwise_conv1")
padding=padding, use_bias=False, layout="NCHW",
name=name + "_depthwise_conv1")
bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
act1 = sym.relu(data=bn1, name=name + "_relu1")
# pointwise convolution + bn + relu
Expand Down
2 changes: 1 addition & 1 deletion nnvm/python/nnvm/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224),
input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
shape_dict = dict(zip(g.index.input_names, input_shapes))
np.random.seed(seed)
initializer = initializer if initializer else Xavier(magnitude=3)
initializer = initializer if initializer else Xavier()
for k, v in shape_dict.items():
if k == "data":
continue
Expand Down
7 changes: 3 additions & 4 deletions nnvm/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ The following components are operator invariant.
- core: NNVM core data structure
- pass: NNVM pass

The following components are generic graph compiler for NNVM-TOP
The following components make up the generic NNVM compiler and define the tensor operator set

- top: NNVM-TOP core operator defs
- tvm: NNVM-TOP to TVM compiler toolchain
- runtime: NNVM-TOP runtime
- top: NNVM core tensor operators
- compiler: NNVM compiler toolchain
2 changes: 1 addition & 1 deletion nnvm/src/compiler/compile_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class CompileEngine {
return it->second->graph_func;
}
GraphFunc f = DoLower(key->graph, key->inputs, key->target,
schedule_op_key, schedule_op_attr);
schedule_op_key, schedule_op_attr);
std::shared_ptr<GraphCacheEntryNode> n = std::make_shared<GraphCacheEntryNode>();
n->graph_func = f;
n->use_count = 1;
Expand Down
271 changes: 271 additions & 0 deletions nnvm/src/compiler/fold_scale_axis.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/*!
* Copyright (c) 2017 by Contributors
* \file fold_scale_axis.cc
* \brief Fold scaling parameter of axis into weight of conv/dense
*/
#include <nnvm/graph.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/graph_attr_types.h>
#include <nnvm/pass.h>
#include <nnvm/compiler/op_attr_types.h>
#include <nnvm/top/nn.h>
#include "./pattern_util.h"
#include "./graph_transform.h"

namespace nnvm {
namespace compiler {

// Role that a node plays in a scale-folding chain.
// Stored per node in FoldChainEntry::kind and returned by
// FScaleAxisBackward functions to describe how an operator
// reacts to an axis scale arriving at its output.
enum FoldScaleKind {
// No folding is applied
kNone,
// The folding decision is pending
kPending,
// The original operator that contains the scale.
kProvider,
// Pass the scale through to the first listed input,
// which then becomes pending.
kPassTroughFirst,
// The final consumer of the axis scale, using multiplication.
// Likely to be a conv or dense operator.
kMulConsumer,
// The final consumer of the axis scale, using division.
kDivConsumer
};

// Input fold information: pairs an index with an axis.
// NOTE(review): not referenced anywhere in the visible part of this file;
// presumably `index` is an operator input index and `axis` the axis of
// that input to scale -- confirm before relying on it.
struct FoldScaleInput {
uint32_t index;
int axis;
};

// Per-node entry of a folding chain.
// One entry exists for every node in the indexed graph; entries that
// are not part of any chain keep the default kind kNone.
struct FoldChainEntry {
// Entry kind
FoldScaleKind kind{kNone};
// The output axis to be folded
int axis{0};
// Node id of the source node (the scaling broadcast_mul) of the fold chain
int source{0};
// The following fields are only used by the provider.
// Index of the provider input that replaces the provider
// once the scale has been folded away.
int fold_input_index{1};
// The scale entry
NodeEntry scale_entry;
};

// Try to pass axis scaling backward through an operator,
// given that we know the status of the current fold axis.
//
// attrs:     attributes of the operator node.
// axis:      axis of the operator output on which the scale is applied.
// in_shape:  shapes of the operator inputs.
// out_shape: shapes of the operator outputs.
// in_axis:   output list of (input index, axis of that input) pairs the
//            scale has to be pushed into. When the function returns
//            kPassTroughFirst, the first pair is the input the scale is
//            passed through to; all remaining pairs are multiplied by
//            the scale.
// Returns the kind describing how the operator handles the scale;
// kNone means the scale cannot be folded through this operator.
using FScaleAxisBackward = std::function<
FoldScaleKind(const NodeAttrs& attrs,
int axis,
const std::vector<TShape>& in_shape,
const std::vector<TShape>& out_shape,
std::vector<std::pair<uint32_t, int> >* in_axis)>;

/*!
 * \brief Detect whether a node scales a single axis of its other operand.
 *
 *  A node matches when it is a broadcast_mul with more than one output
 *  dimension, one operand (the scale) broadcasts along a single axis of the
 *  output, and the other operand (the data) already has the output shape.
 *  The scale must be either a variable or an expand_dims of a 1-D entry.
 *
 * \param idx The indexed graph.
 * \param nid The node to inspect.
 * \param shape_vec Inferred shape of every entry of the graph.
 * \param ref_count Reference count of every node.
 * \param is_forward Whether the scale is going to be folded forward
 *        (requires the node itself to be referenced once) or backward
 *        (requires the data operand to be referenced once).
 * \param chain Per-node chain entries; on success the entry of nid is
 *        marked kPending and, for backward folding, so is the entry of
 *        the data operand.
 * \return Whether nid was recognized as an axis-scaling node.
 */
bool DetectScaleAxis(const IndexedGraph& idx,
                     uint32_t nid,
                     const ShapeVector& shape_vec,
                     const std::vector<uint32_t>& ref_count,
                     bool is_forward,
                     std::vector<FoldChainEntry>* chain) {
  const IndexedGraph::Node& inode = idx[nid];
  static const Op* bcast_mul = Op::Get("broadcast_mul");
  static const Op* expand_dims = Op::Get("expand_dims");
  if (inode.source->op() != bcast_mul) return false;
  const TShape& oshape = shape_vec[idx.entry_id(nid, 0)];
  CHECK_NE(oshape.ndim(), 0);
  if (oshape.ndim() <= 1) return false;
  // Try both operand orders: a is the candidate scale, b the candidate data.
  for (int i = 0; i < 2; ++i) {
    const IndexedGraph::NodeEntry& a = inode.inputs[i];
    const IndexedGraph::NodeEntry& b = inode.inputs[1 - i];
    std::pair<int, int> axis =
        MatchBroadcast1DAxis(oshape, shape_vec[idx.entry_id(a)]);
    if (axis.first == -1 || shape_vec[idx.entry_id(b)] != oshape) continue;
    // Folding rewrites the nodes involved, so they must not be shared.
    if (ref_count[a.node_id] != 1) return false;
    if (is_forward && ref_count[nid] != 1) return false;
    if (!is_forward && ref_count[b.node_id] != 1) return false;
    const IndexedGraph::Node& anode = idx[a.node_id];
    // mark the current entry.
    FoldChainEntry& e = (*chain)[nid];
    if (anode.source->is_variable()) {
      e.fold_input_index = 1 - i;
      // The scale is operand i; operand 1 - i is the data that replaces
      // this node once the scale has been folded away.
      e.scale_entry = inode.source->inputs[i];
    } else if (anode.source->op() == expand_dims &&
               shape_vec[idx.entry_id(anode.source->inputs[0])].ndim() == 1) {
      e.fold_input_index = 1 - i;
      // Use the 1-D entry that feeds the expand_dims as the scale.
      e.scale_entry = anode.source->inputs[0];
    } else {
      return false;
    }
    e.axis = axis.first;
    e.kind = kPending;
    e.source = nid;
    if (!is_forward) {
      // pass message to another input
      FoldChainEntry& enext = (*chain)[b.node_id];
      enext.axis = e.axis;
      enext.kind = kPending;
      enext.source = nid;
    }
    return true;
  }
  return false;
}

/*!
 * \brief Fold per-axis scaling (broadcast_mul) backward into the
 *  parameters of the operators that produce the scaled value.
 *
 *  The graph is walked from the last node to the first. Every scaling
 *  node found by DetectScaleAxis starts a chain; the scale is pushed
 *  backward through operators that register FScaleAxisBackward until an
 *  operator consumes it. Afterwards the graph is rewritten: consumer
 *  inputs are multiplied by the scale and the original scaling node is
 *  replaced by its data input.
 *
 * \param src The source graph; must carry the "shape" attribute.
 * \return The transformed graph.
 */
Graph FoldScaleAxis(Graph src) {
  // Operator pattern
  static auto& fbackward =
      nnvm::Op::GetAttr<FScaleAxisBackward>("FScaleAxisBackward");
  const IndexedGraph& idx = src.indexed_graph();
  const ShapeVector& shape_vec = src.GetAttr<ShapeVector>("shape");
  std::vector<uint32_t> ref_count = GetNodeRefCounts(idx);
  std::vector<FoldChainEntry> bwd_chain(idx.num_nodes());
  // shape hint for the inference, reused across iterations.
  std::vector<TShape> in_shape, out_shape;
  // perform backward folding.
  for (uint32_t i = idx.num_nodes(); i != 0; --i) {
    const uint32_t nid = i - 1;
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) continue;
    if (DetectScaleAxis(idx, nid, shape_vec,
                        ref_count, false, &bwd_chain)) continue;
    if (bwd_chain[nid].kind != kPending) continue;
    if (ref_count[nid] != 1 || !fbackward.count(inode.source->op())) {
      bwd_chain[nid].kind = kNone;
      continue;
    }
    // get input shape and output shape.
    in_shape.clear();
    out_shape.clear();
    for (const IndexedGraph::NodeEntry& e : inode.inputs) {
      in_shape.push_back(shape_vec[idx.entry_id(e)]);
    }
    for (uint32_t oi = 0; oi < inode.source->num_outputs(); ++oi) {
      out_shape.push_back(shape_vec[idx.entry_id(nid, oi)]);
    }
    std::vector<std::pair<uint32_t, int> > in_axis;
    const FoldScaleKind kind =
        fbackward[inode.source->op()](
            inode.source->attrs, bwd_chain[nid].axis,
            in_shape, out_shape, &in_axis);
    bwd_chain[nid].kind = kind;
    if (kind == kNone) continue;
    CHECK_GE(in_axis.size(), 1U);
    // Only these two kinds are handled by the propagation below.
    CHECK(kind == kPassTroughFirst || kind == kMulConsumer);
    // Every input the scale is pushed into is going to be rewritten,
    // so each of them must be single-output and referenced exactly once.
    bool can_prop = true;
    for (const std::pair<uint32_t, int>& target : in_axis) {
      CHECK_LT(target.first, inode.inputs.size());
      const IndexedGraph::NodeEntry& e = inode.inputs[target.first];
      if (ref_count[e.node_id] != 1 ||
          idx[e.node_id].source->num_outputs() != 1) {
        can_prop = false;
        break;
      }
    }
    if (!can_prop) {
      // The chain stops here; this node takes no part in the folding.
      bwd_chain[nid].kind = kNone;
      continue;
    }
    // propagate back.
    for (size_t k = 0; k < in_axis.size(); ++k) {
      const IndexedGraph::NodeEntry& e = inode.inputs[in_axis[k].first];
      if (kind == kPassTroughFirst && k == 0) {
        bwd_chain[e.node_id].kind = kPending;
      } else {
        bwd_chain[nid].kind = kNone;
        bwd_chain[e.node_id].kind = kMulConsumer;
      }
      bwd_chain[e.node_id].axis = in_axis[k].second;
      bwd_chain[e.node_id].source = bwd_chain[nid].source;
    }
    if (kind == kMulConsumer) {
      // The chain reached its final consumer: commit the fold by
      // marking the scaling node that started the chain.
      bwd_chain[bwd_chain[nid].source].kind = kProvider;
    }
  }
  auto transform = [&](uint32_t nid, const NodePtr& n, std::vector<NodeEntry>* ret) {
    const FoldChainEntry& e = bwd_chain[nid];
    if (e.kind == kMulConsumer && bwd_chain[e.source].kind == kProvider) {
      // Multiply the consumer by the scale along the recorded axis.
      const FoldChainEntry& se = bwd_chain[e.source];
      CHECK_EQ(n->num_outputs(), 1U);
      NodeEntry scale = ExpandBiasToMatchAxis(
          se.scale_entry,
          shape_vec[idx.entry_id(nid, 0)].ndim(),
          shape_vec[idx.entry_id(se.scale_entry)].ndim(),
          e.axis);
      *ret = {MakeNode("broadcast_mul", n->attrs.name + "_sc",
                       {NodeEntry{n, 0, 0}, scale})};
      return true;
    } else if (e.kind == kProvider) {
      // The scale has been folded; bypass the scaling node.
      *ret = {n->inputs[e.fold_input_index]};
      return true;
    } else {
      return false;
    }
  };
  return GraphTransform(src, transform);
}

NNVM_REGISTER_PASS(FoldScaleAxis)
.set_body(FoldScaleAxis);

// property registration.

/*!
 * \brief Backward scale rule shared by relu and leaky_relu: the scale is
 *  handed on unchanged, on the same axis, to the only data input.
 *
 *  NOTE(review): multiplying before or after a (leaky) relu is only
 *  equivalent for non-negative scales; nothing here checks the sign of
 *  the scale -- confirm that callers guarantee it.
 *
 * \param attrs Operator attributes (unused).
 * \param axis Axis of the output on which the scale is applied.
 * \param in_shape Input shapes (unused).
 * \param out_shape Output shapes (unused).
 * \param in_axis Receives the single pair (input 0, axis).
 * \return Always kPassTroughFirst.
 */
FoldScaleKind ReluScaleAxisBackward(
    const NodeAttrs& attrs,
    int axis,
    const std::vector<TShape>& in_shape,
    const std::vector<TShape>& out_shape,
    std::vector<std::pair<uint32_t, int> >* in_axis) {
  const std::pair<uint32_t, int> data_input(0U, axis);
  in_axis->push_back(data_input);
  return kPassTroughFirst;
}

NNVM_REGISTER_OP(relu)
.set_attr<FScaleAxisBackward>("FScaleAxisBackward", ReluScaleAxisBackward);

NNVM_REGISTER_OP(leaky_relu)
.set_attr<FScaleAxisBackward>("FScaleAxisBackward", ReluScaleAxisBackward);

/*!
 * \brief Backward scale rule for broadcast_add / broadcast_sub.
 *
 *  Handles the pattern (data +/- bias) * scale, where data has the output
 *  shape and bias broadcasts along a single axis. The scale distributes
 *  over both operands: it is passed through to data on the same axis and
 *  multiplied into bias on the matching axis of the bias shape.
 *
 * \param attrs Operator attributes (unused).
 * \param axis Axis of the output on which the scale is applied.
 * \param in_shape Shapes of the two inputs.
 * \param out_shape Shapes of the outputs; only out_shape[0] is used.
 * \param in_axis Receives (data input, axis) first, as the pass-through
 *        target, followed by (bias input, axis within the bias shape).
 * \return kPassTroughFirst when the pattern matches, kNone otherwise.
 */
FoldScaleKind BroadcastAddSubScaleAxisBackward(
    const NodeAttrs& attrs,
    int axis,
    const std::vector<TShape>& in_shape,
    const std::vector<TShape>& out_shape,
    std::vector<std::pair<uint32_t, int> >* in_axis) {
  for (int i = 0; i < 2; ++i) {
    // Input i is the candidate bias, input 1 - i the candidate data.
    // m.first is the matched axis in the output shape, m.second the
    // matched axis in the bias shape.
    std::pair<int, int> m = MatchBroadcast1DAxis(out_shape[0], in_shape[i]);
    if (m.second == -1 || in_shape[1 - i] != out_shape[0]) continue;
    // The scale only distributes onto the bias when both vary along the
    // same output axis; otherwise the product is not a 1-D broadcast.
    if (m.first != axis) continue;
    // The first entry is the pass-through target and must be the
    // full-shaped data input; the bias absorbs the scale by multiplication.
    in_axis->emplace_back(1 - i, axis);
    in_axis->emplace_back(i, m.second);
    return kPassTroughFirst;
  }
  return kNone;
}

NNVM_REGISTER_OP(broadcast_add)
.set_attr<FScaleAxisBackward>("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward);

NNVM_REGISTER_OP(broadcast_sub)
.set_attr<FScaleAxisBackward>("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward);

/*!
 * \brief Backward scale rule for conv2d: fold a per-output-channel scale
 *  into the weight (and the bias, when present).
 *
 *  Only the NCHW layout is handled, and only when the scale sits on the
 *  channel axis (axis 1) of the output. A scale on any other output axis
 *  cannot be expressed as a scaling of the weight's first axis.
 *
 * \param attrs Operator attributes; parsed as Conv2DParam.
 * \param axis Axis of the output on which the scale is applied.
 * \param in_shape Input shapes (unused).
 * \param out_shape Output shapes (unused).
 * \param in_axis Receives (1, 0) for the weight and, if use_bias is set,
 *        (2, 0) for the bias.
 * \return kMulConsumer when the scale can be folded, kNone otherwise.
 */
FoldScaleKind Conv2DScaleAxisBackward(
    const NodeAttrs& attrs,
    int axis,
    const std::vector<TShape>& in_shape,
    const std::vector<TShape>& out_shape,
    std::vector<std::pair<uint32_t, int> >* in_axis) {
  using top::Conv2DParam;
  const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
  // only optimize for nchw for now
  if (param.layout != top::kNCHW) return kNone;
  // In NCHW the output channel axis is 1; it is the only output axis
  // that corresponds to axis 0 of the weight and of the bias.
  if (axis != 1) return kNone;
  in_axis->emplace_back(1, 0);
  if (param.use_bias) {
    in_axis->emplace_back(2, 0);
  }
  return kMulConsumer;
}

NNVM_REGISTER_OP(conv2d)
.set_attr<FScaleAxisBackward>("FScaleAxisBackward", Conv2DScaleAxisBackward);

} // namespace compiler
} // namespace nnvm
12 changes: 3 additions & 9 deletions nnvm/src/compiler/graph_fuse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <dmlc/parameter.h>
#include "./compile_engine.h"
#include "./graph_runtime.h"
#include "./pattern_util.h"

namespace nnvm {
namespace compiler {
Expand Down Expand Up @@ -56,17 +57,10 @@ nnvm::Graph GraphFusePartition(nnvm::Graph g) {

// Reference counter of each op node
// For now, always store result when an op is referred more than once.
std::vector<uint32_t> ref_count(idx.num_nodes(), 0);
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) continue;
for (const auto& e : inode.inputs) {
++ref_count[e.node_id];
}
}
std::vector<uint32_t> ref_count = GetNodeRefCounts(idx);
for (const auto& e : idx.outputs()) {
// this line will realize all the outputs
ref_count[e.node_id] += 2;
ref_count[e.node_id] += 1;
}
// Pattern for the subgraph
std::vector<TOpPattern> pattern_vec(idx.num_nodes(), kOpaque);
Expand Down
Loading

0 comments on commit 9f92e70

Please sign in to comment.