Skip to content

Commit

Permalink
[GNA] Add POT/FakeQuantize support
Browse files Browse the repository at this point in the history
  • Loading branch information
sirzabek committed Sep 2, 2021
1 parent 2d6cdaa commit 9e30085
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,7 @@ class ScaleFactorCalculator {
if (!frontend::ScaleFactorPerLayer<T>()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize, infiniteLoopCount)) {
return false;
}

if (result) {
idx++;
return true;
Expand All @@ -1309,7 +1310,6 @@ class ScaleFactorCalculator {
if (!result) {
return result.restartLayer == cnnLayer.get();
}
return ptr == cnnLayer.get();
});
if (idx != net.end()) {
idx++;
Expand Down
1 change: 1 addition & 0 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ConvertDWSCBiasToScaleShifts>();
manager.register_pass<ConvertDWSCToScaleShifts>();
manager.register_pass<ConvertDWSCWithFqToScaleShifts>();
manager.register_pass<ConvertPaddedToValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
#include <ngraph/rt_info.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"
//#include "backend/gna_limitations.hpp"


using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertDWSCToScaleShifts, "ConvertDWSCToScaleShifts", 0);
NGRAPH_RTTI_DEFINITION(ConvertDWSCBiasToScaleShifts, "ConvertDWSCBiasToScaleShifts", 0);
NGRAPH_RTTI_DEFINITION(ConvertDWSCWithFqToScaleShifts, "ConvertDWSCWithFqToScaleShifts", 0);

static bool VerifyDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc) {
// Verify it's a 1D convolution
Expand All @@ -32,7 +32,8 @@ static bool VerifyDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc) {
return true;
}

static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc, std::shared_ptr<ngraph::opset7::Constant> bias_const,
static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc,
std::shared_ptr<ngraph::opset7::Constant> bias_const, std::shared_ptr<ngraph::opset7::FakeQuantize> fq_bias,
std::shared_ptr<ngraph::opset7::Reshape> flat_input_plane, std::shared_ptr<ngraph::Node> flat_filters_plane) {
std::shared_ptr<ngraph::opset7::Constant> const_zero_padding;
std::shared_ptr<ngraph::Node> reshaped_bias;
Expand Down Expand Up @@ -78,6 +79,7 @@ static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset
if (bias_const) {
previous_layer_output = std::make_shared<ngraph::opset7::Add>(previous_layer_output, reshaped_bias);
copy_runtime_info(dwsc, previous_layer_output);
previous_layer_output = InsertFQLayer(fq_bias, previous_layer_output);
}
last_layer_output = previous_layer_output;
} else {
Expand Down Expand Up @@ -111,11 +113,15 @@ static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset
}

static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
std::shared_ptr<ngraph::Node> reshape_filters_const_node,
std::shared_ptr<ngraph::Node> bias_node,
std::shared_ptr<ngraph::Node> bias_const_node) {
std::shared_ptr<ngraph::Node> bias_const_node,
std::shared_ptr<ngraph::Node> fq_bias_node) {
auto dwsc = std::dynamic_pointer_cast<ngraph::opset7::GroupConvolution>(dwsc_node);
auto reshape_filters_const = std::dynamic_pointer_cast<ngraph::opset7::Reshape>(reshape_filters_const_node);
auto bias = std::dynamic_pointer_cast<ngraph::opset7::Add>(bias_node);
auto bias_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias_const_node);
auto fq_bias = std::dynamic_pointer_cast<ngraph::opset7::FakeQuantize>(fq_bias_node);

if (!VerifyDWSC(dwsc))
return false;
Expand All @@ -124,7 +130,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
auto input_width = dwsc->get_input_shape(0)[3];
auto output_channel_count = dwsc->get_output_shape(0)[1];
auto output_width = dwsc->get_output_shape(0)[3];
auto original_last_node = (bias_const ? bias_node : dwsc_node);
auto original_last_node = (fq_bias ? fq_bias_node : (bias_const ? bias_node : dwsc_node));

// Prepare flat input data
auto reshaped_input_plane = std::make_shared<ngraph::opset7::Reshape>(dwsc->input_value(0),
Expand All @@ -139,7 +145,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
ngraph::Shape{1, shape_size(dwsc->input_value(0).get_shape())}), false);

// Prepare flat filter data
auto filters_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(dwsc->input_value(1).get_node_shared_ptr());
auto filters_const = std::dynamic_pointer_cast<ngraph::Node>(dwsc->get_input_node_shared_ptr(1));
auto filters_size = shape_size(filters_const->get_shape());

auto transposed_filters_const = ngraph::op::util::make_try_fold<ngraph::opset7::Transpose>(filters_const,
Expand All @@ -151,7 +157,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
copy_runtime_info(dwsc, {reshaped_input_plane, transposed_input_plane, flat_input_plane, transposed_filters_const, flat_filters_plane});

// Convert DWSC to a set of diagonal layers
auto output_plane = DecomposeDWSC(dwsc, bias_const, flat_input_plane, flat_filters_plane);
auto output_plane = DecomposeDWSC(dwsc, bias_const, fq_bias, flat_input_plane, flat_filters_plane);

// Restore the original output shape
auto result = std::make_shared<ngraph::opset7::Reshape>(output_plane,
Expand All @@ -176,7 +182,7 @@ ConvertDWSCToScaleShifts::ConvertDWSCToScaleShifts() {

ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, nullptr);
return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, nullptr, nullptr, nullptr);
};

auto m = std::make_shared<ngraph::pattern::Matcher>(dwsc, matcher_name);
Expand All @@ -201,9 +207,40 @@ ConvertDWSCBiasToScaleShifts::ConvertDWSCBiasToScaleShifts() {
if (bias_node && (bias_const = VerifyBiasGetConst(pattern_map.at(dwsc).get_node_shared_ptr(), bias_node)) == nullptr)
return false;

return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), bias_node, bias_const);
return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, bias_node, bias_const, nullptr);
};

auto m = std::make_shared<ngraph::pattern::Matcher>(bias, matcher_name);
this->register_matcher(m, callback);
}

// Registers a matcher rooted at the FakeQuantize that follows the bias Add of a
// GroupConvolution whose filters come from Constant -> FakeQuantize -> Reshape.
// On a match, the bias constant is validated and the sub-graph is handed to Convert().
ConvertDWSCWithFqToScaleShifts::ConvertDWSCWithFqToScaleShifts() {
    MATCHER_SCOPE(ConvertDWSCWithFqToScaleShifts);

    // Generic constant placeholder reused for every constant input in the pattern
    auto any_const = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();

    // Filters branch: Constant (rank 4) -> FakeQuantize -> Reshape
    auto raw_filters = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::rank_equals(4));
    auto quantized_filters = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(
        {raw_filters, any_const, any_const, any_const, any_const},
        consumers_and_rank(1, 4));
    auto reshaped_filters = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({quantized_filters, any_const});

    // Main branch: GroupConvolution -> Add (bias) -> FakeQuantize
    auto group_conv = ngraph::pattern::wrap_type<ngraph::opset7::GroupConvolution>(
        {ngraph::pattern::any_input(), reshaped_filters},
        consumers_and_rank(1, 4));
    auto bias_add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({group_conv, any_const});
    auto quantized_bias = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(
        {bias_add, any_const, any_const, any_const, any_const},
        consumers_and_rank(1, 4));

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& matched = m.get_pattern_value_map();
        const auto conv_node = matched.at(group_conv).get_node_shared_ptr();

        // The bias node is looked up defensively; it stays null when absent from the map
        std::shared_ptr<ngraph::Node> bias_node;
        const auto bias_entry = matched.find(bias_add);
        if (bias_entry != matched.end()) {
            bias_node = bias_entry->second.get_node_shared_ptr();
        }

        // A present bias must yield a valid constant, otherwise the match is rejected
        std::shared_ptr<ngraph::Node> bias_const;
        if (bias_node) {
            bias_const = VerifyBiasGetConst(conv_node, bias_node);
            if (bias_const == nullptr) {
                return false;
            }
        }

        return Convert(conv_node,
                       matched.at(reshaped_filters).get_node_shared_ptr(),
                       bias_node,
                       bias_const,
                       matched.at(quantized_bias).get_node_shared_ptr());
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(quantized_bias, matcher_name);
    this->register_matcher(matcher, callback);
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,14 @@ class ConvertDWSCBiasToScaleShifts : public ngraph::pass::MatcherPass {
ConvertDWSCBiasToScaleShifts();
};

/**
 * @brief Convert a depthwise separable convolution with bias (represented by a GroupConvolution + Add), processed by POT,
 * to a set of ScaleShift layers (MatMul + Add).
 * The matched sub-graph carries FakeQuantize layers: one on the filters constant (ahead of the Reshape feeding the
 * GroupConvolution) and one after the bias Add, which is the root of the pattern.
 */
class ConvertDWSCWithFqToScaleShifts : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
// Builds the pattern and registers the matcher callback
ConvertDWSCWithFqToScaleShifts();
};

} // namespace GNAPluginNS
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,6 @@ static void TransformInput(const GraphData& graph_data, const ConvData& conv_dat
split_input_plane = flattened_dilated_transposed_input;
}

// Clones fqLayer (when it is not null) with lastNode as its first input and fqLayer's
// inputs 1..4 as the remaining inputs, then copies fqLayer's runtime info onto the clone.
// NOTE(review): lastNode is taken by value, so the clone assigned to it below is never
// seen by the caller - the caller's pointer is left unchanged and the clone is dropped
// when the function returns. Returning the node (or taking a reference) would be needed
// for the inserted layer to take effect.
static void InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fqLayer,
std::shared_ptr<ngraph::Node> lastNode) {
if (fqLayer != nullptr) {
// Only the local copy of the pointer is rebound here
lastNode = fqLayer->clone_with_new_inputs({lastNode,
fqLayer->input_value(1), fqLayer->input_value(2),
fqLayer->input_value(3), fqLayer->input_value(4)});
ngraph::copy_runtime_info(fqLayer, lastNode);
}
}

// Valid 1D (decomposed 2D) convolution wrapped with transposes NHWC => NCHW => conv => NCHW => NHWC
static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, const ConvData& conv_data, const ngraph::Output<ngraph::Node>& input,
std::shared_ptr<ngraph::Node> filters, const size_t conv_index, const size_t h_index) {
Expand All @@ -280,7 +270,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2})->output(0));

// Fake quantize
InsertFQLayer(graph_data.fq_conv, filters);
filters = InsertFQLayer(graph_data.fq_conv, filters);

// 1D Convolution
auto conv = std::make_shared<ngraph::opset7::Convolution>(nchw_input, filters,
Expand All @@ -297,7 +287,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{1, bias_size, 1, 1}), false);
last_conv_block_op = std::make_shared<ngraph::opset7::Add>(conv, reshaped_bias_const);
copy_runtime_info(graph_data.conv, last_conv_block_op);
InsertFQLayer(graph_data.fq_bias, last_conv_block_op);
last_conv_block_op = InsertFQLayer(graph_data.fq_bias, last_conv_block_op);
}

// Max pooling
Expand All @@ -311,7 +301,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
if (graph_data.af && graph_data.conv_count == 1) {
last_conv_block_op = graph_data.af->copy_with_new_inputs({last_conv_block_op});
copy_runtime_info(conv, last_conv_block_op);
InsertFQLayer(graph_data.fq_af, last_conv_block_op);
last_conv_block_op = InsertFQLayer(graph_data.fq_af, last_conv_block_op);
}

// Transpose NCHW => NHWC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include "transformation_helper.hpp"


Expand Down Expand Up @@ -90,4 +91,16 @@ std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> c
return nullptr;
}

/**
 * @brief Clones an existing fake quantize layer (if provided) and connects the clone to a given node
 * @param fq_layer existing fake quantize layer to be copied; may be nullptr
 * @param last_node the node that becomes the first input of the cloned fake quantize layer
 * @return the cloned fake quantize layer, or last_node unchanged when fq_layer is nullptr
 */
std::shared_ptr<ngraph::Node> InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fq_layer,
                                            std::shared_ptr<ngraph::Node> last_node) {
    // Nothing to insert: hand the input node back untouched
    if (fq_layer == nullptr) {
        return last_node;
    }

    // Inputs 1..4 are shared with the original layer; only the first input is replaced
    auto new_fq = fq_layer->clone_with_new_inputs({last_node,
        fq_layer->input_value(1), fq_layer->input_value(2),
        fq_layer->input_value(3), fq_layer->input_value(4)});

    // copy_runtime_info takes (from, to): propagate the original layer's runtime info
    // to the clone, not the other way round
    ngraph::copy_runtime_info(fq_layer, new_fq);
    return new_fq;
}

} // namespace GNAPluginNS
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,12 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
*/
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias);

/**
 * @brief clones an existing fake quantize layer (if one is provided) and connects the clone to the output of a given node
 * @param fq_layer existing fake quantize layer to be copied; may be nullptr
 * @param last_node the node to whose output the new fake quantize layer will be connected
 * @return the new fake quantize layer, or last_node when fq_layer is nullptr
 */
std::shared_ptr<ngraph::Node> InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fq_layer, std::shared_ptr<ngraph::Node> last_node);

} // namespace GNAPluginNS
Loading

0 comments on commit 9e30085

Please sign in to comment.