-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
mandrono
committed
Jan 25, 2021
1 parent
96b2ffa
commit 15b3708
Showing
9 changed files
with
850 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
...ce-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Copyright (C) 2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <transformations_visibility.hpp> | ||
#include <ngraph/pass/graph_rewrite.hpp> | ||
|
||
namespace ngraph { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API FakeQuantizeDecomposition; | ||
|
||
} // namespace pass | ||
} // namespace ngraph | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief Decomposes a FakeQuantize operation into a sub-graph of simpler operations. | ||
* The decomposition is meant for the cases when: | ||
* 1. input nodes have rank > 5 | ||
* 2. 'range' nodes have more than one dimension != 1 or this dimension is not batch or channel | ||
* In all cases the 'range' nodes should be Constant and IL values should be less than IH values. | ||
* NOTE(review): conditions 1 and 2 are not visible in this declaration; presumably the caller enforces them - confirm. | ||
*/ | ||
class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass { | ||
public: | ||
NGRAPH_RTTI_DECLARATION; | ||
// Default constructor; it is only declared here, the definition lives outside this header. | ||
FakeQuantizeDecomposition(); | ||
}; |
182 changes: 182 additions & 0 deletions
182
inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
// Copyright (C) 2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "itt.hpp" | ||
#include "transformations/op_conversions/fq_decomposition.hpp" | ||
|
||
#include <ngraph/opsets/opset1.hpp> | ||
#include <ngraph/opsets/opset5.hpp> | ||
#include <ngraph/rt_info.hpp> | ||
#include <ngraph/pattern/op/wrap_type.hpp> | ||
#include <ngraph/builder/autobroadcast.hpp> | ||
|
||
#include <numeric> | ||
|
||
NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0); | ||
|
||
/**
 * @brief Checks whether the range inputs of a FakeQuantize node allow the decomposition.
 *
 * Every input except the data input (index 0) must be a Constant node. In addition, after
 * numpy-style broadcasting of input_low (index 1) against input_high (index 2), each input_low
 * value must be strictly less than the corresponding input_high value.
 *
 * @param fq FakeQuantize node whose inputs are inspected
 * @return true when all range inputs are Constants and input_low < input_high element-wise,
 *         false otherwise
 */
bool isValidRangesInputs(const std::shared_ptr<ngraph::opset1::FakeQuantize> &fq) {
    using Constant = ngraph::opset1::Constant;

    for (size_t i = 1; i < fq->get_input_size(); ++i) {
        if (!std::dynamic_pointer_cast<const Constant>(fq->get_input_node_shared_ptr(i)))
            return false;
    }

    const auto il_node = std::dynamic_pointer_cast<const Constant>(fq->get_input_node_shared_ptr(1));
    const auto ih_node = std::dynamic_pointer_cast<const Constant>(fq->get_input_node_shared_ptr(2));
    // Defensive: never dereference a failed cast, even though the loop above already vetted the inputs.
    if (!il_node || !ih_node)
        return false;

    const auto broadcasted = ngraph::builder::get_numpy_broadcast_shapes({il_node->get_shape(), ih_node->get_shape()});
    const auto& result_shape = broadcasted.first;
    const auto& il_padded_shape = broadcasted.second.at(0);
    const auto& ih_padded_shape = broadcasted.second.at(1);

    // Row-major strides of a padded input shape inside the broadcast result.
    // A broadcast dimension (size differs from the result) gets stride 0, so the same element is re-read.
    auto get_strides = [](const ngraph::Shape& in_shape, const ngraph::Shape& res_shape) -> std::vector<size_t> {
        std::vector<size_t> strides(res_shape.size(), 0);
        size_t k = 1;
        // Unsigned-safe reverse iteration (no signed/unsigned mixing).
        for (size_t i = in_shape.size(); i-- > 0;) {
            strides[i] = (in_shape[i] == res_shape[i]) ? k : 0;
            k *= in_shape[i];
        }
        return strides;
    };

    const auto il_strides = get_strides(il_padded_shape, result_shape);
    const auto ih_strides = get_strides(ih_padded_shape, result_shape);

    // Flat offset of a result coordinate inside an input buffer.
    auto get_index = [](const ngraph::Coordinate& in_coord, const std::vector<size_t>& strides) -> size_t {
        size_t index = 0;
        for (size_t i = 0; i < in_coord.size(); ++i) {
            index += in_coord[i] * strides[i];
        }
        return index;
    };

    const std::vector<float> il = il_node->cast_vector<float>();
    const std::vector<float> ih = ih_node->cast_vector<float>();

    // Advances the coordinate like an odometer: the innermost dimension moves first and
    // carries into the next outer dimension when it wraps around to zero.
    auto step = [](ngraph::Coordinate& coord, const ngraph::Shape& dims) {
        auto coord_it = coord.rbegin();
        auto dim_it = dims.rbegin();
        while (coord_it != coord.rend()) {
            *coord_it = (*coord_it + 1) % *dim_it;
            if (*coord_it != 0) {
                break;
            }
            ++coord_it;
            ++dim_it;
        }
    };

    // The seed must be size_t: with an int seed std::accumulate would carry the product in int.
    const size_t work_amount = std::accumulate(result_shape.begin(), result_shape.end(), size_t{1},
                                               [](size_t acc, size_t dim) { return acc * dim; });
    ngraph::Coordinate iter(result_shape.size(), 0);
    for (size_t i = 0; i < work_amount; ++i) {
        if (il[get_index(iter, il_strides)] >= ih[get_index(iter, ih_strides)])
            return false;
        step(iter, result_shape);
    }

    return true;
}
|
||
/** | ||
* Expression from specification: | ||
* if x <= min(input_low, input_high): | ||
* output = output_low | ||
* elif x > max(input_low, input_high): | ||
* output = output_high | ||
* else: | ||
* output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low | ||
* | ||
* Expanding the brackets inside round(): | ||
* round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) | ||
* Dividing by (levels-1) and multiplying by (output_high - output_low) is folded into a single multiplication by (output_high - output_low) / (levels-1) | ||
* | ||
* => | ||
* round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + output_low | ||
*/ | ||
// Registers a matcher on opset1::FakeQuantize whose callback replaces the matched node with
// the sub-graph: Maximum -> Minimum (clamp) -> Multiply -> Subtract -> Round -> Multiply -> Add,
// adding Convert nodes when the data / output element types differ from the computation type.
ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() {
    MATCHER_SCOPE(FakeQuantizeDecomposition);
    auto fq_pattern = ngraph::pattern::wrap_type<ngraph::opset1::FakeQuantize>();

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        auto &pattern_map = m.get_pattern_value_map();
        const auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(
            pattern_map.at(fq_pattern).get_node_shared_ptr());

        // Guard clauses, evaluated in the same order as a short-circuit '||' chain.
        if (fq == nullptr) {
            return false;
        }
        if (transformation_callback(fq)) {
            return false;
        }
        if (!isValidRangesInputs(fq)) {
            return false;
        }

        Output<Node> data = fq->input_value(0);
        const Output<Node> input_low = fq->input_value(1);
        const Output<Node> input_high = fq->input_value(2);
        const Output<Node> output_low = fq->input_value(3);
        const Output<Node> output_high = fq->input_value(4);

        // Every node created below is recorded here so that runtime info can be copied onto it.
        ngraph::NodeVector new_ops;

        // The arithmetic is done in the element type of input_low; convert the data if it differs.
        auto calc_type = data.get_element_type();
        if (calc_type != input_low.get_element_type()) {
            calc_type = input_low.get_element_type();
            data = std::make_shared<ngraph::opset1::Convert>(data, calc_type);
            new_ops.push_back(data.get_node_shared_ptr());
        }

        // Substituting input_low / input_high into the formula yields output_low / output_high,
        // so it is enough to clamp x into [input_low, input_high].
        const auto lower_clamped = std::make_shared<ngraph::opset1::Maximum>(data, input_low);
        new_ops.push_back(lower_clamped);
        const auto clamped = std::make_shared<ngraph::opset1::Minimum>(lower_clamped, input_high);
        new_ops.push_back(clamped);

        // (levels-1)
        const auto levels_m1 = std::make_shared<ngraph::opset1::Constant>(calc_type, Shape{}, fq->get_levels() - 1);
        new_ops.push_back(levels_m1);

        // (input_high - input_low)
        const auto in_range = std::make_shared<ngraph::opset1::Subtract>(input_high, input_low);
        new_ops.push_back(in_range);
        // input scale: (levels-1) / (input_high - input_low)
        const auto in_scale = std::make_shared<ngraph::opset1::Divide>(levels_m1, in_range);
        new_ops.push_back(in_scale);
        // input shift: input_low * (levels-1) / (input_high - input_low)
        const auto in_shift = std::make_shared<ngraph::opset1::Multiply>(input_low, in_scale);
        new_ops.push_back(in_shift);

        // x * (levels-1) / (input_high - input_low)
        const auto scaled = std::make_shared<ngraph::opset1::Multiply>(clamped, in_scale);
        new_ops.push_back(scaled);
        // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)
        const auto shifted = std::make_shared<ngraph::opset1::Subtract>(scaled, in_shift);
        new_ops.push_back(shifted);

        // round(...) of the expression above
        const auto rounded = std::make_shared<ngraph::opset5::Round>(shifted, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
        new_ops.push_back(rounded);

        // (output_high - output_low)
        const auto out_range = std::make_shared<ngraph::opset1::Subtract>(output_high, output_low);
        new_ops.push_back(out_range);
        // output scale: (output_high - output_low) / (levels-1)
        const auto out_scale = std::make_shared<ngraph::opset1::Divide>(out_range, levels_m1);
        new_ops.push_back(out_scale);

        // round(...) * (output_high - output_low) / (levels-1)
        const auto rescaled = std::make_shared<ngraph::opset1::Multiply>(rounded, out_scale);
        new_ops.push_back(rescaled);
        // round(...) * (output_high - output_low) / (levels-1) + output_low
        std::shared_ptr<Node> replacement = std::make_shared<ngraph::opset1::Add>(rescaled, output_low);
        new_ops.push_back(replacement);

        // Restore the element type the original FakeQuantize produced, if the computation type differs.
        const auto &expected_type = fq->get_output_element_type(0);
        if (replacement->get_output_element_type(0) != expected_type) {
            replacement = std::make_shared<ngraph::opset1::Convert>(replacement, expected_type);
            new_ops.push_back(replacement);
        }

        const auto root = m.get_match_root();
        replacement->set_friendly_name(root->get_friendly_name());
        ngraph::copy_runtime_info(fq, new_ops);
        ngraph::replace_node(root, replacement);
        return true;
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(fq_pattern, matcher_name);
    register_matcher(matcher, callback);
}
Oops, something went wrong.