[GNA] Fixed accuracy degradation caused by the input quantization restriction (openvinotoolkit#7258)

* [GNA] Fixed accuracy degradation caused by the input quantization restriction

* [GNA] Put scale factor calculation into a separate function
elilobanova authored and dood-apo committed Aug 24, 2023
1 parent 702c37c commit 80de4fb
Showing 2 changed files with 20 additions and 14 deletions.
inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp (24 changes: 17 additions & 7 deletions)
@@ -36,6 +36,17 @@ struct ScaleFactorUpdateResult {
     }
 };
 
+/**
+ * @brief Calculates a scale factor from FakeQuantize statistics according to the formula:
+ * scale factor = max representable value / max absolute input value
+ * @param levels Number of integer quants
+ * @param minValue Minimum value to be quantized
+ * @param maxValue Maximum value to be quantized
+ */
+inline float CalculateScaleFactorFromStats(size_t levels, float minValue, float maxValue) {
+    return maxValue == minValue ? 1.0f : (levels - 1) / (maxValue - minValue);
+}
+
 /**
  * @brief Compares two float values and returns if they are equal
  * @param p1 First float value
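
For reference, a minimal self-contained sketch (not part of the commit) showing how the new helper behaves, including the fallback when the statistics describe a degenerate min == max range:

    #include <cstddef>
    #include <cstdio>

    // Copy of the helper added above, repeated so this sketch compiles on its own.
    inline float CalculateScaleFactorFromStats(size_t levels, float minValue, float maxValue) {
        return maxValue == minValue ? 1.0f : (levels - 1) / (maxValue - minValue);
    }

    int main() {
        // 65536 int16 levels spread over [-1, 1]: 65535 / 2 = 32767.5.
        std::printf("%.1f\n", CalculateScaleFactorFromStats(65536, -1.0f, 1.0f));
        // A degenerate FakeQuantize range (min == max) falls back to a scale of 1.0.
        std::printf("%.1f\n", CalculateScaleFactorFromStats(65536, 0.5f, 0.5f));
        return 0;
    }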
@@ -372,7 +383,7 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
     auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front();
     auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue));
 
-    result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue);
+    result = CalculateScaleFactorFromStats(quantizedParams->_dst_quant.GetLevels(), minOutValue, maxOutValue);
     if (std::isinf(result) || fp32eq(absMax, 0.0f)) {
         result = max_activation_scale_factor;
     }
@@ -452,7 +463,7 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
     if (CNNNetHasPrevLayer(cnnLayer) && quant->_dst_quant.IsStatsSet() && !quant->_dst_quant.IsScaleSet()) {
         auto minOutValue = quant->_dst_quant.GetMinValues().front();
         auto maxOutValue = quant->_dst_quant.GetMaxValues().front();
-        auto scale = (quant->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue);
+        auto scale = CalculateScaleFactorFromStats(quant->_dst_quant.GetLevels(), minOutValue, maxOutValue);
         quant->_dst_quant.SetScale(scale);
         quant->_src_quant = quant->_dst_quant;
     }
@@ -1068,8 +1079,8 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
     quant->_src_quant = quantDataForInputLayer->_dst_quant;
     if (quant->_weights_quant.IsStatsSet() && !quant->_weights_quant.IsScaleSet()) {
         auto getScale = [&quant](size_t i) {
-            auto valuesDiff = quant->_weights_quant.GetMaxValues(false)[i] - quant->_weights_quant.GetMinValues(false)[i];
-            return valuesDiff == 0 ? 1.0f : (quant->_weights_quant.GetLevels() - 1) / valuesDiff;
+            return CalculateScaleFactorFromStats(quant->_weights_quant.GetLevels(),
+                quant->_weights_quant.GetMinValues(false)[i], quant->_weights_quant.GetMaxValues(false)[i]);
         };
 
         float min_channel_scale = getScale(0);
@@ -1222,9 +1233,8 @@ class ScaleFactorPerLayer<InferenceEngine::GemmLayer*> {
     quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
     if (quantData->_src_quant.IsStatsSet()) {
         auto getScale = [&quantParams0](size_t i) {
-            return (quantParams0->_dst_quant.GetLevels() - 1) /
-                (quantParams0->_dst_quant.GetMaxValues(false)[i] -
-                quantParams0->_dst_quant.GetMinValues(false)[i]);
+            return CalculateScaleFactorFromStats(quantParams0->_dst_quant.GetLevels(),
+                quantParams0->_dst_quant.GetMinValues(false)[i], quantParams0->_dst_quant.GetMaxValues(false)[i]);
         };
         float min_channel_scale = getScale(0);
         quantParams0->_dst_quant.SetScale(min_channel_scale);
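
Both getScale lambdas above follow the same per-channel pattern: compute a candidate scale from each channel's statistics, then keep the minimum so the channel with the widest range still fits into the available levels. A hypothetical standalone illustration of that pattern (MinChannelScale is an invented name for this sketch; the plugin does this inline via getScale, with the loop over the remaining channels hidden in the collapsed context):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    inline float CalculateScaleFactorFromStats(size_t levels, float minValue, float maxValue) {
        return maxValue == minValue ? 1.0f : (levels - 1) / (maxValue - minValue);
    }

    // Pick the smallest per-channel scale so that the channel with the widest
    // statistics range still maps into the available quantization levels.
    float MinChannelScale(size_t levels,
                          const std::vector<float>& minValues,
                          const std::vector<float>& maxValues) {
        float minScale = CalculateScaleFactorFromStats(levels, minValues[0], maxValues[0]);
        for (size_t i = 1; i < minValues.size(); ++i) {
            minScale = std::min(minScale, CalculateScaleFactorFromStats(levels, minValues[i], maxValues[i]));
        }
        return minScale;
    }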
inference-engine/src/gna_plugin/gna_plugin.cpp (10 changes: 3 additions & 7 deletions)
Expand Up @@ -489,13 +489,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
};
// GNA input is always quantized to int16, so number of levels can't be greater than max uint16
size_t levels = std::min(fqLayer.getLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max()));
float scaleInput = (levels - 1) / (inputRange.second[0] - inputRange.first[0]);
auto minAbsVal = std::min(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
auto maxAbsVal = std::max(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
if (fp32eq(minAbsVal, 0.0f) && !fp32eq(maxAbsVal, 0.0f)) {
scaleInput = (fqLayer.getLevels() - 1) / (2 * maxAbsVal);
}
// todo: should be solved in POT (issue 63330)
size_t levels = std::min(fqLayer.getLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max() + 1));
auto scaleInput = frontend::CalculateScaleFactorFromStats(levels, inputRange.first[0], inputRange.second[0]);

IE_ASSERT(config.inputScaleFactors.size() > inputIdx);
IE_ASSERT(inputsDesc->inputScaleFactors.size() > inputIdx);
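
The accuracy fix itself is in the levels clamp: int16 offers 65536 representable values, but the old code capped levels at max uint16 = 65535 and then special-cased near-symmetric ranges, so the input scale could disagree with the scale derived from the same FakeQuantize statistics elsewhere in the network. A small sketch of the before/after arithmetic (assumed example values, not plugin code):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main() {
        const size_t fqLevels = 65536;  // levels reported by the FakeQuantize layer
        const float minVal = -1.0f, maxVal = 1.0f;

        // Old cap: max uint16 = 65535, one level short for int16 data.
        size_t before = std::min(fqLevels, static_cast<size_t>(std::numeric_limits<uint16_t>::max()));
        // New cap: max uint16 + 1 = 65536, the full int16 level count.
        size_t after = std::min(fqLevels, static_cast<size_t>(std::numeric_limits<uint16_t>::max() + 1));

        std::printf("old scale: %.1f\n", (before - 1) / (maxVal - minVal)); // 32767.0
        std::printf("new scale: %.1f\n", (after - 1) / (maxVal - minVal));  // 32767.5
        return 0;
    }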
