diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index 40099b34f8ca19..40feeda4cb5fd6 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -699,5 +699,53 @@ using QuantI8_I8 = frontend::QuantPair;
 using FakeQuantI8 = frontend::QuantPair;
 
+enum class QuantizedDataType {
+    input,
+    output,
+    weights,
+    bias
+};
+
+/**
+ * @brief Returns a scale factor for specific layer data
+ * @param layer Layer to be quantized
+ * @param data_type Type of data to be quantized
+ * @return scale factor
+ */
+inline float getScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
+    IE_ASSERT(layer != nullptr);
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+    float scale_factor;
+    if (!quantized) {
+        scale_factor = 1.0f;
+    } else {
+        switch (data_type) {
+            case QuantizedDataType::input:
+                scale_factor = quantized->_src_quant.GetScale();
+                break;
+            case QuantizedDataType::output:
+                scale_factor = quantized->_dst_quant.GetScale();
+                break;
+            case QuantizedDataType::weights:
+                scale_factor = quantized->_weights_quant.GetScale();
+                break;
+            case QuantizedDataType::bias:
+                scale_factor = quantized->_bias_quant.GetScale();
+                break;
+            default:
+                THROW_GNA_LAYER_EXCEPTION(layer) << "Unsupported data type for quantization: " << static_cast<int>(data_type);
+        }
+    }
+
+    auto isZero = [](float p1) {
+        return std::abs(p1) <= 0.00001f;
+    };
+
+    if (scale_factor < 0.0 || isZero(scale_factor) || std::isinf(scale_factor)) {
+        THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid scale factor: " << scale_factor;
+    }
+
+    return scale_factor;
+}
 }  // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index a986a4b60e2b62..d5af4050292fd2 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -479,7 +479,8 @@ class ScaleFactorPerLayer {
                 if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) ||
                     (fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
-                    quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) {
+                    quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) ||
+                    quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) {
                     // means we already restarted propagation input memory layer
                     // need to search for requantiseable layer prior memory output layer
                     InferenceEngine::CNNLayerPtr restartedLayer;
@@ -646,6 +647,73 @@ class ScaleFactorPerLayer {
 
 template<>
 class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
+ private:
+    bool requantizeEltwiseInput(InferenceEngine::EltwiseLayer* eltwiseLayer, uint8_t inputIx, int16_t maxValue,
+                                bool fakeQuantize, ScaleFactorUpdateResult &result) {
+        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);
+        auto in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx);
+        bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
+        auto quantParams =
+            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx));
+        // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
+        auto quantParamsOpposite =
+            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !inputIx));
+
+        while (in && !LayerInfo(in).isInput() && !LayerInfo(in).isMemory() && !LayerInfo(in).isCopy()) {
+            auto info = LayerInfo(in);
+            if (info.isActivation() || info.isConst()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                float newOutputScale;
+                if (has8BOr16BOut) {
+                    newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
+                } else {
+                    newOutputScale = quantDataForInputLayer->_dst_quant.GetScale() *
+                                     quantParamsOpposite->_dst_quant.GetScale() * maxValue /
+                                     quantParams->_dst_quant.GetScale();
+                }
+                if (info.isActivation() && newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
+                    return false;
+                }
+                gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
+                          << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
+                          << ", was " << quantDataForInputLayer->_dst_quant.GetScale() <<"\n" << std::flush;
+                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
+                result = ScaleFactorUpdateResult(in.get());
+                return true;
+            }
+
+            if (fakeQuantize && info.isWeightableIdentity()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
+                    auto reducer = quantData->_weights_quant.GetScale() / maxValue;
+                    reducer = std::max(1.0f, reducer);
+                    auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
+                    newWeightsScale = std::max(1.0f, newWeightsScale);
+                    quantDataForInputLayer->_weights_quant.SetScale(static_cast<float>(newWeightsScale));
+                    quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
+                                                                quantDataForInputLayer->_src_quant.GetScale());
+
+                    result = ScaleFactorUpdateResult(in.get());
+                    return true;
+                }
+            }
+
+            // if we are here it means that we are in the port 1
+            if (info.isFullyConnected() || info.isConvolution()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
+                auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
+                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
+                quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
+                result = ScaleFactorUpdateResult(in.get());
+                return true;
+            }
+
+            in = InferenceEngine::CNNNetHasPrevLayer(in.get()) ? InferenceEngine::CNNNetPrevLayer(in) : nullptr;
+        }
+        return false;
+    }
+
  public:
     bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
                     bool fakeQuantize, int infiniteLoopCount) {
@@ -723,7 +791,7 @@ class ScaleFactorPerLayer {
                     }
                 }
 
-                if (!fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
+                if (bestWeightsScale > 0.0f && !fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
                     quantParams1->_weights_quant.SetScale(bestWeightsScale);
                     quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale());
                     result = ScaleFactorUpdateResult(in1.get());
@@ -735,79 +803,22 @@ class ScaleFactorPerLayer {
                 quantData->_dst_quant.SetScale(quantParams1->_dst_quant.GetScale());
 
                 // eltwise will work in int16 or int8 if low precision inputs are used
-                auto maxValue = lowPrecision ?
-                    (std::numeric_limits<int8_t>::max() - 1) : (std::numeric_limits<int16_t>::max() - 1);
-                if (quantData->_weights_quant.GetScale() > maxValue + 1) {
-                    // rescaling it's activation input
-                    // iterating thru previous layers of eltwise
-                    for (uint8_t i = 0; i < 2; ++i) {
-                        InferenceEngine::CNNLayerPtr in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i);
-                        bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
-                        auto quantParams =
-                            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i));
-                        // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
-                        auto quantParamsOpposite =
-                            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !i));
-
-                        for (; InferenceEngine::CNNNetHasPrevLayer(in.get()); in = CNNNetPrevLayer(in)) {
-                            auto info = LayerInfo(in);
-                            if (info.isSplit() || info.isSlice() || info.isConcat() || info.isNonFunctional()) {
-                                continue;
-                            } else if (info.has8BOr16BOutput() && info.isActivation()) {
-                                auto quantDataForActivation = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                float newOutputScale;
-                                if (has8BOr16BOut) {
-                                    newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
-                                } else {
-                                    newOutputScale = quantDataForActivation->_dst_quant.GetScale() *
-                                        quantParamsOpposite->_dst_quant.GetScale() * maxValue /
-                                        quantParams->_dst_quant.GetScale();
-                                }
-                                if (newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
-                                    break;
-                                }
-                                gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
-                                          << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
-                                          << ", was " << quantDataForActivation->_dst_quant.GetScale() <<"\n" << std::flush;
-                                quantDataForActivation->_dst_quant.SetScale(newOutputScale);
-                                result = ScaleFactorUpdateResult(in.get());
-                                return true;
-                            } else if (info.has8BOr16BOutput()) {
-                                break;
-                            }
-
-                            if (fakeQuantize && info.isWeightableIdentity()) {
-                                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
-                                    auto reducer = quantData->_weights_quant.GetScale() / std::numeric_limits<int16_t>::max();
-                                    reducer = std::max(1.0f, reducer);
-                                    auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
-                                    newWeightsScale = std::max(1.0f, newWeightsScale);
-                                    quantDataForInputLayer->_weights_quant.SetScale(static_cast<float>(newWeightsScale));
-                                    quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
-                                        quantDataForInputLayer->_src_quant.GetScale());
-
-                                    result = ScaleFactorUpdateResult(in.get());
-                                    return true;
-                                }
-                            }
+                auto maxValue = lowPrecision ?
+                    std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
+                if (quantData->_weights_quant.GetScale() <= maxValue) {
+                    return true;
+                }
-
-                            // if we are here it means that we are in the port 1
-                            if (info.isFullyConnected() || info.isConvolution()) {
-                                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
-                                auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
-                                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
-                                quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
-                                result = ScaleFactorUpdateResult(in.get());
-                                return true;
-                            }
-                        }
+                // rescaling it's activation input
+                // iterating thru previous layers of eltwise
+                for (uint8_t i = 0; i < 2; ++i) {
+                    if (requantizeEltwiseInput(eltwiseLayer, i, maxValue - 1, fakeQuantize, result)) {
+                        return true;
                     }
-                        // we unable to rescale the input - results might be bad
-                        gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
                 }
-                break;
+                // we unable to rescale the input - results might be bad
+                gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
             }
+            break;
         default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
         }
         return true;
@@ -1142,7 +1153,6 @@ class ScaleFactorPerLayer {
             }
             quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
         }
-        double tmp_dst_quant_scale = quant->_weights_quant.GetScale() * quant->_src_quant.GetScale();
 
         if (weightsSize == 1) {
             auto itt = thresholds.begin();
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 0360d6b755d0ab..e33768dbde7980 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -414,13 +414,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     uint32_t num_bytes_per_weight = convolution._weights->getTensorDesc().getPrecision().size();
     uint32_t num_bytes_per_bias = biasPrecision.size();
 
-    float weight_scale_factor = 1.0f;
-    float output_scale_factor = 1.0f;
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
-    if (quantized != nullptr) {
-        weight_scale_factor = quantized->_weights_quant.GetScale();
-        output_scale_factor = quantized->_dst_quant.GetScale();
-    }
+    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
+    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+
     auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
     dnn->InitConvolutional1DComponent(currentComponent,
                                       num_columns_in,
@@ -594,13 +590,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
                                       in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x, filter_n,
                                       convolution._stride_y, convolution._stride_x,
                                       inputPrec);
-    float weight_scale_factor = 1.0f;
-    float output_scale_factor = 1.0f;
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
-    if (quantized != nullptr) {
-        weight_scale_factor = quantized->_weights_quant.GetScale();
-        output_scale_factor = quantized->_dst_quant.GetScale();
-    }
+    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
+    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
 
     auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
     dnn->InitConvolutional2DComponent(currentComponent,
@@ -681,9 +672,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
 
 void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto& power = dynamic_cast<PowerLayer&>(*layer.get());
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-    IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
-
     if (power.power < 0.0f || power.power > 2.8f) {
         IE_THROW() << "[GNA plugin] unsupported power factor, expected be in <0, 2.8> range but was " << power.power;
     }
@@ -713,6 +701,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         auto& currentComponent = dnnComponents.addComponent(layer->name, "power");
 
+        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+        IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
         dnn->InitAffineComponent(currentComponent,
                                  num_rows_in + num_padding,
                                  num_columns_in,
@@ -772,8 +762,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
-        float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-        float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
+        float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+        float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
 
         if (!gnaFlags->sw_fp32) {
             if (gnaFlags->uniformPwlDesign) {
@@ -831,7 +821,6 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
 void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto& pooling = dynamic_cast<PoolingLayer&>(*layer.get());
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
 
     IE_ASSERT(!layer->insData.empty());
     IE_ASSERT(!layer->outData.empty());
@@ -891,7 +880,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
                          outputs->getPrecision().size(),
                          { pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS] },
                          { pooling._stride[X_AXIS], pooling._stride[Y_AXIS] },
-                         quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                         getScaleFactor(layer, QuantizedDataType::output),
                          ptr_inputs,
                          ptr_outputs);
 
@@ -909,8 +898,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
 }
 
 void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     IE_ASSERT(!layer->insData.empty());
     IE_ASSERT(!layer->outData.empty());
     auto inputs = layer->insData.begin()->lock();
@@ -936,7 +923,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
                  num_columns_out,
                  inputs->getPrecision().size(),
                  outputs->getPrecision().size(),
-                 quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                 getScaleFactor(layer, QuantizedDataType::output),
                  num_rows_out + num_padding_out,
                  num_columns_out,
                  ptr_inputs,
@@ -1066,7 +1053,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                            << axis.size() << ".";
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    size_t cropOffset = offset.front() * cropLayer->precision.size();
    size_t cropOutputSize = dim.front() * cropLayer->precision.size();
    const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
@@ -1124,6 +1110,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         auto& currentComponent = dnnComponents.addComponent(layer->name, "crop");
 
+        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
         dnn->InitAffineComponent(currentComponent,
                                  num_rows_in + num_padding,
                                  num_columns_in,
@@ -1132,8 +1119,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                  outputs->getPrecision().size(),
                                  quantized == nullptr ? inputs->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
                                  gnaFlags->input_low_precision ? 1 : 4,
-                                 quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                                 quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                                 getScaleFactor(layer, QuantizedDataType::weights),
+                                 getScaleFactor(layer, QuantizedDataType::output),
                                  ptr_inputs,
                                  ptr_outputs,
                                  ptr_weights,
@@ -1267,8 +1254,8 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                              // TODO: only fp32 and Int16 tested
                              quantized == nullptr ? inputs2Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
                              quantized == nullptr ? inputs4Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 4),
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_inputs,
                              ptr_outputs,
                              ptr_weights,
@@ -1376,8 +1363,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
                              outputs->getPrecision().size(),
                              quantized == nullptr ? input_2->getPrecision().size() : 2,
                              quantized == nullptr ? input_2->getPrecision().size() : 4,
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_input_1,
                              ptr_outputs,
                              ptr_input_2,
@@ -1465,8 +1452,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
                              outputs->getPrecision().size(),
                              weightable._weights->getTensorDesc().getPrecision().size(),
                              biasPrecisionSize,
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_inputs,
                              ptr_outputs,
                              ptr_weights,
@@ -1605,8 +1592,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
         return;
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     void* ptr_inputs = nullptr;
     void* ptr_outputs = nullptr;
     void* ptr_weights = nullptr;
@@ -1645,7 +1630,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                      num_columns_in,
                      inputs->getPrecision().size(),
                      inputs->getPrecision().size(),
-                     quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                     getScaleFactor(layer, QuantizedDataType::output),
                      num_rows_copied,
                      num_columns_in,
                      ptr_inputs,
@@ -1682,9 +1667,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                      outputs->getPrecision().size(),
                      filterLayer->_weights->getTensorDesc().getPrecision().size(),
                      biasPrecisionSize,
-                     quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                     quantized == nullptr ?
-                     1 : quantized->_dst_quant.GetScale(),
+                     getScaleFactor(layer, QuantizedDataType::weights),
+                     getScaleFactor(layer, QuantizedDataType::output),
                      ptr_inputs,
                      ptr_outputs,
                      ptr_weights,
@@ -1739,8 +1724,6 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
         return;
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
     if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
         THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
     }
@@ -1787,8 +1770,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
         numberOfFilters,
         filterWidth,
         convolutionStride,
-        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+        getScaleFactor(layer, QuantizedDataType::weights),
+        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1847,9 +1830,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-    float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-    float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
+    float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+    float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
 
     auto orientation = kDnnInterleavedOrientation;
 
@@ -1916,6 +1898,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     }
     auto activation_type = DnnActivation::fromType(it->second);
     activation_type.fqParams.set = false;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
     if (quantized != nullptr && quantized->_dst_quant.IsStatsSet()) {
         activation_type.fqParams.set = true;
         activation_type.fqParams.levels = quantized->_dst_quant.GetLevels();
@@ -2057,7 +2040,6 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
         return;
     }
     auto layerOrder = layer->GetParamAsInts("order");
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
     if (layer->insData.empty()) {
         THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer pointer is unexpectedly absent";
     }
@@ -2101,7 +2083,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
                            squeezedInputOrder[1],
                            inputs->getPrecision().size(),
                            outputs->getPrecision().size(),
-                           (quantized == nullptr) ? 1.0f : quantized->_dst_quant.GetScale(),
+                           getScaleFactor(layer, QuantizedDataType::output),
                            ptr_inputs,
                            ptr_outputs);
     }
@@ -2116,7 +2098,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
                            squeezedInputOrder[1],
                            inputs->getPrecision().size(),
                            outputs->getPrecision().size(),
-                           quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                           getScaleFactor(layer, QuantizedDataType::output),
                            ptr_inputs,
                            ptr_outputs);
     }
@@ -2608,4 +2590,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
         }
     }
     return temp_buffer;
-}
+}
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp
new file mode 100644
index 00000000000000..1663201337931d
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp
@@ -0,0 +1,117 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<
+    InferenceEngine::Precision,          // Network Precision
+    std::string,                         // Target Device
+    std::map<std::string, std::string>,  // Configuration
+    std::pair<float, float>,             // Input min/max values
+    std::pair<float, float>              // Constant min/max values
+> constInputAddParams;
+
+namespace LayerTestsDefinitions {
+
+class ConstInputAddTest : public testing::WithParamInterface<constInputAddParams>,
+    public LayerTestsUtils::LayerTestsCommon {
+ public:
+    static std::string getTestCaseName(testing::TestParamInfo<constInputAddParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::pair<float, float> inputRange;
+        std::pair<float, float> constRange;
+        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_IR=" << inputRange.first << "," << inputRange.second << "_";
+        result << "IR=" << constRange.first << "," << constRange.second;
+        return result.str();
+    }
+
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
+        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputMax - inputMin, inputMin, (inputMax - inputMin) / 10);
+    }
+
+ protected:
+    void SetUp() override {
+        InferenceEngine::Precision netPrecision;
+        std::pair<float, float> inputRange;
+        std::pair<float, float> constRange;
+        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        std::tie(inputMin, inputMax) = inputRange;
+
+        ngraph::Shape shape = {1, 72};
+        auto params = ngraph::builder::makeParams(ngPrc, { shape });
+
+        auto constant = ngraph::builder::makeConstant<float>(ngPrc, shape, {}, true, constRange.second, constRange.first);
+        auto eltwise = ngraph::builder::makeEltwise(constant, params[0], ngraph::helpers::EltwiseTypes::ADD);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset8::Result>(eltwise) };
+        function = std::make_shared<ngraph::Function>(results, params, "InputConstAdd");
+    }
+
+ private:
+    float inputMin = 0.0;
+    float inputMax = 0.0;
+};
+
+TEST_P(ConstInputAddTest, CompareWithRefImpl) {
+    Run();
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+    }
+};
+
+const std::vector<std::pair<float, float>> inputRange = {
+    {-10, 10},
+    {-100, 100}
+};
+
+const std::vector<std::pair<float, float>> constRange = {
+    {-10, 10},
+    {-0.1, 0.1},
+    {-1.0e-5, 1.0e-5}
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_const_input_add, ConstInputAddTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputRange),
+        ::testing::ValuesIn(constRange)),
+    ConstInputAddTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp b/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp
new file mode 100644
index 00000000000000..0a34eabe609633
--- /dev/null
+++ b/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <gtest/gtest.h>
+
+#include <legacy/ie_layers.h>
+// to suppress deprecated definition errors
+#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
+#include "legacy/layer_transform.hpp"
+#include "frontend/layer_quantizer.hpp"
+
+namespace {
+
+class GnaGetScaleFactorTest : public ::testing::Test {
+ protected:
+    void GetScaleFactorAndCheck(float src_scale, float dst_scale, float weights_scale, float bias_scale) const {
+        InferenceEngine::LayerParams params("fc", "FullyConnected", InferenceEngine::Precision::FP32);
+        InferenceEngine::CNNLayerPtr layer = std::make_shared<InferenceEngine::CNNLayer>(params);
+        layer = InferenceEngine::injectData<GNAPluginNS::QuantizedLayerParams>(*layer);
+        auto quant = InferenceEngine::getInjectedData<GNAPluginNS::QuantizedLayerParams>(*layer);
+        quant->_src_quant.SetScale(src_scale);
+        quant->_dst_quant.SetScale(dst_scale);
+        quant->_weights_quant.SetScale(weights_scale);
+        quant->_bias_quant.SetScale(bias_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::input), src_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output), dst_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights), weights_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::bias), bias_scale);
+    }
+};
+
+TEST_F(GnaGetScaleFactorTest, validSF) {
+    EXPECT_NO_THROW(GetScaleFactorAndCheck(100, 200, 300, 400));
+}
+
+TEST_F(GnaGetScaleFactorTest, invalidSF) {
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(0, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 0, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 0, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, 0));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(-100, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, -200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, -300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, -400));
+    double inf = std::numeric_limits<double>::infinity();
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(inf, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, inf, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, inf, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, inf));
+}
+
+} // namespace
\ No newline at end of file
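
For reference, a minimal sketch (not part of the patch) of how a call site consumes the new GNAPluginNS::getScaleFactor helper instead of the old null-check pattern. The function initComponentScales and its layer argument are illustrative only; the helper, the QuantizedDataType enum and the validation behaviour (1.0f for non-quantized layers, exception for negative, zero or infinite scales) are taken from layer_quantizer.hpp above.

    // Hypothetical call site, for illustration only.
    #include "frontend/layer_quantizer.hpp"

    void initComponentScales(InferenceEngine::CNNLayerPtr layer) {
        // Before this patch each primitive repeated the same boilerplate:
        //   auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
        //   float weight_sf = quantized == nullptr ? 1.0f : quantized->_weights_quant.GetScale();
        // With the helper the null check and the sanity check are centralized:
        float weight_sf = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights);
        float output_sf = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output);
        // ... pass weight_sf / output_sf to the corresponding dnn component initialization ...
    }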