diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index 40099b34f8ca19..40feeda4cb5fd6 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -699,5 +699,53 @@ using QuantI8_I8 = frontend::QuantPair;
 using FakeQuantI8 = frontend::QuantPair;
 
+enum class QuantizedDataType {
+    input,
+    output,
+    weights,
+    bias
+};
+
+/**
+ * @brief Returns a scale factor for specific layer data
+ * @param layer Layer to be quantized
+ * @param data_type Type of data to be quantized
+ * @return scale factor
+ */
+inline float getScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
+    IE_ASSERT(layer != nullptr);
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+    float scale_factor;
+    if (!quantized) {
+        scale_factor = 1.0f;
+    } else {
+        switch (data_type) {
+            case QuantizedDataType::input:
+                scale_factor = quantized->_src_quant.GetScale();
+                break;
+            case QuantizedDataType::output:
+                scale_factor = quantized->_dst_quant.GetScale();
+                break;
+            case QuantizedDataType::weights:
+                scale_factor = quantized->_weights_quant.GetScale();
+                break;
+            case QuantizedDataType::bias:
+                scale_factor = quantized->_bias_quant.GetScale();
+                break;
+            default:
+                THROW_GNA_LAYER_EXCEPTION(layer) << "Unsupported data type for quantization: " << static_cast<int>(data_type);
+        }
+    }
+
+    auto isZero = [](float p1) {
+        return std::abs(p1) <= 0.00001f;
+    };
+
+    if (scale_factor < 0.0 || isZero(scale_factor) || std::isinf(scale_factor)) {
+        THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid scale factor: " << scale_factor;
+    }
+
+    return scale_factor;
+}
 }  // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index a986a4b60e2b62..d5af4050292fd2 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -479,7 +479,8 @@ class ScaleFactorPerLayer {
                 if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) ||
                     (fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
-                    quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) {
+                    quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) ||
+                    quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) {
                     // means we already restarted propagation input memory layer
                     // need to search for requantiseable layer prior memory output layer
                     InferenceEngine::CNNLayerPtr restartedLayer;
@@ -646,6 +647,73 @@ class ScaleFactorPerLayer {
 
 template<>
 class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
+ private:
+    bool requantizeEltwiseInput(InferenceEngine::EltwiseLayer* eltwiseLayer, uint8_t inputIx, int16_t maxValue,
+                                bool fakeQuantize, ScaleFactorUpdateResult &result) {
+        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);
+        auto in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx);
+        bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
+        auto quantParams =
+            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx));
+        // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
+        auto quantParamsOpposite =
+            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !inputIx));
+
+        while (in && !LayerInfo(in).isInput() && !LayerInfo(in).isMemory() && !LayerInfo(in).isCopy()) {
+            auto info = LayerInfo(in);
+            if (info.isActivation() || info.isConst()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                float newOutputScale;
+                if (has8BOr16BOut) {
+                    newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
+                } else {
+                    newOutputScale = quantDataForInputLayer->_dst_quant.GetScale() *
+                                     quantParamsOpposite->_dst_quant.GetScale() * maxValue /
+                                     quantParams->_dst_quant.GetScale();
+                }
+                if (info.isActivation() && newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
+                    return false;
+                }
+                gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
+                          << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
+                          << ", was " << quantDataForInputLayer->_dst_quant.GetScale() <<"\n" << std::flush;
+                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
+                result = ScaleFactorUpdateResult(in.get());
+                return true;
+            }
+
+            if (fakeQuantize && info.isWeightableIdentity()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
+                    auto reducer = quantData->_weights_quant.GetScale() / maxValue;
+                    reducer = std::max(1.0f, reducer);
+                    auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
+                    newWeightsScale = std::max(1.0f, newWeightsScale);
+                    quantDataForInputLayer->_weights_quant.SetScale(static_cast<float>(newWeightsScale));
+                    quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
+                                                                quantDataForInputLayer->_src_quant.GetScale());
+
+                    result = ScaleFactorUpdateResult(in.get());
+                    return true;
+                }
+            }
+
+            // if we are here it means that we are in the port 1
+            if (info.isFullyConnected() || info.isConvolution()) {
+                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
+                auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
+                auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
+                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
+                quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
+                result = ScaleFactorUpdateResult(in.get());
+                return true;
+            }
+
+            in = InferenceEngine::CNNNetHasPrevLayer(in.get()) ? InferenceEngine::CNNNetPrevLayer(in) : nullptr;
+        }
+        return false;
+    }
+
  public:
     bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
                     bool fakeQuantize, int infiniteLoopCount) {
@@ -723,7 +791,7 @@ class ScaleFactorPerLayer {
                     }
                 }
 
-                if (!fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
+                if (bestWeightsScale > 0.0f && !fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
                     quantParams1->_weights_quant.SetScale(bestWeightsScale);
                     quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale());
                     result = ScaleFactorUpdateResult(in1.get());
@@ -735,79 +803,22 @@ class ScaleFactorPerLayer {
                 quantData->_dst_quant.SetScale(quantParams1->_dst_quant.GetScale());
 
                 // eltwise will work in int16 or int8 if low precision inputs are used
-                auto maxValue = lowPrecision ?
-                    (std::numeric_limits<int8_t>::max() - 1) : (std::numeric_limits<int16_t>::max() - 1);
-                if (quantData->_weights_quant.GetScale() > maxValue + 1) {
-                    // rescaling it's activation input
-                    // iterating thru previous layers of eltwise
-                    for (uint8_t i = 0; i < 2; ++i) {
-                        InferenceEngine::CNNLayerPtr in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i);
-                        bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
-                        auto quantParams =
-                            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i));
-                        // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
-                        auto quantParamsOpposite =
-                            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !i));
-
-                        for (; InferenceEngine::CNNNetHasPrevLayer(in.get()); in = CNNNetPrevLayer(in)) {
-                            auto info = LayerInfo(in);
-                            if (info.isSplit() || info.isSlice() || info.isConcat() || info.isNonFunctional()) {
-                                continue;
-                            } else if (info.has8BOr16BOutput() && info.isActivation()) {
-                                auto quantDataForActivation = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                float newOutputScale;
-                                if (has8BOr16BOut) {
-                                    newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
-                                } else {
-                                    newOutputScale = quantDataForActivation->_dst_quant.GetScale() *
-                                        quantParamsOpposite->_dst_quant.GetScale() * maxValue /
-                                        quantParams->_dst_quant.GetScale();
-                                }
-                                if (newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
-                                    break;
-                                }
-                                gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
-                                          << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
-                                          << ", was " << quantDataForActivation->_dst_quant.GetScale() <<"\n" << std::flush;
-                                quantDataForActivation->_dst_quant.SetScale(newOutputScale);
-                                result = ScaleFactorUpdateResult(in.get());
-                                return true;
-                            } else if (info.has8BOr16BOutput()) {
-                                break;
-                            }
-
-                            if (fakeQuantize && info.isWeightableIdentity()) {
-                                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
-                                    auto reducer = quantData->_weights_quant.GetScale() / std::numeric_limits<int16_t>::max();
-                                    reducer = std::max(1.0f, reducer);
-                                    auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
-                                    newWeightsScale = std::max(1.0f, newWeightsScale);
-                                    quantDataForInputLayer->_weights_quant.SetScale(static_cast<float>(newWeightsScale));
-                                    quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
-                                        quantDataForInputLayer->_src_quant.GetScale());
-
-                                    result = ScaleFactorUpdateResult(in.get());
-                                    return true;
-                                }
-                            }
+                auto maxValue = lowPrecision ?
+                    std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
+                if (quantData->_weights_quant.GetScale() <= maxValue) {
+                    return true;
+                }
-
-                            // if we are here it means that we are in the port 1
-                            if (info.isFullyConnected() || info.isConvolution()) {
-                                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
-                                auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
-                                auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
-                                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
-                                quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
-                                result = ScaleFactorUpdateResult(in.get());
-                                return true;
-                            }
-                        }
+                // rescaling it's activation input
+                // iterating thru previous layers of eltwise
+                for (uint8_t i = 0; i < 2; ++i) {
+                    if (requantizeEltwiseInput(eltwiseLayer, i, maxValue - 1, fakeQuantize, result)) {
+                        return true;
                     }
-                        // we unable to rescale the input - results might be bad
-                        gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
                 }
-                break;
+                // we unable to rescale the input - results might be bad
+                gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
             }
+            break;
         default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
         }
         return true;
@@ -1142,7 +1153,6 @@ class ScaleFactorPerLayer {
             }
             quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
         }
-        double tmp_dst_quant_scale = quant->_weights_quant.GetScale() * quant->_src_quant.GetScale();
 
         if (weightsSize == 1) {
             auto itt = thresholds.begin();
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 0360d6b755d0ab..e33768dbde7980 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -414,13 +414,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     uint32_t num_bytes_per_weight = convolution._weights->getTensorDesc().getPrecision().size();
     uint32_t num_bytes_per_bias = biasPrecision.size();
 
-    float weight_scale_factor = 1.0f;
-    float output_scale_factor = 1.0f;
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
-    if (quantized != nullptr) {
-        weight_scale_factor = quantized->_weights_quant.GetScale();
-        output_scale_factor = quantized->_dst_quant.GetScale();
-    }
+    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
+    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+
     auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
     dnn->InitConvolutional1DComponent(currentComponent,
                                       num_columns_in,
@@ -594,13 +590,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
                                       in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x, filter_n,
                                       convolution._stride_y, convolution._stride_x,
                                       inputPrec);
-    float weight_scale_factor = 1.0f;
-    float output_scale_factor = 1.0f;
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
-    if (quantized != nullptr) {
-        weight_scale_factor = quantized->_weights_quant.GetScale();
-        output_scale_factor = quantized->_dst_quant.GetScale();
-    }
+    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
+    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
 
     auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
     dnn->InitConvolutional2DComponent(currentComponent,
@@ -681,9 +672,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
 
 void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto& power = dynamic_cast<PowerLayer&>(*layer.get());
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-    IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
-
     if (power.power < 0.0f || power.power > 2.8f) {
         IE_THROW() << "[GNA plugin] unsupported power factor, expected be in <0, 2.8> range but was " << power.power;
     }
@@ -713,6 +701,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         auto& currentComponent = dnnComponents.addComponent(layer->name, "power");
 
+        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+        IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
         dnn->InitAffineComponent(currentComponent,
                                  num_rows_in + num_padding,
                                  num_columns_in,
@@ -772,8 +762,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
-        float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-        float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
+        float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+        float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
 
         if (!gnaFlags->sw_fp32) {
             if (gnaFlags->uniformPwlDesign) {
@@ -831,7 +821,6 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
 void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto& pooling = dynamic_cast<PoolingLayer&>(*layer.get());
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
 
     IE_ASSERT(!layer->insData.empty());
     IE_ASSERT(!layer->outData.empty());
@@ -891,7 +880,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
                          outputs->getPrecision().size(),
                          { pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS] },
                          { pooling._stride[X_AXIS], pooling._stride[Y_AXIS] },
-                         quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                         getScaleFactor(layer, QuantizedDataType::output),
                          ptr_inputs,
                          ptr_outputs);
 
@@ -909,8 +898,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
 }
 
 void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     IE_ASSERT(!layer->insData.empty());
     IE_ASSERT(!layer->outData.empty());
     auto inputs = layer->insData.begin()->lock();
@@ -936,7 +923,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
                  num_columns_out,
                  inputs->getPrecision().size(),
                  outputs->getPrecision().size(),
-                 quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                 getScaleFactor(layer, QuantizedDataType::output),
                  num_rows_out + num_padding_out,
                  num_columns_out,
                  ptr_inputs,
@@ -1066,7 +1053,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                            << axis.size() << ".";
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    size_t cropOffset = offset.front() * cropLayer->precision.size();
    size_t cropOutputSize = dim.front() * cropLayer->precision.size();
    const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
@@ -1124,6 +1110,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         auto& currentComponent = dnnComponents.addComponent(layer->name, "crop");
 
+        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
         dnn->InitAffineComponent(currentComponent,
                                  num_rows_in + num_padding,
                                  num_columns_in,
@@ -1132,8 +1119,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                  outputs->getPrecision().size(),
                                  quantized == nullptr ? inputs->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
                                  gnaFlags->input_low_precision ? 1 : 4,
-                                 quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                                 quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                                 getScaleFactor(layer, QuantizedDataType::weights),
+                                 getScaleFactor(layer, QuantizedDataType::output),
                                  ptr_inputs,
                                  ptr_outputs,
                                  ptr_weights,
@@ -1267,8 +1254,8 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                              // TODO: only fp32 and Int16 tested
                              quantized == nullptr ? inputs2Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
                              quantized == nullptr ? inputs4Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 4),
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_inputs,
                              ptr_outputs,
                              ptr_weights,
@@ -1376,8 +1363,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
                              outputs->getPrecision().size(),
                              quantized == nullptr ? input_2->getPrecision().size() : 2,
                              quantized == nullptr ? input_2->getPrecision().size() : 4,
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_input_1,
                              ptr_outputs,
                              ptr_input_2,
@@ -1465,8 +1452,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
                              outputs->getPrecision().size(),
                              weightable._weights->getTensorDesc().getPrecision().size(),
                              biasPrecisionSize,
-                             quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                             quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                             getScaleFactor(layer, QuantizedDataType::weights),
+                             getScaleFactor(layer, QuantizedDataType::output),
                              ptr_inputs,
                              ptr_outputs,
                              ptr_weights,
@@ -1605,8 +1592,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
         return;
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     void* ptr_inputs = nullptr;
     void* ptr_outputs = nullptr;
     void* ptr_weights = nullptr;
@@ -1645,7 +1630,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                      num_columns_in,
                      inputs->getPrecision().size(),
                      inputs->getPrecision().size(),
-                     quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                     getScaleFactor(layer, QuantizedDataType::output),
                      num_rows_copied,
                      num_columns_in,
                      ptr_inputs,
@@ -1682,9 +1667,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                      outputs->getPrecision().size(),
                      filterLayer->_weights->getTensorDesc().getPrecision().size(),
                      biasPrecisionSize,
-                     quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-                     quantized == nullptr ?
-                     1 : quantized->_dst_quant.GetScale(),
+                     getScaleFactor(layer, QuantizedDataType::weights),
+                     getScaleFactor(layer, QuantizedDataType::output),
                      ptr_inputs,
                      ptr_outputs,
                      ptr_weights,
@@ -1739,8 +1724,6 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
         return;
     }
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
     auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
     if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
         THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
     }
@@ -1787,8 +1770,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
         numberOfFilters,
         filterWidth,
         convolutionStride,
-        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
-        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+        getScaleFactor(layer, QuantizedDataType::weights),
+        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1847,9 +1830,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-    float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-    float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
+    float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
+    float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
 
     auto orientation = kDnnInterleavedOrientation;
 
@@ -1916,6 +1898,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     }
     auto activation_type = DnnActivation::fromType(it->second);
     activation_type.fqParams.set = false;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
     if (quantized != nullptr && quantized->_dst_quant.IsStatsSet()) {
         activation_type.fqParams.set = true;
         activation_type.fqParams.levels = quantized->_dst_quant.GetLevels();
@@ -2057,7 +2040,6 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
         return;
     }
     auto layerOrder = layer->GetParamAsInts("order");
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
     if (layer->insData.empty()) {
         THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer pointer is unexpectedly absent";
     }
@@ -2101,7 +2083,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
                            squeezedInputOrder[1],
                            inputs->getPrecision().size(),
                            outputs->getPrecision().size(),
-                           (quantized == nullptr) ? 1.0f : quantized->_dst_quant.GetScale(),
+                           getScaleFactor(layer, QuantizedDataType::output),
                            ptr_inputs,
                            ptr_outputs);
     }
@@ -2116,7 +2098,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
                            squeezedInputOrder[1],
                            inputs->getPrecision().size(),
                            outputs->getPrecision().size(),
-                           quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
+                           getScaleFactor(layer, QuantizedDataType::output),
                            ptr_inputs,
                            ptr_outputs);
     }
@@ -2608,4 +2590,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
         }
     }
     return temp_buffer;
-}
+}
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp
new file mode 100644
index 00000000000000..1663201337931d
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/const_input_add.cpp
@@ -0,0 +1,117 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<
+    InferenceEngine::Precision,          // Network Precision
+    std::string,                         // Target Device
+    std::map<std::string, std::string>,  // Configuration
+    std::pair<float, float>,             // Input min/max values
+    std::pair<float, float>              // Constant min/max values
+> constInputAddParams;
+
+namespace LayerTestsDefinitions {
+
+class ConstInputAddTest : public testing::WithParamInterface<constInputAddParams>,
+    public LayerTestsUtils::LayerTestsCommon {
+ public:
+    static std::string getTestCaseName(testing::TestParamInfo<constInputAddParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::pair<float, float> inputRange;
+        std::pair<float, float> constRange;
+        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_IR=" << inputRange.first << "," << inputRange.second << "_";
+        result << "IR=" << constRange.first << "," << constRange.second;
+        return result.str();
+    }
+
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
+        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputMax - inputMin, inputMin, (inputMax - inputMin) / 10);
+    }
+
+ protected:
+    void SetUp() override {
+        InferenceEngine::Precision netPrecision;
+        std::pair<float, float> inputRange;
+        std::pair<float, float> constRange;
+        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        std::tie(inputMin, inputMax) = inputRange;
+
+        ngraph::Shape shape = {1, 72};
+        auto params = ngraph::builder::makeParams(ngPrc, { shape });
+
+        auto constant = ngraph::builder::makeConstant<float>(ngPrc, shape, {}, true, constRange.second, constRange.first);
+        auto eltwise = ngraph::builder::makeEltwise(constant, params[0], ngraph::helpers::EltwiseTypes::ADD);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset8::Result>(eltwise) };
+        function = std::make_shared<ngraph::Function>(results, params, "InputConstAdd");
+    }
+
+ private:
+    float inputMin = 0.0;
+    float inputMax = 0.0;
+};
+
+TEST_P(ConstInputAddTest, CompareWithRefImpl) {
+    Run();
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+    }
+};
+
+const std::vector<std::pair<float, float>> inputRange = {
+    {-10, 10},
+    {-100, 100}
+};
+
+const std::vector<std::pair<float, float>> constRange = {
+    {-10, 10},
+    {-0.1, 0.1},
+    {-1.0e-5, 1.0e-5}
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_const_input_add, ConstInputAddTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputRange),
+        ::testing::ValuesIn(constRange)),
+    ConstInputAddTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp b/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp
new file mode 100644
index 00000000000000..0a34eabe609633
--- /dev/null
+++ b/inference-engine/tests/unit/gna/gna_get_scale_factor.cpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <gtest/gtest.h>
+
+#include <legacy/ie_layers.h>
+// to suppress deprecated definition errors
+#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
+#include "legacy/layer_transform.hpp"
+#include "frontend/layer_quantizer.hpp"
+
+namespace {
+
+class GnaGetScaleFactorTest : public ::testing::Test {
+ protected:
+    void GetScaleFactorAndCheck(float src_scale, float dst_scale, float weights_scale, float bias_scale) const {
+        InferenceEngine::LayerParams params("fc", "FullyConnected", InferenceEngine::Precision::FP32);
+        InferenceEngine::CNNLayerPtr layer = std::make_shared<InferenceEngine::CNNLayer>(params);
+        layer = InferenceEngine::injectData<GNAPluginNS::QuantizedLayerParams>(*layer);
+        auto quant = InferenceEngine::getInjectedData<GNAPluginNS::QuantizedLayerParams>(*layer);
+        quant->_src_quant.SetScale(src_scale);
+        quant->_dst_quant.SetScale(dst_scale);
+        quant->_weights_quant.SetScale(weights_scale);
+        quant->_bias_quant.SetScale(bias_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::input), src_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output), dst_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights), weights_scale);
+        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::bias), bias_scale);
+    }
+};
+
+TEST_F(GnaGetScaleFactorTest, validSF) {
+    EXPECT_NO_THROW(GetScaleFactorAndCheck(100, 200, 300, 400));
+}
+
+TEST_F(GnaGetScaleFactorTest, invalidSF) {
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(0, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 0, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 0, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, 0));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(-100, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, -200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, -300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, -400));
+    double inf = std::numeric_limits<double>::infinity();
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(inf, 200, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, inf, 300, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, inf, 400));
+    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, inf));
+}
+
+} // namespace
\ No newline at end of file
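
For reference, a minimal sketch (not part of the patch) of how a call site consumes the new GNAPluginNS::getScaleFactor helper instead of the old null-check pattern. The function initComponentScales and its layer argument are illustrative only; the helper, the QuantizedDataType enum and the validation behaviour (1.0f for non-quantized layers, exception for negative, zero or infinite scales) are taken from layer_quantizer.hpp above.

    // Hypothetical call site, for illustration only.
    #include "frontend/layer_quantizer.hpp"

    void initComponentScales(InferenceEngine::CNNLayerPtr layer) {
        // Before this patch each primitive repeated the same boilerplate:
        //   auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
        //   float weight_sf = quantized == nullptr ? 1.0f : quantized->_weights_quant.GetScale();
        // With the helper the null check and the sanity check are centralized:
        float weight_sf = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights);
        float output_sf = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output);
        // ... pass weight_sf / output_sf to the corresponding dnn component initialization ...
    }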