Merge branch 'private/kmagiers/GNAPlugin_Incorrect_result_Conv_Mul_Add' into 'master'

Const layer support for eltwise fix

See merge request inference-engine/dldt!7403
dorloff committed May 21, 2020
2 parents c35cb56 + a44ccfa commit 3990b4e
Showing 6 changed files with 158 additions and 20 deletions.
56 changes: 56 additions & 0 deletions inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
#include "weights_converter.hpp"
#include "layer_transform.hpp"

namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant<QuantI8> {
}
};

template <typename T>
inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
prec_blob->allocate();

int i = 0;
for (auto& precValue : *prec_blob) {
auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
if (f32Value > std::numeric_limits<T>::max()) {
precValue = std::numeric_limits<T>::max();
} else if (f32Value < std::numeric_limits<T>::min()) {
precValue = std::numeric_limits<T>::min();
} else {
precValue = static_cast<T>(f32Value);
}
}

return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
}

inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
InferenceEngine::Blob::Ptr result_ptr = nullptr;
switch (precision) {
case InferenceEngine::Precision::FP32:
result_ptr = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I32:
result_ptr = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I16:
result_ptr = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I8:
result_ptr = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
break;
default:
THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
}
return result_ptr;
}

template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
@@ -389,6 +433,18 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();

if (cnnLayer->type == "Const") {
if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
}
auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
auto const_blob = cnnLayer->blobs["custom"];
if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
}
}

return true;
}
};
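
For illustration, here is a minimal, standalone sketch of the per-element rule used by the new fp32_to_precision_blob helpers above: scale the FP32 value, saturate it to the target range, then cast. The name scale_and_saturate is illustrative and not part of the plugin.

#include <cstdint>
#include <limits>

// Scale, then clamp to the destination type's range before casting, mirroring
// the loop in fp32_to_precision_blob<T>. Note that std::numeric_limits<T>::min()
// is the lowest representable value only for integer T (the intended targets);
// for floating-point T it is the smallest positive normal value.
template <typename T>
T scale_and_saturate(float value, float scale_factor) {
    const float scaled = value * scale_factor;
    if (scaled > static_cast<float>(std::numeric_limits<T>::max())) {
        return std::numeric_limits<T>::max();
    }
    if (scaled < static_cast<float>(std::numeric_limits<T>::min())) {
        return std::numeric_limits<T>::min();
    }
    return static_cast<T>(scaled);
}

// Example: scale_and_saturate<int16_t>(2.5f, 16384.0f) would overflow int16_t,
// so it saturates to 32767.
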
33 changes: 33 additions & 0 deletions inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
return true;
}

if (cnnLayer->type == "Const") {
auto blob = cnnLayer->blobs["custom"];
if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
blob = make_fp32_blob(blob);
}
auto max_val = std::numeric_limits<float>::min();
auto min_val = std::numeric_limits<float>::max();

auto flt_buf = blob->buffer().as<float*>();
auto size = blob->size();

for (int i=0; i < size; i++) {
auto val = flt_buf[i];
if (val > max_val) max_val = val;
if (val < min_val) min_val = val;
}

auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;

// TODO: Investigate what should be the scale in such cases (31910)
if (std::isinf(scale_val)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
} else {
quant->_dst_quant.scale = scale_val;
}

return ScaleFactorUpdateResult();
}

if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {

auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);

auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);

switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}

// this path might result in significant data loss
quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;

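
As a worked illustration of the Const branch added to ScaleFactorPerLayer above: the scale factor is chosen so that the largest absolute value in the blob maps onto the int16 range, with a fallback to the source scale when the division yields infinity (an all-zero blob). The function below is a standalone sketch with illustrative names, not the plugin code.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>

// Mirror of the max-abs / INT16_MAX rule from the pass above.
float const_blob_scale_factor(const float* data, std::size_t size, float fallback_scale) {
    float max_abs = 0.0f;
    for (std::size_t i = 0; i < size; ++i) {
        max_abs = std::max(max_abs, std::abs(data[i]));
    }
    const float scale = static_cast<float>(std::numeric_limits<int16_t>::max()) / max_abs;
    // An all-zero blob divides by zero and produces +inf; fall back like the
    // std::isinf branch in the pass does.
    return std::isinf(scale) ? fallback_scale : scale;
}

// Example: a blob with values in [-0.5, 2.0] has max_abs = 2.0, so the scale is
// 32767 / 2.0 = 16383.5.
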
38 changes: 28 additions & 10 deletions inference-engine/src/gna_plugin/frontend/weights_converter.hpp
@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"

inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
auto fp32_blob = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
fp32_blob->allocate();

int i = 0;
for (auto& f32Value : *fp32_blob) {
auto f16Value = fp16_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}

return static_cast<InferenceEngine::Blob::Ptr>(fp32_blob);
}

inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
auto tmp =
InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
tmp->allocate();
int i = 0;
for (auto& f32Value : *tmp) {
auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}
newBlobs[blob.first] = tmp;
auto fp32_blob = make_fp32_blob(blob.second);
newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
InferenceEngine::BlobMap newBlobs;
for (auto& blob_pair : lp->blobs) {
auto blob_name = blob_pair.first;
auto blob_ptr = blob_pair.second;
if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
auto new_blob = make_fp32_blob(blob_ptr);
newBlobs[blob_name] = new_blob;
} else {
newBlobs[blob_name] = blob_ptr;
}
}

return true;
}

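
The FP16-to-FP32 loop that previously lived inline in fp16_to_fp32 is now the shared make_fp32_blob helper, which the quantizer and convertWeights also call; element conversion itself is done by InferenceEngine::PrecisionUtils::f16tof32. As a standalone illustration of what such a widening does bit-wise, here is a simple half-to-float decoder; it is a sketch, not the library's implementation.

#include <cstdint>
#include <cstring>

// Decode an IEEE 754 binary16 value stored in a uint16_t into a float.
float half_to_float(uint16_t h) {
    const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exp  = (h & 0x7C00u) >> 10;
    uint32_t mant = h & 0x03FFu;
    uint32_t bits;
    if (exp == 0) {
        if (mant == 0) {
            bits = sign;                              // signed zero
        } else {
            exp = 127 - 15 + 1;                       // subnormal: renormalize
            while ((mant & 0x0400u) == 0) { mant <<= 1; --exp; }
            mant &= 0x03FFu;
            bits = sign | (exp << 23) | (mant << 13);
        }
    } else if (exp == 0x1F) {
        bits = sign | 0x7F800000u | (mant << 13);     // infinity / NaN
    } else {
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    }
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}
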
39 changes: 30 additions & 9 deletions inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
}
auto constBlob = constLayer->blobs["custom"];
auto const_blob = constLayer->blobs["custom"];

void* ptr_for_const_blob = &ptr_for_const_blob;
connectOutput(constLayer, ptr_for_const_blob, constBlob->size());

const_connections[constLayer->name] = ptr_for_const_blob;
const_connections[constLayer->name] = &const_connections[constLayer->name];
void* ptr_for_const_blob = &const_connections[constLayer->name];

connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
// dont see practical use case when bind storage type need to be different that allocation type
gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
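
A note on the change above: ConstPrimitive now points the bound initializer at the const_connections map entry (rather than at a pointer-to-a-local trick) and sizes the output by const_blob->byteSize(); the actual copy of the blob into GNA memory happens only when the allocator runs the bound lambda. The snippet below is a standalone sketch of that defer-then-fill pattern with illustrative names; it is not the plugin's memory manager.

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <functional>
#include <vector>

int main() {
    // Stand-in for the Const layer's "custom" blob contents.
    const std::vector<float> const_blob = {1.0f, 2.0f, 3.0f};

    // Register the copy now; it runs later, once real memory exists. This is
    // the role bind_initializer plays for ptr_for_const_blob above.
    std::function<void(void*, std::size_t)> initializer =
        [const_blob](void* data, std::size_t size) {
            std::memcpy(data, const_blob.data(),
                        std::min(size, const_blob.size() * sizeof(float)));
        };

    // Later, after allocation has been resolved, the initializer fills the buffer.
    std::vector<float> allocated(const_blob.size());
    initializer(allocated.data(), allocated.size() * sizeof(float));
    return 0;
}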

@@ -731,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
int biasesLayerIdx = 1;

if (quantized) {
if (eltwise._operation == EltwiseLayer::Sum) {
switch (eltwise._operation) {
case InferenceEngine::EltwiseLayer::Sum:
case InferenceEngine::EltwiseLayer::Sub:
{
if (inputs4Bytes->getPrecision().size() != 4) {
std::swap(inputs4Bytes, inputs2Bytes);
biasesLayerIdx = 0;
}
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
} else {
break;
}
case InferenceEngine::EltwiseLayer::Prod:
{
// for mul both inputs should be 2 bytes precision
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
break;
}
default:
THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
}
}

@@ -785,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);

switch (eltwise._operation) {
case EltwiseLayer::Sub:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = -quantized->_weights_quant.scale;

auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
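
The new EltwiseLayer::Sub case above reuses the Sum machinery: subtraction becomes an addition in which the diagonal "identity" weight is negated, so the weighted input is subtracted from the bias input. In float mode the weight is simply -1.0f (versus 1.0f for Sum); in quantized mode it is the negated weights scale, saturated to the int16 weight range via the plugin's FLOAT_TO_INT16 macro. Below is a standalone sketch of that derivation with illustrative names; float_to_int16 stands in for the macro and is not the plugin's definition.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Round to nearest and clamp to the int16 range, standing in for FLOAT_TO_INT16.
int16_t float_to_int16(float v) {
    v = std::max(v, static_cast<float>(INT16_MIN));
    v = std::min(v, static_cast<float>(INT16_MAX));
    return static_cast<int16_t>(std::lround(v));
}

// Quantized subtraction: negate the identity weight and keep it inside the
// int16 weight range, as in the Sub branch above.
int16_t quantized_sub_identity_weight(float weights_scale) {
    const float scaled_identity = -weights_scale;
    return float_to_int16(std::min(scaled_identity, static_cast<float>(INT16_MAX)));
}
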
9 changes: 9 additions & 0 deletions inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -153,6 +153,15 @@ class LayerInfo {
return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sum;
}
bool isEltwiseSub() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
// dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer) is validated in isEltwise function
// coverity[var_deref_op]
return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sub;
}

bool isEltwiseMul() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
@@ -150,6 +150,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
auto prev1 = PrevFunctionalLayer(l, 1);

switch (eltwise->_operation) {
case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
// for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
// for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights

if (eltwise->_operation != EltwiseLayer::Sum)
if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
continue;

auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
