[GNA] Fixed export/import precisions (#6273)
mryzhov authored Aug 27, 2021
1 parent 148bdf6 commit d0f49fe
Showing 7 changed files with 45 additions and 9 deletions.
@@ -18,6 +18,7 @@ struct InputDesc {
     std::unordered_map<std::string, intel_dnn_orientation_t> orientation_in;
     /// order of scale factors matches inputs order in original topology
     std::vector<float> inputScaleFactors;
+    std::vector<uint8_t> inputPrecisions;
     std::map<std::string, int> bytes_allocated_for_input;
     size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);

@@ -10,6 +10,7 @@

 namespace GNAPluginNS {
 struct OutputDesc {
+    uint8_t precision;
     double scale_factor = 1.0;
     uint32_t num_bytes_per_element = 0;
     uint32_t num_elements = 0;

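Both descriptor structs now carry the port precision that the serializer previously hard-coded to FP32, stored as a raw uint8_t to keep the model header compact. A minimal round-trip sketch of that encoding (illustration only, not part of the commit; it relies on Precision::getPrecVal(), which the plugin code below also uses, and on ePrecision values fitting in one byte):

    #include <ie_precision.hpp>

    InferenceEngine::Precision original = InferenceEngine::Precision::I16;
    uint8_t stored = original.getPrecVal();   // narrowed to one byte for serialization
    InferenceEngine::Precision restored(
        static_cast<InferenceEngine::Precision::ePrecision>(stored));
    // restored == original, so U8/I16 ports survive an export/import cycle
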
6 changes: 2 additions & 4 deletions inference-engine/src/gna_plugin/gna_model_serial.cpp
@@ -831,14 +831,13 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
     }
     uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
     InferenceEngine::Layout outputLayout = output.second->getLayout();
-    InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
     HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                            outputsDesc[outputIndex].ptrs[0],
                                            outputsDesc[outputIndex].num_bytes_per_element,
                                            elementsCount,
                                            outputShape,
                                            outputLayout,
-                                           outputPrecision,
+                                           outputsDesc[outputIndex].precision,
                                            outputsDesc[outputIndex].orientation);
     endPoints.push_back(endPoint);
     outputIndex++;

@@ -866,7 +865,7 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
     uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
     intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
     InferenceEngine::Layout inputLayout = input.second->getLayout();
-    InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
+    uint8_t inputPrecision = inputDesc->inputPrecisions.at(inputIndex);
     HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                            descriptor_ptr[0],
                                            element_size,

@@ -886,7 +885,6 @@ void GNAModelSerial::ImportInputs(std::istream &is,
                                   std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
                                   InferenceEngine::InputsDataMap& dataMap) {
     dataMap.clear();
-
     for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
         const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
             ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));

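With this change every serialized RuntimeEndPoint records the actual port precision instead of the FP32 constant. The matching import side (not shown in this diff) only needs to widen the stored byte back into a Precision before applying it; the endpoint field and variable names in this sketch are assumptions, not code from the commit:

    // hypothetical import-side counterpart
    uint8_t rawPrecision = endPoint.precision;   // assumes the endpoint exposes the byte written above
    InferenceEngine::Precision precision(
        static_cast<InferenceEngine::Precision::ePrecision>(rawPrecision));
    dataMap[name]->setPrecision(precision);      // InputInfo::setPrecision is existing IE API
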
34 changes: 31 additions & 3 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -518,6 +518,33 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network
     }
 }

+void GNAPlugin::UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork & network) {
+    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputsAndOutputsInfoFromNetwork");
+
+    // update inputs
+    {
+        InputsDataMap inputs = network.getInputsInfo();
+        if (inputsDesc->inputPrecisions.size() != 0) {
+            inputsDesc->inputPrecisions.clear();
+        }
+        for (const auto input : inputs) {
+            inputsDesc->inputPrecisions.push_back(input.second->getPrecision().getPrecVal());
+        }
+    }
+
+    // update outputs
+    {
+        OutputsDataMap outputs = network.getOutputsInfo();
+        outputsDesc.resize(outputs.size());
+
+        size_t outputIdx = 0;
+        for (const auto output : outputs) {
+            outputsDesc[outputIdx].precision = output.second->getPrecision().getPrecVal();
+            ++outputIdx;
+        }
+    }
+}
+
 bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
     auto initOutput = [this, portId, layer]
         (intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {

@@ -759,6 +786,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     UpdateGnaQuantModeFromNetwork(network);
     UpdateInputScaleFromNetwork(network);

+    // Set input and output information from original network
+    UpdateInputsAndOutputsInfoFromNetwork(network);
+
     if (MustBeConvertedFromNCHWToNHWC(details::CNNNetSortTopologically(network))) {
         FillInputsAndOutputsTranspositionInfo(network);
     }

@@ -922,7 +952,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         inputsDesc->getPtrInputsGlobal(input.first).resize(gnaFlags->gna_lib_async_threads_num);
     }

-    // CreatingLayer primitives
+    // Creating Layer primitives
     for (auto & layer : sortedNoMem) {
         graphCompiler.CreateLayerPrimitive(layer);
     }

@@ -940,8 +970,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     }

     /// setting-up output layers information
-    outputsDesc.resize(outputsDataMap.size());
-
     int portId = 0;
     for (auto && outPort : outputsDataMap) {
         // gets output layer pointer in original topology not in cloned

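UpdateInputsAndOutputsInfoFromNetwork() snapshots the original network's port precisions before compilation, which is what makes the later resize in LoadNetwork redundant and gives Export real precisions to write. An end-to-end view of the behavior being fixed might look like this (a sketch against the 2021.4 Inference Engine API; 'net' stands for any CNNNetwork with non-FP32 ports and is an assumption, not code from the commit):

    #include <inference_engine.hpp>
    #include <sstream>

    InferenceEngine::Core ie;
    auto exeNet = ie.LoadNetwork(net, "GNA");

    std::stringstream model;
    exeNet.Export(model);                             // now serializes per-port precisions
    auto imported = ie.ImportNetwork(model, "GNA");

    // before this fix the imported ports always reported FP32
    auto inPrc = imported.GetInputsInfo().begin()->second->getPrecision();
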
1 change: 1 addition & 0 deletions inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -213,6 +213,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
     void UpdateFieldsFromConfig();
     void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
     void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
+    void UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork &);
     /**
      * @brief Tries to init an output on the base of a layer data
      * @param portId output port identificator

@@ -42,8 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*ConvolutionLayerTest.CompareWithRefs.*D=\(3.1\).*)",
         R"(.*ConstantResultSubgraphTest.*IS=\(2\.3\.4\.5\).*)",
         R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
-        // TODO: Issue 51528
-        R"(.*CachingSupport.*_(u8|i16)_.*)",
+
         // TODO: Issue 57363 (Param -> Result subgraphs)
         R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
         // TODO: Issue 57368 (accuracy)

@@ -269,6 +269,14 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
             Compare(reinterpret_cast<const std::int32_t *>(expectedBuffer),
                     reinterpret_cast<const std::int32_t *>(actualBuffer), size, 0);
             break;
+        case InferenceEngine::Precision::I16:
+            Compare(reinterpret_cast<const std::int16_t *>(expectedBuffer),
+                    reinterpret_cast<const std::int16_t *>(actualBuffer), size, 0);
+            break;
+        case InferenceEngine::Precision::U8:
+            Compare(reinterpret_cast<const std::uint8_t *>(expectedBuffer),
+                    reinterpret_cast<const std::uint8_t *>(actualBuffer), size, 0);
+            break;
         default:
             FAIL() << "Comparator for " << precision << " precision isn't supported";
         }
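
The new I16 and U8 cases dispatch to the same templated comparator with a zero threshold, so integer outputs must match exactly. A reduced stand-alone version of that pattern (a sketch of the idea, not the framework's actual Compare implementation):

    #include <cmath>
    #include <cstddef>
    #include <gtest/gtest.h>

    template <typename T>
    void CompareBuffers(const T *expected, const T *actual, std::size_t size, double threshold) {
        for (std::size_t i = 0; i < size; ++i) {
            // element-wise absolute difference; threshold == 0 means bit-exact match
            const double diff = std::abs(static_cast<double>(expected[i]) - static_cast<double>(actual[i]));
            ASSERT_LE(diff, threshold) << "mismatch at element " << i;
        }
    }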
