From d0f49fe0c8e82c96ea9a1563ef5cef7d37591230 Mon Sep 17 00:00:00 2001
From: Mikhail Ryzhov
Date: Fri, 27 Aug 2021 14:19:18 +0300
Subject: [PATCH] [GNA] Fixed export/import precisions (#6273)

---
 .../descriptions/gna_input_desc.hpp           |  1 +
 .../descriptions/gna_output_desc.hpp          |  1 +
 .../src/gna_plugin/gna_model_serial.cpp       |  6 ++--
 .../src/gna_plugin/gna_plugin.cpp             | 34 +++++++++++++++++--
 .../src/gna_plugin/gna_plugin.hpp             |  1 +
 .../skip_tests_config.cpp                     |  3 +-
 .../src/base/layer_test_utils.cpp             |  8 +++++
 7 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp
index 555e4ab112e6cf..41f08f0bdde66c 100644
--- a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp
+++ b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp
@@ -18,6 +18,7 @@ struct InputDesc {
     std::unordered_map<std::string, intel_dnn_orientation_t> orientation_in;
     /// order of scale factors matches inputs order in original topology
     std::vector<float> inputScaleFactors;
+    std::vector<uint8_t> inputPrecisions;
     std::map<std::string, size_t> bytes_allocated_for_input;
 
     size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);
diff --git a/inference-engine/src/gna_plugin/descriptions/gna_output_desc.hpp b/inference-engine/src/gna_plugin/descriptions/gna_output_desc.hpp
index eef68a00e814c0..16237d19ff5209 100644
--- a/inference-engine/src/gna_plugin/descriptions/gna_output_desc.hpp
+++ b/inference-engine/src/gna_plugin/descriptions/gna_output_desc.hpp
@@ -10,6 +10,7 @@ namespace GNAPluginNS {
 struct OutputDesc {
+    uint8_t precision;
     double scale_factor = 1.0;
     uint32_t num_bytes_per_element = 0;
     uint32_t num_elements = 0;
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp
index e32ded8a9e37ed..388a3c9d82b3f8 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.cpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp
@@ -831,14 +831,13 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
         }
         uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
         InferenceEngine::Layout outputLayout = output.second->getLayout();
-        InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
         HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                                outputsDesc[outputIndex].ptrs[0],
                                                outputsDesc[outputIndex].num_bytes_per_element,
                                                elementsCount,
                                                outputShape,
                                                outputLayout,
-                                               outputPrecision,
+                                               outputsDesc[outputIndex].precision,
                                                outputsDesc[outputIndex].orientation);
         endPoints.push_back(endPoint);
         outputIndex++;
@@ -866,7 +865,7 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
         uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
         intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
         InferenceEngine::Layout inputLayout = input.second->getLayout();
-        InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
+        uint8_t inputPrecision = inputDesc->inputPrecisions.at(inputIndex);
         HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                                descriptor_ptr[0],
                                                element_size,
@@ -886,7 +885,6 @@ void GNAModelSerial::ImportInputs(std::istream &is,
                                   std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
                                   InferenceEngine::InputsDataMap& dataMap) {
     dataMap.clear();
-
     for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
         const std::string& name = (modelHeader.version.major == 2 &&
                                    modelHeader.version.minor >= 3) ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index 48815f964da2dc..4f1b36ab5d5283 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -518,6 +518,33 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
     }
 }
 
+void GNAPlugin::UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork & network) {
+    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputsAndOutputsInfoFromNetwork");
+
+    // update inputs
+    {
+        InputsDataMap inputs = network.getInputsInfo();
+        if (inputsDesc->inputPrecisions.size() != 0) {
+            inputsDesc->inputPrecisions.clear();
+        }
+        for (const auto input : inputs) {
+            inputsDesc->inputPrecisions.push_back(input.second->getPrecision().getPrecVal());
+        }
+    }
+
+    // update outputs
+    {
+        OutputsDataMap outputs = network.getOutputsInfo();
+        outputsDesc.resize(outputs.size());
+
+        size_t outputIdx = 0;
+        for (const auto output : outputs) {
+            outputsDesc[outputIdx].precision = output.second->getPrecision().getPrecVal();
+            ++outputIdx;
+        }
+    }
+}
+
 bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
     auto initOutput = [this, portId, layer]
             (intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {
@@ -759,6 +786,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         UpdateGnaQuantModeFromNetwork(network);
         UpdateInputScaleFromNetwork(network);
 
+        // Set input and output information from original network
+        UpdateInputsAndOutputsInfoFromNetwork(network);
+
         if (MustBeConvertedFromNCHWToNHWC(details::CNNNetSortTopologically(network))) {
             FillInputsAndOutputsTranspositionInfo(network);
         }
@@ -922,7 +952,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         inputsDesc->getPtrInputsGlobal(input.first).resize(gnaFlags->gna_lib_async_threads_num);
     }
 
-    // CreatingLayer primitives
+    // Creating Layer primitives
     for (auto & layer : sortedNoMem) {
         graphCompiler.CreateLayerPrimitive(layer);
     }
@@ -940,8 +970,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     }
 
     /// setting-up output layers information
-    outputsDesc.resize(outputsDataMap.size());
-
     int portId = 0;
     for (auto && outPort : outputsDataMap) {
         // gets output layer pointer in original topology not in cloned
diff --git a/inference-engine/src/gna_plugin/gna_plugin.hpp b/inference-engine/src/gna_plugin/gna_plugin.hpp
index 941dc209cea110..476106f558ee99 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -213,6 +213,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
     void UpdateFieldsFromConfig();
    void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
    void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
+   void UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork &);
    /**
     * @brief Tries to init an output on the base of a layer data
     * @param portId output port identificator
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
index c580b6501ffc72..8305e65ccae8ec 100644
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
@@ -42,8 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*ConvolutionLayerTest.CompareWithRefs.*D=\(3.1\).*)",
         R"(.*ConstantResultSubgraphTest.*IS=\(2\.3\.4\.5\).*)",
         R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
-        // TODO: Issue 51528
-        R"(.*CachingSupport.*_(u8|i16)_.*)",
+        // TODO: Issue 57363 (Param -> Result subgraphs)
         R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
         // TODO: Issue 57368 (accuracy)
diff --git a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp
index 7ba0a1d90b483f..af21f3a39b7b19 100644
--- a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp
@@ -269,6 +269,14 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
             Compare<int32_t>(reinterpret_cast<const int32_t *>(expectedBuffer),
                              reinterpret_cast<const int32_t *>(actualBuffer), size, 0);
             break;
+        case InferenceEngine::Precision::I16:
+            Compare<int16_t>(reinterpret_cast<const int16_t *>(expectedBuffer),
+                             reinterpret_cast<const int16_t *>(actualBuffer), size, 0);
+            break;
+        case InferenceEngine::Precision::U8:
+            Compare<uint8_t>(reinterpret_cast<const uint8_t *>(expectedBuffer),
+                             reinterpret_cast<const uint8_t *>(actualBuffer), size, 0);
+            break;
         default:
             FAIL() << "Comparator for " << precision << " precision isn't supported";
     }
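
Note on the mechanism (illustrative, not part of the patch): the exported model
header now carries each endpoint's precision as the raw one-byte value of
InferenceEngine::Precision::ePrecision, captured from the original network by
UpdateInputsAndOutputsInfoFromNetwork(), instead of hard-coding FP32 at
serialization time. Below is a minimal sketch of that round-trip, assuming only
the Precision API the patch itself uses (getPrecVal() and the ePrecision
constructor); the helper names toRaw/fromRaw are hypothetical and introduced
here purely for illustration.

// Sketch: how an I/O precision survives export/import as a single byte.
#include <cstdint>
#include <ie_precision.hpp>

using InferenceEngine::Precision;

// Export side: keep the precision as its raw enum value (it fits in a byte),
// mirroring what the patch stores in InputDesc::inputPrecisions and
// OutputDesc::precision before serializing the runtime endpoints.
static uint8_t toRaw(const Precision& p) {
    return static_cast<uint8_t>(p.getPrecVal());
}

// Import side: rebuild the Precision wrapper from the stored byte, so an
// imported model can expose e.g. I16 or U8 endpoints rather than assuming FP32.
static Precision fromRaw(uint8_t raw) {
    return Precision(static_cast<Precision::ePrecision>(raw));
}

Serializing the numeric enum value rather than a precision name keeps the
endpoint record fixed-width, which is presumably why the new struct fields are
typed uint8_t instead of InferenceEngine::Precision.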