Skip to content

Commit

Permalink
[GNA] Fixed export/import precisions (openvinotoolkit#6273)
Browse files Browse the repository at this point in the history
  • Loading branch information
mryzhov authored and dood-apo committed Aug 24, 2023
1 parent 05a223b commit 5593bb1
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct InputDesc {
std::unordered_map<std::string, intel_dnn_orientation_t> orientation_in;
/// order of scale factors matches inputs order in original topology
std::vector<float> inputScaleFactors;
std::vector<uint8_t> inputPrecisions;
std::map<std::string, int> bytes_allocated_for_input;
size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

namespace GNAPluginNS {
struct OutputDesc {
uint8_t precision;
double scale_factor = 1.0;
uint32_t num_bytes_per_element = 0;
uint32_t num_elements = 0;
Expand Down
6 changes: 2 additions & 4 deletions inference-engine/src/gna_plugin/gna_model_serial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -831,14 +831,13 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
}
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
InferenceEngine::Layout outputLayout = output.second->getLayout();
InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
outputsDesc[outputIndex].ptrs[0],
outputsDesc[outputIndex].num_bytes_per_element,
elementsCount,
outputShape,
outputLayout,
outputPrecision,
outputsDesc[outputIndex].precision,
outputsDesc[outputIndex].orientation);
endPoints.push_back(endPoint);
outputIndex++;
Expand Down Expand Up @@ -866,7 +865,7 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
InferenceEngine::Layout inputLayout = input.second->getLayout();
InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
uint8_t inputPrecision = inputDesc->inputPrecisions.at(inputIndex);
HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
descriptor_ptr[0],
element_size,
Expand All @@ -886,7 +885,6 @@ void GNAModelSerial::ImportInputs(std::istream &is,
std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
InferenceEngine::InputsDataMap& dataMap) {
dataMap.clear();

for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
Expand Down
34 changes: 31 additions & 3 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,33 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
}
}

/**
 * @brief Caches the per-port precisions of the original network so that they
 *        can be serialized on export and restored on import.
 *        Input precisions are stored in topology order in
 *        inputsDesc->inputPrecisions; output precisions are written to
 *        outputsDesc[i].precision, resizing outputsDesc to the output count.
 * @param network the original network whose I/O precisions are recorded
 */
void GNAPlugin::UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork & network) {
    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputsAndOutputsInfoFromNetwork");

    // update inputs
    {
        InputsDataMap inputs = network.getInputsInfo();
        // Rebuild the precision list from scratch; clear() is a no-op on an
        // already-empty vector, so no emptiness pre-check is needed.
        inputsDesc->inputPrecisions.clear();
        inputsDesc->inputPrecisions.reserve(inputs.size());
        // Iterate by const reference to avoid copying the map's
        // (name, InputInfo::Ptr) pair on every iteration.
        for (const auto& input : inputs) {
            inputsDesc->inputPrecisions.push_back(input.second->getPrecision().getPrecVal());
        }
    }

    // update outputs
    {
        OutputsDataMap outputs = network.getOutputsInfo();
        outputsDesc.resize(outputs.size());

        size_t outputIdx = 0;
        // const& avoids copying the (name, DataPtr) pair per iteration.
        for (const auto& output : outputs) {
            outputsDesc[outputIdx].precision = output.second->getPrecision().getPrecVal();
            ++outputIdx;
        }
    }
}

bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
auto initOutput = [this, portId, layer]
(intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {
Expand Down Expand Up @@ -759,6 +786,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
UpdateGnaQuantModeFromNetwork(network);
UpdateInputScaleFromNetwork(network);

// Set input and output information from original network
UpdateInputsAndOutputsInfoFromNetwork(network);

if (MustBeConvertedFromNCHWToNHWC(details::CNNNetSortTopologically(network))) {
FillInputsAndOutputsTranspositionInfo(network);
}
Expand Down Expand Up @@ -922,7 +952,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
inputsDesc->getPtrInputsGlobal(input.first).resize(gnaFlags->gna_lib_async_threads_num);
}

// CreatingLayer primitives
// Creating Layer primitives
for (auto & layer : sortedNoMem) {
graphCompiler.CreateLayerPrimitive(layer);
}
Expand All @@ -940,8 +970,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
}

/// setting-up output layers information
outputsDesc.resize(outputsDataMap.size());

int portId = 0;
for (auto && outPort : outputsDataMap) {
// gets output layer pointer in original topology not in cloned
Expand Down
1 change: 1 addition & 0 deletions inference-engine/src/gna_plugin/gna_plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
void UpdateFieldsFromConfig();
void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
void UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork &);
/**
* @brief Tries to init an output on the base of a layer data
* @param portId output port identificator
Expand Down

0 comments on commit 5593bb1

Please sign in to comment.