[GNA] Fixed export/import precisions (#6273)
mryzhov authored Aug 27, 2021
1 parent 148bdf6 commit d0f49fe
Showing 7 changed files with 45 additions and 9 deletions.
@@ -18,6 +18,7 @@ struct InputDesc {
     std::unordered_map<std::string, intel_dnn_orientation_t> orientation_in;
     /// order of scale factors matches inputs order in original topology
     std::vector<float> inputScaleFactors;
+    std::vector<uint8_t> inputPrecisions;
     std::map<std::string, int> bytes_allocated_for_input;
     size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);

@@ -10,6 +10,7 @@

 namespace GNAPluginNS {
 struct OutputDesc {
+    uint8_t precision;
     double scale_factor = 1.0;
     uint32_t num_bytes_per_element = 0;
     uint32_t num_elements = 0;

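Both descriptor structs now carry the port precision that the serializer previously hard-coded to FP32, stored as a raw uint8_t to keep the model header compact. A minimal round-trip sketch of that encoding (illustration only, not part of the commit; it relies on Precision::getPrecVal(), which the plugin code below also uses, and on ePrecision values fitting in one byte):

    #include <ie_precision.hpp>

    InferenceEngine::Precision original = InferenceEngine::Precision::I16;
    uint8_t stored = original.getPrecVal();   // narrowed to one byte for serialization
    InferenceEngine::Precision restored(
        static_cast<InferenceEngine::Precision::ePrecision>(stored));
    // restored == original, so U8/I16 ports survive an export/import cycle
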
6 changes: 2 additions & 4 deletions inference-engine/src/gna_plugin/gna_model_serial.cpp
@@ -831,14 +831,13 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
     }
     uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
     InferenceEngine::Layout outputLayout = output.second->getLayout();
-    InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
     HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                            outputsDesc[outputIndex].ptrs[0],
                                            outputsDesc[outputIndex].num_bytes_per_element,
                                            elementsCount,
                                            outputShape,
                                            outputLayout,
-                                           outputPrecision,
+                                           outputsDesc[outputIndex].precision,
                                            outputsDesc[outputIndex].orientation);
     endPoints.push_back(endPoint);
     outputIndex++;

@@ -866,7 +865,7 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
     uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
     intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
     InferenceEngine::Layout inputLayout = input.second->getLayout();
-    InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
+    uint8_t inputPrecision = inputDesc->inputPrecisions.at(inputIndex);
     HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                            descriptor_ptr[0],
                                            element_size,

@@ -886,7 +885,6 @@ void GNAModelSerial::ImportInputs(std::istream &is,
                                   std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
                                   InferenceEngine::InputsDataMap& dataMap) {
     dataMap.clear();
-
     for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
         const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
             ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));

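With this change every serialized RuntimeEndPoint records the actual port precision instead of the FP32 constant. The matching import side (not shown in this diff) only needs to widen the stored byte back into a Precision before applying it; the endpoint field and variable names in this sketch are assumptions, not code from the commit:

    // hypothetical import-side counterpart
    uint8_t rawPrecision = endPoint.precision;   // assumes the endpoint exposes the byte written above
    InferenceEngine::Precision precision(
        static_cast<InferenceEngine::Precision::ePrecision>(rawPrecision));
    dataMap[name]->setPrecision(precision);      // InputInfo::setPrecision is existing IE API
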
34 changes: 31 additions & 3 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -518,6 +518,33 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network
     }
 }

+void GNAPlugin::UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork & network) {
+    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputsAndOutputsInfoFromNetwork");
+
+    // update inputs
+    {
+        InputsDataMap inputs = network.getInputsInfo();
+        if (inputsDesc->inputPrecisions.size() != 0) {
+            inputsDesc->inputPrecisions.clear();
+        }
+        for (const auto input : inputs) {
+            inputsDesc->inputPrecisions.push_back(input.second->getPrecision().getPrecVal());
+        }
+    }
+
+    // update outputs
+    {
+        OutputsDataMap outputs = network.getOutputsInfo();
+        outputsDesc.resize(outputs.size());
+
+        size_t outputIdx = 0;
+        for (const auto output : outputs) {
+            outputsDesc[outputIdx].precision = output.second->getPrecision().getPrecVal();
+            ++outputIdx;
+        }
+    }
+}
+
 bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
     auto initOutput = [this, portId, layer]
         (intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {

@@ -759,6 +786,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     UpdateGnaQuantModeFromNetwork(network);
     UpdateInputScaleFromNetwork(network);

+    // Set input and output information from original network
+    UpdateInputsAndOutputsInfoFromNetwork(network);
+
     if (MustBeConvertedFromNCHWToNHWC(details::CNNNetSortTopologically(network))) {
         FillInputsAndOutputsTranspositionInfo(network);
     }

@@ -922,7 +952,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         inputsDesc->getPtrInputsGlobal(input.first).resize(gnaFlags->gna_lib_async_threads_num);
     }

-    // CreatingLayer primitives
+    // Creating Layer primitives
     for (auto & layer : sortedNoMem) {
         graphCompiler.CreateLayerPrimitive(layer);
     }

@@ -940,8 +970,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     }

     /// setting-up output layers information
-    outputsDesc.resize(outputsDataMap.size());
-
     int portId = 0;
     for (auto && outPort : outputsDataMap) {
         // gets output layer pointer in original topology not in cloned

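UpdateInputsAndOutputsInfoFromNetwork() snapshots the original network's port precisions before compilation, which is what makes the later resize in LoadNetwork redundant and gives Export real precisions to write. An end-to-end view of the behavior being fixed might look like this (a sketch against the 2021.4 Inference Engine API; 'net' stands for any CNNNetwork with non-FP32 ports and is an assumption, not code from the commit):

    #include <inference_engine.hpp>
    #include <sstream>

    InferenceEngine::Core ie;
    auto exeNet = ie.LoadNetwork(net, "GNA");

    std::stringstream model;
    exeNet.Export(model);                             // now serializes per-port precisions
    auto imported = ie.ImportNetwork(model, "GNA");

    // before this fix the imported ports always reported FP32
    auto inPrc = imported.GetInputsInfo().begin()->second->getPrecision();
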
1 change: 1 addition & 0 deletions inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -213,6 +213,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
     void UpdateFieldsFromConfig();
     void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
     void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
+    void UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork &);
     /**
      * @brief Tries to init an output on the base of a layer data
      * @param portId output port identificator

@@ -42,8 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*ConvolutionLayerTest.CompareWithRefs.*D=\(3.1\).*)",
         R"(.*ConstantResultSubgraphTest.*IS=\(2\.3\.4\.5\).*)",
         R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
-        // TODO: Issue 51528
-        R"(.*CachingSupport.*_(u8|i16)_.*)",
+
         // TODO: Issue 57363 (Param -> Result subgraphs)
         R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
         // TODO: Issue 57368 (accuracy)

@@ -269,6 +269,14 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
             Compare(reinterpret_cast<const std::int32_t *>(expectedBuffer),
                     reinterpret_cast<const std::int32_t *>(actualBuffer), size, 0);
             break;
+        case InferenceEngine::Precision::I16:
+            Compare(reinterpret_cast<const std::int16_t *>(expectedBuffer),
+                    reinterpret_cast<const std::int16_t *>(actualBuffer), size, 0);
+            break;
+        case InferenceEngine::Precision::U8:
+            Compare(reinterpret_cast<const std::uint8_t *>(expectedBuffer),
+                    reinterpret_cast<const std::uint8_t *>(actualBuffer), size, 0);
+            break;
         default:
             FAIL() << "Comparator for " << precision << " precision isn't supported";
         }
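
The new I16 and U8 cases dispatch to the same templated comparator with a zero threshold, so integer outputs must match exactly. A reduced stand-alone version of that pattern (a sketch of the idea, not the framework's actual Compare implementation):

    #include <cmath>
    #include <cstddef>
    #include <gtest/gtest.h>

    template <typename T>
    void CompareBuffers(const T *expected, const T *actual, std::size_t size, double threshold) {
        for (std::size_t i = 0; i < size; ++i) {
            // element-wise absolute difference; threshold == 0 means bit-exact match
            const double diff = std::abs(static_cast<double>(expected[i]) - static_cast<double>(actual[i]));
            ASSERT_LE(diff, threshold) << "mismatch at element " << i;
        }
    }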
