Merge branch 'private/kmagiers/GNAPlugin_Incorrect_result_Conv_Mul_Add' into 'master'

Const layer support for eltwise fix

See merge request inference-engine/dldt!7403
dorloff committed May 21, 2020
2 parents c35cb56 + a44ccfa commit 3990b4e
Showing 6 changed files with 158 additions and 20 deletions.
56 changes: 56 additions & 0 deletions inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
#include "weights_converter.hpp"
#include "layer_transform.hpp"

namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant<QuantI8> {
}
};

template <typename T>
inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
prec_blob->allocate();

int i = 0;
for (auto& precValue : *prec_blob) {
auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
if (f32Value > std::numeric_limits<T>::max()) {
precValue = std::numeric_limits<T>::max();
} else if (f32Value < std::numeric_limits<T>::min()) {
precValue = std::numeric_limits<T>::min();
} else {
precValue = static_cast<T>(f32Value);
}
}

return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
}

inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
InferenceEngine::Blob::Ptr result_ptr = nullptr;
switch (precision) {
case InferenceEngine::Precision::FP32:
result_ptr = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I32:
result_ptr = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I16:
result_ptr = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I8:
result_ptr = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
break;
default:
THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
}
return result_ptr;
}

template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
@@ -389,6 +433,18 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();

if (cnnLayer->type == "Const") {
if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
}
auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
auto const_blob = cnnLayer->blobs["custom"];
if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
}
}

return true;
}
};
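
For illustration, here is a minimal, standalone sketch of the per-element rule used by the new fp32_to_precision_blob helpers above: scale the FP32 value, saturate it to the target range, then cast. The name scale_and_saturate is illustrative and not part of the plugin.

#include <cstdint>
#include <limits>

// Scale, then clamp to the destination type's range before casting, mirroring
// the loop in fp32_to_precision_blob<T>. Note that std::numeric_limits<T>::min()
// is the lowest representable value only for integer T (the intended targets);
// for floating-point T it is the smallest positive normal value.
template <typename T>
T scale_and_saturate(float value, float scale_factor) {
    const float scaled = value * scale_factor;
    if (scaled > static_cast<float>(std::numeric_limits<T>::max())) {
        return std::numeric_limits<T>::max();
    }
    if (scaled < static_cast<float>(std::numeric_limits<T>::min())) {
        return std::numeric_limits<T>::min();
    }
    return static_cast<T>(scaled);
}

// Example: scale_and_saturate<int16_t>(2.5f, 16384.0f) would overflow int16_t,
// so it saturates to 32767.
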
33 changes: 33 additions & 0 deletions inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
return true;
}

if (cnnLayer->type == "Const") {
auto blob = cnnLayer->blobs["custom"];
if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
blob = make_fp32_blob(blob);
}
auto max_val = std::numeric_limits<float>::min();
auto min_val = std::numeric_limits<float>::max();

auto flt_buf = blob->buffer().as<float*>();
auto size = blob->size();

for (int i=0; i < size; i++) {
auto val = flt_buf[i];
if (val > max_val) max_val = val;
if (val < min_val) min_val = val;
}

auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;

// TODO: Investigate what should be the scale in such cases (31910)
if (std::isinf(scale_val)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
} else {
quant->_dst_quant.scale = scale_val;
}

return ScaleFactorUpdateResult();
}

if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {

auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);

auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);

switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}

// this path might result in significant data loss
quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;

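
As a worked illustration of the Const branch added to ScaleFactorPerLayer above: the scale factor is chosen so that the largest absolute value in the blob maps onto the int16 range, with a fallback to the source scale when the division yields infinity (an all-zero blob). The function below is a standalone sketch with illustrative names, not the plugin code.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>

// Mirror of the max-abs / INT16_MAX rule from the pass above.
float const_blob_scale_factor(const float* data, std::size_t size, float fallback_scale) {
    float max_abs = 0.0f;
    for (std::size_t i = 0; i < size; ++i) {
        max_abs = std::max(max_abs, std::abs(data[i]));
    }
    const float scale = static_cast<float>(std::numeric_limits<int16_t>::max()) / max_abs;
    // An all-zero blob divides by zero and produces +inf; fall back like the
    // std::isinf branch in the pass does.
    return std::isinf(scale) ? fallback_scale : scale;
}

// Example: a blob with values in [-0.5, 2.0] has max_abs = 2.0, so the scale is
// 32767 / 2.0 = 16383.5.
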
38 changes: 28 additions & 10 deletions inference-engine/src/gna_plugin/frontend/weights_converter.hpp
@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"

inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
auto fp32_blob = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
fp32_blob->allocate();

int i = 0;
for (auto& f32Value : *fp32_blob) {
auto f16Value = fp16_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}

return static_cast<InferenceEngine::Blob::Ptr>(fp32_blob);
}

inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
auto tmp =
InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
tmp->allocate();
int i = 0;
for (auto& f32Value : *tmp) {
auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}
newBlobs[blob.first] = tmp;
auto fp32_blob = make_fp32_blob(blob.second);
newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
InferenceEngine::BlobMap newBlobs;
for (auto& blob_pair : lp->blobs) {
auto blob_name = blob_pair.first;
auto blob_ptr = blob_pair.second;
if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
auto new_blob = make_fp32_blob(blob_ptr);
newBlobs[blob_name] = new_blob;
} else {
newBlobs[blob_name] = blob_ptr;
}
}

return true;
}

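
The FP16-to-FP32 loop that previously lived inline in fp16_to_fp32 is now the shared make_fp32_blob helper, which the quantizer and convertWeights also call; element conversion itself is done by InferenceEngine::PrecisionUtils::f16tof32. As a standalone illustration of what such a widening does bit-wise, here is a simple half-to-float decoder; it is a sketch, not the library's implementation.

#include <cstdint>
#include <cstring>

// Decode an IEEE 754 binary16 value stored in a uint16_t into a float.
float half_to_float(uint16_t h) {
    const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exp  = (h & 0x7C00u) >> 10;
    uint32_t mant = h & 0x03FFu;
    uint32_t bits;
    if (exp == 0) {
        if (mant == 0) {
            bits = sign;                              // signed zero
        } else {
            exp = 127 - 15 + 1;                       // subnormal: renormalize
            while ((mant & 0x0400u) == 0) { mant <<= 1; --exp; }
            mant &= 0x03FFu;
            bits = sign | (exp << 23) | (mant << 13);
        }
    } else if (exp == 0x1F) {
        bits = sign | 0x7F800000u | (mant << 13);     // infinity / NaN
    } else {
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    }
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}
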
39 changes: 30 additions & 9 deletions inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
}
auto constBlob = constLayer->blobs["custom"];
auto const_blob = constLayer->blobs["custom"];

void* ptr_for_const_blob = &ptr_for_const_blob;
connectOutput(constLayer, ptr_for_const_blob, constBlob->size());

const_connections[constLayer->name] = ptr_for_const_blob;
const_connections[constLayer->name] = &const_connections[constLayer->name];
void* ptr_for_const_blob = &const_connections[constLayer->name];

connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
// dont see practical use case when bind storage type need to be different that allocation type
gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
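
A note on the change above: ConstPrimitive now points the bound initializer at the const_connections map entry (rather than at a pointer-to-a-local trick) and sizes the output by const_blob->byteSize(); the actual copy of the blob into GNA memory happens only when the allocator runs the bound lambda. The snippet below is a standalone sketch of that defer-then-fill pattern with illustrative names; it is not the plugin's memory manager.

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <functional>
#include <vector>

int main() {
    // Stand-in for the Const layer's "custom" blob contents.
    const std::vector<float> const_blob = {1.0f, 2.0f, 3.0f};

    // Register the copy now; it runs later, once real memory exists. This is
    // the role bind_initializer plays for ptr_for_const_blob above.
    std::function<void(void*, std::size_t)> initializer =
        [const_blob](void* data, std::size_t size) {
            std::memcpy(data, const_blob.data(),
                        std::min(size, const_blob.size() * sizeof(float)));
        };

    // Later, after allocation has been resolved, the initializer fills the buffer.
    std::vector<float> allocated(const_blob.size());
    initializer(allocated.data(), allocated.size() * sizeof(float));
    return 0;
}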

@@ -731,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
int biasesLayerIdx = 1;

if (quantized) {
if (eltwise._operation == EltwiseLayer::Sum) {
switch (eltwise._operation) {
case InferenceEngine::EltwiseLayer::Sum:
case InferenceEngine::EltwiseLayer::Sub:
{
if (inputs4Bytes->getPrecision().size() != 4) {
std::swap(inputs4Bytes, inputs2Bytes);
biasesLayerIdx = 0;
}
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
} else {
break;
}
case InferenceEngine::EltwiseLayer::Prod:
{
// for mul both inputs should be 2 bytes precision
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
break;
}
default:
THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
}
}

@@ -785,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);

switch (eltwise._operation) {
case EltwiseLayer::Sub:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = -quantized->_weights_quant.scale;

auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
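
The new EltwiseLayer::Sub case above reuses the Sum machinery: subtraction becomes an addition in which the diagonal "identity" weight is negated, so the weighted input is subtracted from the bias input. In float mode the weight is simply -1.0f (versus 1.0f for Sum); in quantized mode it is the negated weights scale, saturated to the int16 weight range via the plugin's FLOAT_TO_INT16 macro. Below is a standalone sketch of that derivation with illustrative names; float_to_int16 stands in for the macro and is not the plugin's definition.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Round to nearest and clamp to the int16 range, standing in for FLOAT_TO_INT16.
int16_t float_to_int16(float v) {
    v = std::max(v, static_cast<float>(INT16_MIN));
    v = std::min(v, static_cast<float>(INT16_MAX));
    return static_cast<int16_t>(std::lround(v));
}

// Quantized subtraction: negate the identity weight and keep it inside the
// int16 weight range, as in the Sub branch above.
int16_t quantized_sub_identity_weight(float weights_scale) {
    const float scaled_identity = -weights_scale;
    return float_to_int16(std::min(scaled_identity, static_cast<float>(INT16_MAX)));
}
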
9 changes: 9 additions & 0 deletions inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -153,6 +153,15 @@ class LayerInfo {
return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sum;
}
bool isEltwiseSub() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
// dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer) is validated in isEltwise function
// coverity[var_deref_op]
return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sub;
}

bool isEltwiseMul() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
@@ -150,6 +150,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
auto prev1 = PrevFunctionalLayer(l, 1);

switch (eltwise->_operation) {
case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
// for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
// for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights

if (eltwise->_operation != EltwiseLayer::Sum)
if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
continue;

auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
