Skip to content

Commit

Permalink
[GNA] Avoid integer overflow during pwl calculation for FakeQuantize (
Browse files Browse the repository at this point in the history
…openvinotoolkit#5841)

* [GNA] Avoid integer overflow during pwl calculation for FakeQuantize

* A similar fix for Relu
  • Loading branch information
elilobanova authored and dood-apo committed Aug 24, 2023
1 parent 29f2c8e commit c0a4a88
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
16 changes: 8 additions & 8 deletions inference-engine/src/gna_plugin/backend/make_pwl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,10 @@ void make_gna_pwl(const DnnActivation fun,
int16_t y_lower = y_min;
int16_t y_upper = y_max;
if (fun.fqParams.set) {
x_lower = FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * in_scale);
x_upper = FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * in_scale);
y_lower = FLOAT_TO_INT16(*fun.fqParams.input_low * 1.25 * out_scale);
y_upper = FLOAT_TO_INT16(*fun.fqParams.input_high * 1.25 * out_scale);
x_lower = std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast<int64_t>(x_lower));
x_upper = std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast<int64_t>(x_upper));
y_lower = std::max(FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * out_scale), static_cast<int32_t>(y_lower));
y_upper = std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast<int32_t>(y_upper));
} else {
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
Expand Down Expand Up @@ -365,10 +365,10 @@ void make_gna_pwl(const DnnActivation fun,
int16_t y_lower = y_min;
int16_t y_upper = y_max;
if (fun == kActFakeQuantize && fun.fqParams.set) {
x_lower = *fun.fqParams.input_low * in_scale;
x_upper = *fun.fqParams.input_high * in_scale;
y_lower = *fun.fqParams.input_low * out_scale;
y_upper = *fun.fqParams.input_high * out_scale;
x_lower = std::max(static_cast<int64_t>(*fun.fqParams.input_low * in_scale), static_cast<int64_t>(x_lower));
x_upper = std::min(static_cast<int64_t>(*fun.fqParams.input_high * in_scale), static_cast<int64_t>(x_upper));
y_lower = std::max(static_cast<int32_t>(*fun.fqParams.input_low * out_scale), static_cast<int32_t>(y_lower));
y_upper = std::min(static_cast<int32_t>(*fun.fqParams.input_high * out_scale), static_cast<int32_t>(y_upper));
}
auto n_segments = 2;
if (fun == kActKaldiLstmClipping) {
Expand Down
1 change: 1 addition & 0 deletions inference-engine/src/gna_plugin/round_float_define.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
// Float-to-integer rounding helpers.
//
// Each FLOAT_TO_INTn(a) shifts `a` by 0.5 away from zero (subtracting for
// negative values, adding otherwise) and then truncates the result with a
// static_cast to the n-bit signed integer type, i.e. it rounds half away
// from zero. No range check is performed on the shifted value.
//
// NOTE: the argument is expanded more than once, so it should be an
// expression without side effects.
#define GNA_ROUND_HALF_AWAY_FROM_ZERO(value) \
    (((value) < 0) ? ((value) - 0.5f) : ((value) + 0.5f))

#define FLOAT_TO_INT8(a)  static_cast<int8_t>(GNA_ROUND_HALF_AWAY_FROM_ZERO(a))
#define FLOAT_TO_INT16(a) static_cast<int16_t>(GNA_ROUND_HALF_AWAY_FROM_ZERO(a))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(GNA_ROUND_HALF_AWAY_FROM_ZERO(a))
#define FLOAT_TO_INT64(a) static_cast<int64_t>(GNA_ROUND_HALF_AWAY_FROM_ZERO(a))

0 comments on commit c0a4a88

Please sign in to comment.