[GNA] Avoid integer overflow during pwl calculation for FakeQuantize (openvinotoolkit#5841)

* [GNA] Avoid integer overflow during pwl calculation for FakeQuantize
* Apply a similar fix for Relu
tadamowicz · Aug 24, 2023 · c0a4a88 · c0a4a88
1 parent 29f2c8e
commit c0a4a88
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 8 deletions.
diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp
@@ -282,10 +282,10 @@ void make_gna_pwl(const DnnActivation  fun,
             int16_t y_lower = y_min;
             int16_t y_upper = y_max;
             if (fun.fqParams.set) {
-                x_lower = FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * in_scale);
-                x_upper = FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * in_scale);
-                y_lower = FLOAT_TO_INT16(*fun.fqParams.input_low * 1.25 * out_scale);
-                y_upper = FLOAT_TO_INT16(*fun.fqParams.input_high * 1.25 * out_scale);
+                x_lower = std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast<int64_t>(x_lower));
+                x_upper = std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast<int64_t>(x_upper));
+                y_lower = std::max(FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * out_scale), static_cast<int32_t>(y_lower));
+                y_upper = std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast<int32_t>(y_upper));
             } else {
                 if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
                 if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
@@ -365,10 +365,10 @@ void make_gna_pwl(const DnnActivation  fun,
             int16_t y_lower = y_min;
             int16_t y_upper = y_max;
             if (fun == kActFakeQuantize && fun.fqParams.set) {
-                x_lower = *fun.fqParams.input_low * in_scale;
-                x_upper = *fun.fqParams.input_high * in_scale;
-                y_lower = *fun.fqParams.input_low * out_scale;
-                y_upper = *fun.fqParams.input_high * out_scale;
+                x_lower = std::max(static_cast<int64_t>(*fun.fqParams.input_low * in_scale), static_cast<int64_t>(x_lower));
+                x_upper = std::min(static_cast<int64_t>(*fun.fqParams.input_high * in_scale), static_cast<int64_t>(x_upper));
+                y_lower = std::max(static_cast<int32_t>(*fun.fqParams.input_low * out_scale), static_cast<int32_t>(y_lower));
+                y_upper = std::min(static_cast<int32_t>(*fun.fqParams.input_high * out_scale), static_cast<int32_t>(y_upper));
             }
             auto n_segments = 2;
             if (fun == kActKaldiLstmClipping) {

diff --git a/inference-engine/src/gna_plugin/round_float_define.hpp b/inference-engine/src/gna_plugin/round_float_define.hpp
@@ -10,3 +10,4 @@
 #define FLOAT_TO_INT8(a) static_cast<int8_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
 #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
 #define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))
+#define FLOAT_TO_INT64(a) static_cast<int64_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))