diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp
index 16f83ac34da4e2..746ac5c33f53b5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp
@@ -30,6 +30,8 @@ ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16::GetSupportedKey() const {
     k.EnableInputWeightsType(WeightsType::F32);
     k.EnableInputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
     k.EnableInputWeightsType(WeightsType::F16);
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
@@ -44,6 +46,7 @@ ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16::GetSupportedKey() const {
     k.EnableBatching();
     k.EnableSubGroup();
     k.EnableSubGroupShort();
+    k.EnableDifferentTypes();
     return k;
 }
 
@@ -155,10 +158,11 @@ JitConstants DeconvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const deconvolu
     }
     jit.AddConstant(MakeJitConstant("OC_BLOCK", 16));
 
-    if (output.GetDType() == Datatype::F32)
+    if (input.GetDType() == Datatype::F32) {
         jit.AddConstant(MakeJitConstant("DT_F32", 1));
-    else
+    } else {
         jit.AddConstant(MakeJitConstant("DT_F16", 1));
+    }
 
     auto mb_block = 1;
     auto ic_block = 16;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp
index ac89b0b5167460..d44e11f311848c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp
@@ -26,6 +26,8 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const {
     k.EnableInputWeightsType(WeightsType::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableTensorOffset();
@@ -36,6 +38,7 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const {
     k.EnableSplitSupport();
     k.EnableDepthwiseSeparableOpt();
     k.EnableGroupedConvolution();
+    k.EnableDifferentTypes();
     return k;
 }
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl
index 12935b052f8d52..f1535269e31470 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019 Intel Corporation
+* Copyright 2019-2020 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,10 +14,21 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "ocl_types.h"
 #include "include/fetch.cl"
 #include "include/data_types.cl"
 
+#define INPUT_TYPE8  MAKE_VECTOR_TYPE(INPUT0_TYPE, 8)
+#define OUTPUT_TYPE8 MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8)
+#define FILTER_TYPE8 MAKE_VECTOR_TYPE(FILTER_TYPE, 8)
+
+#if DT_F16 == 1
+#define FMA_ARG_TYPE  half
+#define FMA_ARG_TYPE8 half8
+#else
+#define FMA_ARG_TYPE  INPUT0_TYPE
+#define FMA_ARG_TYPE8 INPUT_TYPE8
+#endif
+
 #if ID > 1
 #define CASE_3D 1
 #else
@@ -31,11 +42,11 @@ __attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) // attr:no-format
 __attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) // attr:no-format
 #endif
 KERNEL(gen9_common_conv_bwd_data_kernel)(
-        const  __global DATA_T *diff_dst,
-        __global DATA_T * restrict diff_src,
-        const __global DATA_T *wei,
+        const  __global INPUT0_TYPE *diff_dst,
+        __global OUTPUT_TYPE * restrict diff_src,
+        const __global FILTER_TYPE *wei,
 #if WITH_BIAS
-        const __global DATA_T *bias,
+        const __global BIAS_TYPE *bias,
 #endif
 #if HAS_FUSED_OPS_DECLS
         FUSED_OPS_DECLS,
@@ -76,11 +87,11 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
     diff_dst += input_offset + mb * OC_FULL * G * OD_FULL * OH_FULL * OW_FULL + g * OC * OD_FULL * OH_FULL * OW_FULL * MB_BLOCK;
 
 #if WITH_BIAS
-    DATA8_T blockC00 = (DATA8_T)bias[g * IC + gic * IC_BLOCK + local_id];
-    DATA8_T blockC01 = (DATA8_T)bias[g * IC + gic * IC_BLOCK + local_id];
+    INPUT_TYPE8 blockC00 = (INPUT_TYPE8)bias[g * IC + gic * IC_BLOCK + local_id];
+    INPUT_TYPE8 blockC01 = (INPUT_TYPE8)bias[g * IC + gic * IC_BLOCK + local_id];
 #else
-    DATA8_T blockC00 = 0.0f;
-    DATA8_T blockC01 = 0.0f;
+    INPUT_TYPE8 blockC00 = INPUT0_VAL_ZERO;
+    INPUT_TYPE8 blockC01 = INPUT0_VAL_ZERO;
 #endif
 
     wei += gic * KD * KH * KW * OC_BLOCK * IC_BLOCK
@@ -111,13 +122,13 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
 #endif
                     if (oh >= OH || ow >= OW) continue;
 
-                    const __global DATA_T *diff_dst1 = diff_dst
+                    const __global INPUT0_TYPE *diff_dst1 = diff_dst
                             + ow * OC_BLOCK * MB_BLOCK
                             + oh * OW_FULL * OC_BLOCK * MB_BLOCK;
 #if CASE_3D
                     diff_dst1 += od * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK;
 #endif
-                    const __global DATA_T *wei1 = wei
+                    const __global FILTER_TYPE *wei1 = wei
 #if CASE_3D
                             + kd * KH * KW * OC_BLOCK * IC_BLOCK
 #endif
@@ -148,44 +159,30 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
 #if SW != 1 || SH != 1 || SD != 1 || PH != 0 || PW != 0 || PD != 0
         if (do_ker) {
 #endif
-            const __global DATA_T *diff_dst1 = diff_dst
+            const __global INPUT0_TYPE *diff_dst1 = diff_dst
                     + ow * OC_BLOCK * MB_BLOCK + oh * OW_FULL * OC_BLOCK * MB_BLOCK;
 #if CASE_3D
             diff_dst1 += od * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK;
 #endif
-            const __global DATA_T *wei1 = wei;
+            const __global FILTER_TYPE *wei1 = wei;
 #endif
 
-#define LOAD_DIFF_DST(_block, _diff_dst, mb_chunk) \
-    { \
-        (_block) = AS_DATA8_T( \
-                BLOCK_READ8((const __global BLOCK_DATA_T *)((_diff_dst) \
-                        + (mb_chunk)*OC_BLOCK))); \
-    }
-
-#define SAVE_SRC_DIFF(_block, _diff_src, mb_chunk) \
-    { \
-        BLOCK_WRITE8((const __global BLOCK_DATA_T *)(&( \
-                             _diff_src)[(mb_chunk)*IC_BLOCK]), \
-                AS_BLOCK_DATA8_T((_block))); \
-    }
-
 #if DT_F32
 #define TRANSPOSE_8(_block, _col) \
-    (DATA8_T)(intel_sub_group_shuffle(_block, _col))
+    (intel_sub_group_shuffle(_block, _col))
 #else
 #define TRANSPOSE_8(_block, _col) \
-    (DATA8_T)(intel_sub_group_shuffle(_block[0], _col), \
-            intel_sub_group_shuffle(_block[1], _col), \
-            intel_sub_group_shuffle(_block[2], _col), \
-            intel_sub_group_shuffle(_block[3], _col), \
-            intel_sub_group_shuffle(_block[4], _col), \
-            intel_sub_group_shuffle(_block[5], _col), \
-            intel_sub_group_shuffle(_block[6], _col), \
-            intel_sub_group_shuffle(_block[7], _col))
+    (intel_sub_group_shuffle(_block[0], _col), \
+    intel_sub_group_shuffle(_block[1], _col), \
+    intel_sub_group_shuffle(_block[2], _col), \
+    intel_sub_group_shuffle(_block[3], _col), \
+    intel_sub_group_shuffle(_block[4], _col), \
+    intel_sub_group_shuffle(_block[5], _col), \
+    intel_sub_group_shuffle(_block[6], _col), \
+    intel_sub_group_shuffle(_block[7], _col))
 #endif
 
-#define FMA8(a, b, c) fma((DATA8_T)(a), (DATA8_T)b, (DATA8_T)c)
+#define FMA8(a, b, c) fma((FMA_ARG_TYPE8)(a), (FMA_ARG_TYPE8)b, (FMA_ARG_TYPE8)c)
 
 #define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB, _blockB1) \
     { \
@@ -207,14 +204,10 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
         _result = FMA8(_blockB1.s7, TRANSPOSE_8(_blockA, 15), _result); \
     }
 
-                    DATA8_T blockA0, blockA1;
-                    LOAD_DIFF_DST(blockA0, diff_dst1, 0);
-                    LOAD_DIFF_DST(blockA1, diff_dst1, 8);
-                    DATA8_T blockB00 = AS_DATA8_T(
-                            BLOCK_READ8((const __global BLOCK_DATA_T *)wei1));
-                    DATA8_T blockB01 = AS_DATA8_T(
-                            BLOCK_READ8((const __global BLOCK_DATA_T *)(wei1
-                                    + 8 * IC_BLOCK)));
+                    INPUT_TYPE8 blockA0 = DT_INPUT_BLOCK_READ8(diff_dst1, 0); // x8 block read: a scalar read would be broadcast to all 8 lanes
+                    INPUT_TYPE8 blockA1 = DT_INPUT_BLOCK_READ8(diff_dst1, 8 * OC_BLOCK);
+                    FILTER_TYPE8 blockB00 = DT_FILTER_BLOCK_READ8(wei1, 0);
+                    FILTER_TYPE8 blockB01 = DT_FILTER_BLOCK_READ8(wei1, 8 * IC_BLOCK);
                     MULTIPLY_BLOCKS_8x8(blockC00, blockA0, blockB00, blockB01);
                     MULTIPLY_BLOCKS_8x8(blockC01, blockA1, blockB00, blockB01);
 
@@ -232,7 +225,7 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
         ocb += OC_BLOCK;
     } while (ocb < OC);
 
-    __global DATA_T *src_write0 = diff_src + OUTPUT_OFFSET + mb * IC_FULL * G * ID_FULL * IH_FULL * IW_FULL
+    __global OUTPUT_TYPE *src_write0 = diff_src + OUTPUT_OFFSET + mb * IC_FULL * G * ID_FULL * IH_FULL * IW_FULL
             + gic * ID_FULL * IH_FULL * IW_FULL * IC_BLOCK * MB_BLOCK
             + g * IC * ID_FULL * IH_FULL * IW_FULL * MB_BLOCK
             + id * IH_FULL * IW_FULL * IC_BLOCK * MB_BLOCK + ih * IW_FULL * IC_BLOCK * MB_BLOCK
@@ -240,20 +233,24 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
 
     blockC00 = ACTIVATION(blockC00, ACTIVATION_PARAMS);
     blockC01 = ACTIVATION(blockC01, ACTIVATION_PARAMS);
+    OUTPUT_TYPE8 res0, res1;
 
 #if HAS_FUSED_OPS
     {
         FUSED_OPS_BLOCK_C00;
-        blockC00 = FUSED_OPS_RESULT_BLOCK_C00;
+        res0 = FUSED_OPS_RESULT_BLOCK_C00;
     }
     {
         FUSED_OPS_BLOCK_C01;
-        blockC01 = FUSED_OPS_RESULT_BLOCK_C01;
+        res1 = FUSED_OPS_RESULT_BLOCK_C01;
     }
+#else
+    res0 = blockC00;
+    res1 = blockC01;
 #endif
 
-    SAVE_SRC_DIFF(blockC00, src_write0, 0);
-    SAVE_SRC_DIFF(blockC01, src_write0, 8);
+    DT_OUTPUT_BLOCK_WRITE8(src_write0, 0, res0);
+    DT_OUTPUT_BLOCK_WRITE8(src_write0, 8 * IC_BLOCK, res1);
 
 #endif
 #if VER_8OW16C == 1
@@ -278,7 +275,7 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
     const int iw = (ihw % IWB) * IW_BLOCK;
 
     diff_dst += input_offset + mb * OC_FULL * G * OD_FULL * OH_FULL * OW_FULL + g * OC * OD_FULL * OH_FULL * OW_FULL * MB_BLOCK;
-    DATA_T blockC00[IW_BLOCK] = {0.0f};
+    INPUT0_TYPE blockC00[IW_BLOCK] = {INPUT0_VAL_ZERO};
 
 #if WITH_BIAS
     for (int i = 0; i < IW_BLOCK; i++)
@@ -307,12 +304,12 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
                 oh /= SH;
                 if (oh >= OH) continue;
 
-                const __global DATA_T *diff_dst1
+                const __global INPUT0_TYPE *diff_dst1
                         = diff_dst + oh * OW_FULL * OC_BLOCK * MB_BLOCK;
 #if CASE_3D
                 diff_dst1 += od * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK;
 #endif
-                const __global DATA_T *wei1 = wei
+                const __global FILTER_TYPE *wei1 = wei
 #if CASE_3D
                         + kd * KH * KW * OC_BLOCK * IC_BLOCK
 #endif
@@ -341,21 +338,21 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
 #if SW != 1 || SH != 1 || SD != 1 || PH != 0 || PW != 0 || PD != 0
     if (do_ker) {
 #endif
-        const __global DATA_T *diff_dst1
+        const __global INPUT0_TYPE *diff_dst1
                 = diff_dst + oh * OW_FULL * OC_BLOCK * MB_BLOCK;
 #if CASE_3D
         diff_dst1 += od * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK;
 #endif
-        const __global DATA_T *wei1 = wei;
+        const __global FILTER_TYPE *wei1 = wei;
 #endif
 
                 int ocb = 0;
                 do {
 
 #define TRANSPOSE_1(_block, _col) \
-    (DATA_T)(intel_sub_group_shuffle(_block, _col))
+    (intel_sub_group_shuffle(_block, _col))
 
-#define FMA1(a, b, c) fma((DATA_T)(a), (DATA_T)b, (DATA_T)c)
+#define FMA1(a, b, c) fma((FMA_ARG_TYPE)(a), (FMA_ARG_TYPE)b, (FMA_ARG_TYPE)c)
 
 #define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB, _blockB1) \
     { \
@@ -377,12 +374,9 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
         _result = FMA1(_blockB1.s7, TRANSPOSE_1(_blockA, 15), _result); \
     }
 
-                    DATA8_T blockB00 = AS_DATA8_T(
-                            BLOCK_READ8((const __global BLOCK_DATA_T *)wei1));
-                    DATA8_T blockB01 = AS_DATA8_T(
-                            BLOCK_READ8((const __global BLOCK_DATA_T *)(wei1
-                                    + 8 * IC_BLOCK)));
-                    DATA_T blockA[IW_BLOCK];
+                    FILTER_TYPE8 blockB00 = DT_FILTER_BLOCK_READ8(wei1, 0);
+                    FILTER_TYPE8 blockB01 = DT_FILTER_BLOCK_READ8(wei1, 8 * IC_BLOCK);
+                    INPUT0_TYPE blockA[IW_BLOCK];
 
                     __attribute__((
                             opencl_unroll_hint(IW_BLOCK))) // attr:no-format
@@ -407,9 +401,7 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
                             blockA[i] = 0.0;
                             continue;
                         }
-                        blockA[i] = AS_DATA_T(
-                                BLOCK_READ((const __global BLOCK_DATA_T *)(&(
-                                        diff_dst1)[ow * OC_BLOCK])));
+                        blockA[i] = DT_INPUT_BLOCK_READ(diff_dst1, ow * OC_BLOCK);
                     }
 
                     __attribute__((
@@ -434,7 +426,7 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
 #endif
 #endif
 
-    __global DATA_T *src_write0 = diff_src + output_offset + mb * IC_FULL * G * ID_FULL * IH_FULL * IW_FULL
+    __global OUTPUT_TYPE *src_write0 = diff_src + output_offset + mb * IC_FULL * G * ID_FULL * IH_FULL * IW_FULL
             + gic * ID_FULL * IH_FULL * IW_FULL * IC_BLOCK * MB_BLOCK
             + g * IC * ID_FULL * IH_FULL * IW_FULL * MB_BLOCK
             + id * IH_FULL * IW_FULL * IC_BLOCK * MB_BLOCK + ih * IW_FULL * IC_BLOCK * MB_BLOCK
@@ -443,12 +435,14 @@ KERNEL(gen9_common_conv_bwd_data_kernel)(
     for (int i = 0; i < IW_BLOCK; i++) {
         blockC00[i] = ACTIVATION(blockC00[i], ACTIVATION_PARAMS);
         if (iw + i >= IW) continue;
+        OUTPUT_TYPE res;
 #if HAS_FUSED_OPS
         FUSED_OPS_BLOCK_CI;
-        blockC00[i] = FUSED_OPS_RESULT_BLOCK_CI;
+        res = FUSED_OPS_RESULT_BLOCK_CI;
+#else
+        res = blockC00[i];
 #endif
-        BLOCK_WRITE((__global BLOCK_DATA_T *)(&(src_write0)[i * IC_BLOCK]),
-                AS_BLOCK_DATA_T(blockC00[i]));
+        DT_OUTPUT_BLOCK_WRITE(src_write0, i * IC_BLOCK, res);
     }
 #endif
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h
deleted file mode 100644
index 332e9551cee0f6..00000000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*******************************************************************************
-* Copyright 2019 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-// #include "ocl_math_utils.h"
-
-#define for_ for
-
-#define CONCAt2(a, b) a##b
-#define CONCAT2(a, b) CONCAt2(a, b)
-
-#if DT_F32 == 1
-#define DATA_T float
-#define DATA8_T float8
-#define DATA_MAX FLT_MAX
-#define DATA_MIN -DATA_MAX
-#define DATA_ZERO 0.0f
-#define DATA_ONE 1.0f
-#define DEF_ACC_DATA_T float
-#define DEF_ACC_DATA8_T float8
-#define POST_OP_DATA_T float
-#define TO_DATA_T(v) static_cast<float>(v)
-#define TO_DEF_ACC_DATA_T(v) static_cast<float>(v)
-#define DATA_TO_REF convert_float
-#define CONVERT_DATA_T convert_float
-#define CONVERT_DATA8_T convert_float8
-#define CONVERT_FLOAT_T convert_float
-#define CONVERT_FLOAT8_T convert_float8
-#define ROUND
-
-#define BLOCK_READ intel_sub_group_block_read
-#define BLOCK_WRITE intel_sub_group_block_write
-#define BLOCK_READ8 intel_sub_group_block_read8
-#define BLOCK_WRITE8 intel_sub_group_block_write8
-
-#define AS_DATA_T as_float
-#define AS_DATA8_T as_float8
-
-#define AS_UINT_T as_uint
-#define AS_UINT8_T as_uint8
-
-#define BLOCK_DATA_T uint
-#define BLOCK_DATA8_T uint8
-#define AS_BLOCK_DATA_T as_uint
-#define AS_BLOCK_DATA8_T as_uint8
-#elif DT_F16 == 1
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-#define DATA_T half
-#define DATA8_T half8
-#define DATA_MAX HALF_MAX
-#define DATA_MIN -DATA_MAX
-#define DATA_ZERO 0.0h
-#define DATA_ONE 1.0h
-#define DEF_ACC_DATA_T half
-#define DEF_ACC_DATA8_T half8
-#define POST_OP_DATA_T half
-#define TO_DATA_T(v) (half)(v)
-#define TO_DEF_ACC_DATA_T(v) (half)(v)
-#define DATA_TO_REF convert_half
-#define CONVERT_DATA_T convert_half
-#define CONVERT_DATA8_T convert_half8
-#define CONVERT_FLOAT_T convert_float
-#define CONVERT_FLOAT8_T convert_float8
-#define ROUND
-
-#define BLOCK_READ intel_sub_group_block_read_us
-#define BLOCK_WRITE intel_sub_group_block_write_us
-#define BLOCK_READ8 intel_sub_group_block_read_us8
-#define BLOCK_WRITE8 intel_sub_group_block_write_us8
-#define AS_DATA_T as_half
-#define AS_DATA8_T as_half8
-
-#define AS_UINT_T as_ushort
-#define AS_UINT8_T as_ushort8
-
-#define BLOCK_DATA_T ushort
-#define BLOCK_DATA8_T ushort8
-#define AS_BLOCK_DATA_T as_ushort
-#define AS_BLOCK_DATA8_T as_ushort8
-#elif DT_BF16 == 1
-#define DATA_T ushort
-#define POST_OP_DATA_T float
-#define DATA8_T ushort8
-#define DATA_MAX 3.38953138925153547590470800371487866880e+38F
-#define DATA_MIN (-DATA_MAX)
-#define DATA_ZERO 0.0f
-#define DATA_ONE 1.0f
-#define DEF_ACC_DATA_T float
-#define DEF_ACC_DATA8_T float8
-#define TO_DATA_T(v) convert_f32_to_bf16(v)
-#define TO_DEF_ACC_DATA_T(v) convert_bf16_to_f32(v)
-#define DATA_TO_REF convert_bf16_to_f32
-#define CONVERT_DATA_T convert_f32_to_bf16
-#define CONVERT_DATA8_T convert_f32_to_bf16_vec8
-#define CONVERT_FLOAT_T convert_bf16_to_f32
-#define CONVERT_FLOAT8_T convert_bf16_to_f32_vec8
-#define ROUND
-
-#define BLOCK_READ intel_sub_group_block_read_us
-#define BLOCK_WRITE intel_sub_group_block_write_us
-#define BLOCK_READ8 intel_sub_group_block_read_us8
-#define BLOCK_WRITE8 intel_sub_group_block_write_us8
-#define AS_DATA_T as_ushort
-#define AS_DATA8_T as_ushort8
-
-#define AS_UINT_T as_ushort
-#define AS_UINT8_T as_ushort8
-
-#define BLOCK_DATA_T ushort
-#define BLOCK_DATA8_T ushort8
-#define AS_BLOCK_DATA_T as_ushort
-#define AS_BLOCK_DATA8_T as_ushort8
-#elif DT_S8 == 1
-#define DATA_T char
-#define DATA8_T char8
-#define DATA_MAX CHAR_MAX
-#define DATA_MIN CHAR_MIN
-#define DATA_ZERO 0
-#define DATA_ONE 1
-#define DEF_ACC_DATA_T int
-#define DEF_ACC_DATA8_T int8
-#define POST_OP_DATA_T float
-#define TO_DATA_T(v) static_cast<char>(v)
-#define DATA_TO_REF convert_char
-#define CONVERT_DATA_T convert_char
-#define CONVERT_DATA8_T convert_char8
-#define ROUND rint
-
-#define BLOCK_READ intel_sub_group_block_read_uc
-#define BLOCK_WRITE intel_sub_group_block_write_uc
-#define BLOCK_READ8 intel_sub_group_block_read_uc8
-#define BLOCK_WRITE8 intel_sub_group_block_write_uc8
-#define AS_DATA_T as_char
-#define AS_DATA8_T as_char8
-
-#define AS_UINT_T as_uchar
-#define AS_UINT8_T as_uchar8
-
-#define BLOCK_DATA_T uchar
-#define BLOCK_DATA8_T uchar8
-#define AS_BLOCK_DATA_T as_uchar
-#define AS_BLOCK_DATA8_T as_uchar8
-#elif DT_U8 == 1
-#define DATA_T uchar
-#define DATA8_T uchar8
-#define DATA_MAX UCHAR_MAX
-#define DATA_MIN 0
-#define DATA_ZERO 0
-#define DATA_ONE 1
-#define DEF_ACC_DATA_T int
-#define DEF_ACC_DATA8_T int8
-#define POST_OP_DATA_T float
-#define TO_DATA_T(v) (uchar)(v)
-#define DATA_TO_REF convert_uchar
-#define CONVERT_DATA_T convert_uchar
-#define CONVERT_DATA8_T convert_uchar8
-#define ROUND rint
-
-#define BLOCK_READ intel_sub_group_block_read_uc
-#define BLOCK_WRITE intel_sub_group_block_write_uc
-#define BLOCK_READ8 intel_sub_group_block_read_uc8
-#define BLOCK_WRITE8 intel_sub_group_block_write_uc8
-#define AS_DATA_T as_uchar
-#define AS_DATA8_T as_uchar8
-
-#define AS_UINT_T as_uchar
-#define AS_UINT8_T as_uchar8
-
-#define BLOCK_DATA_T uchar
-#define BLOCK_DATA8_T uchar8
-#define AS_BLOCK_DATA_T as_uchar
-#define AS_BLOCK_DATA8_T as_uchar8
-#elif DT_S32 == 1
-#define DATA_T int
-#define CONVERT_DATA_T convert_int_sat_rte
-#define POST_OP_DATA_T float
-#elif !defined(DT_UNDEF)
-#error "Unexpected data type"
-#endif
-
-#if VECT_DT_N == 1
-#define VECT_DATA_T DATA_T
-#define VECT_DEF_ACC_DATA_T DEF_ACC_DATA_T
-#define AS_VECT_DATA_T AS_DATA_T
-#define VECT_BLOCK_READ BLOCK_READ
-#define VECT_BLOCK_WRITE BLOCK_WRITE
-#define VECT_UINT_READ intel_sub_group_block_read
-#define VECT_UINT_WRITE intel_sub_group_block_write
-#define VECT_BLOCK_DATA_T BLOCK_DATA_T
-#define AS_VECT_BLOCK_DATA_T AS_BLOCK_DATA_T
-#define CONVERT_VECT_FLOAT_T CONVERT_FLOAT_T
-#define CONVERT_VECTOR_DATA_T CONVERT_DATA_T
-#define VECT_INT_T int
-#define VECT_UINT_T uint
-#define VECT_FLOAT_T float
-#define AS_VECT_INT_T as_int
-#define AS_VECT_UINT_T as_uint
-#elif VECT_DT_N == 8
-#define VECT_DATA_T DATA8_T
-#define VECT_DEF_ACC_DATA_T DEF_ACC_DATA8_T
-#define AS_VECT_DATA_T AS_DATA8_T
-#define VECT_BLOCK_READ BLOCK_READ8
-#define VECT_BLOCK_WRITE BLOCK_WRITE8
-#define VECT_UINT_READ intel_sub_group_block_read8
-#define VECT_UINT_WRITE intel_sub_group_block_write8
-#define VECT_BLOCK_DATA_T BLOCK_DATA8_T
-#define AS_VECT_BLOCK_DATA_T AS_BLOCK_DATA8_T
-#define CONVERT_VECT_FLOAT_T CONVERT_FLOAT8_T
-#define CONVERT_VECTOR_DATA_T CONVERT_DATA8_T
-#define VECT_INT_T int8
-#define VECT_UINT_T uint8
-#define VECT_FLOAT_T float8
-#define AS_VECT_INT_T as_int8
-#define AS_VECT_UINT_T as_uint8
-#endif
-
-#ifdef SRC_DATA_T
-#define SRC_DATA8_T CONCAT2(SRC_DATA_T, 8)
-#if SRC_DT_BF16
-#define SRC_TO_REF(x) convert_bf16_to_f32(x)
-#define SRC_TO_REF8(x) convert_bf16_to_f32_vec8(x)
-#else
-#define SRC_TO_REF(x) (x)
-#define SRC_TO_REF8(x) (x)
-#endif
-#if SRC_DT_BF16
-#define TO_SRC(x) convert_f32_to_bf16(x)
-#elif SRC_DT_U8
-#define TO_SRC(x) convert_uchar_sat_rte(x)
-#elif SRC_DT_S8
-#define TO_SRC(x) convert_char_sat_rte(x)
-#elif SRC_DT_S32
-#define TO_SRC(x) convert_int_sat_rte(x)
-#else
-#define TO_SRC(x) (x)
-#endif
-#endif
-
-#ifdef WEI_DATA_T
-#if WEI_DT_BF16
-#define WEI_TO_REF(x) convert_bf16_to_f32(x)
-#define REF_TO_WEI(x) convert_f32_to_bf16(x)
-#else
-#define WEI_TO_REF(x) (x)
-#define REF_TO_WEI(x) (x)
-#endif
-#if WEI_DT_BF16
-#define TO_WEI(x) convert_f32_to_bf16(x)
-#elif WEI_DT_U8
-#define TO_WEI(x) convert_uchar_sat_rte(x)
-#elif WEI_DT_S8
-#define TO_WEI(x) convert_char_sat_rte(x)
-#elif WEI_DT_S32
-#define TO_WEI(x) convert_int_sat_rte(x)
-#else
-#define TO_WEI(x) (x)
-#endif
-#endif
-
-#ifdef BIA_DATA_T
-#if BIA_DT_BF16
-#define BIA_TO_REF(x) convert_bf16_to_f32(x)
-#define REF_TO_BIA(x) convert_f32_to_bf16(x)
-#else
-#define BIA_TO_REF(x) (x)
-#define REF_TO_BIA(x) (x)
-#endif
-#if BIA_DT_BF16
-#define TO_BIA(x) convert_f32_to_bf16(x)
-#elif BIA_DT_U8
-#define TO_BIA(x) convert_uchar_sat_rte(x)
-#elif BIA_DT_S8
-#define TO_BIA(x) convert_char_sat_rte(x)
-#elif BIA_DT_S32
-#define TO_BIA(x) convert_int_sat_rte(x)
-#else
-#define TO_BIA(x) (x)
-#endif
-#endif
-
-#ifdef DST_DATA_T
-#define DST_DATA8_T CONCAT2(DST_DATA_T, 8)
-#if DST_DT_BF16
-#define DST_TO_REF(x) convert_bf16_to_f32(x)
-#define DST_TO_REF8(x) convert_bf16_to_f32_vec8(x)
-#define REF_TO_DST(x) convert_f32_to_bf16(x)
-#define REF_TO_DST8(x) convert_f32_to_bf16_vec8(convert_float8(x))
-#else
-#define DST_TO_REF(x) (x)
-#define DST_TO_REF8(x) (x)
-#define REF_TO_DST(x) (x)
-#define REF_TO_DST8(x) (x)
-#endif
-#if DST_DT_BF16
-#define TO_DST(x) convert_f32_to_bf16(x)
-#define TO_DST8(x) convert_f32_to_bf16_vec8(convert_float8(x))
-#elif DST_DT_F16
-#define TO_DST(x) convert_half(x)
-#define TO_DST8(x) convert_half8(x)
-#elif DST_DT_U8
-#define TO_DST(x) convert_uchar_sat_rte(x)
-#define TO_DST8(x) convert_uchar8_sat_rte(x)
-#elif DST_DT_S8
-#define TO_DST(x) convert_char_sat_rte(x)
-#define TO_DST8(x) convert_char8_sat_rte(x)
-#elif DST_DT_S32
-#define TO_DST(x) convert_int_sat_rte(x)
-#define TO_DST8(x) convert_int8_sat_rte(x)
-#elif DST_DT_F32
-#define TO_DST(x) convert_float(x)
-#define TO_DST8(x) convert_float8(x)
-#else
-#error "Not expected"
-#endif
-#endif
-
-#ifdef ACC_DATA_T
-#if ACC_DT_F16
-#define TO_ACC(x) convert_half(x)
-#elif ACC_DT_F32
-#define TO_ACC(x) convert_float(x)
-#elif ACC_DT_S32
-#define TO_ACC(x) convert_int(x)
-#else
-#error "Unexpected accumulation data type"
-#endif
-#endif
-
-#define OFF_MD(prefix, x0, x1, x2, x3, x4, x5) \
-    ((x0 / prefix##_B0_2) / prefix##_B0_1 * prefix##_S0_0) \
-            + ((x0 / prefix##_B0_2) % prefix##_B0_1 * prefix##_S0_1) \
-            + ((x0 % prefix##_B0_2) * prefix##_S0_2) \
-            + ((x1 / prefix##_B1_2) / prefix##_B1_1 * prefix##_S1_0) \
-            + ((x1 / prefix##_B1_2) % prefix##_B1_1 * prefix##_S1_1) \
-            + ((x1 % prefix##_B1_2) * prefix##_S1_2) \
-            + ((x2 / prefix##_B2_2) / prefix##_B2_1 * prefix##_S2_0) \
-            + ((x2 / prefix##_B2_2) % prefix##_B2_1 * prefix##_S2_1) \
-            + ((x2 % prefix##_B2_2) * prefix##_S2_2) \
-            + ((x3 / prefix##_B3_2) / prefix##_B3_1 * prefix##_S3_0) \
-            + ((x3 / prefix##_B3_2) % prefix##_B3_1 * prefix##_S3_1) \
-            + ((x3 % prefix##_B3_2) * prefix##_S3_2) \
-            + ((x4 / prefix##_B4_2) / prefix##_B4_1 * prefix##_S4_0) \
-            + ((x4 / prefix##_B4_2) % prefix##_B4_1 * prefix##_S4_1) \
-            + ((x4 % prefix##_B4_2) * prefix##_S4_2) \
-            + ((x5 / prefix##_B5_2) / prefix##_B5_1 * prefix##_S5_0) \
-            + ((x5 / prefix##_B5_2) % prefix##_B5_1 * prefix##_S5_1) \
-            + ((x5 % prefix##_B5_2) * prefix##_S5_2)
-
-#if NDIMS == 3
-#define SRC_OFF(x0, x1, d, h, x2) \
-    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
-            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
-            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2)
-
-#if WITH_GROUPS == 1
-#define WHT_OFF(x0, x1, x2, d, h, x3) \
-    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
-            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
-            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
-            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3)
-#else
-#define WHT_OFF(g, x0, x1, d, h, x2) \
-    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
-            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
-            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2)
-#endif
-
-#define DST_OFF(x0, x1, d, h, x2) \
-    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
-            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
-            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2)
-#elif NDIMS == 4
-#define SRC_OFF(x0, x1, d, x2, x3) \
-    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
-            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
-            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2 \
-            + ((x3) % SRC_B3) * SRC_SB3 + ((x3) / SRC_B3) * SRC_S3)
-
-#if WITH_GROUPS == 1
-#define WHT_OFF(x0, x1, x2, d, x3, x4) \
-    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
-            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
-            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
-            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3 \
-            + ((x4) % WHT_B4) * WHT_SB4 + ((x4) / WHT_B4) * WHT_S4)
-#else
-#define WHT_OFF(g, x1, x2, d, x3, x4) \
-    (((x1) % WHT_B0) * WHT_SB0 + ((x1) / WHT_B0) * WHT_S0 \
-            + ((x2) % WHT_B1) * WHT_SB1 + ((x2) / WHT_B1) * WHT_S1 \
-            + ((x3) % WHT_B2) * WHT_SB2 + ((x3) / WHT_B2) * WHT_S2 \
-            + ((x4) % WHT_B3) * WHT_SB3 + ((x4) / WHT_B3) * WHT_S3)
-#endif
-
-#define DST_OFF(x0, x1, d, x2, x3) \
-    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
-            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
-            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2 \
-            + ((x3) % DST_B3) * DST_SB3 + ((x3) / DST_B3) * DST_S3)
-#elif NDIMS == 5
-#define SRC_OFF(x0, x1, x2, x3, x4) \
-    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
-            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
-            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2 \
-            + ((x3) % SRC_B3) * SRC_SB3 + ((x3) / SRC_B3) * SRC_S3 \
-            + ((x4) % SRC_B4) * SRC_SB4 + ((x4) / SRC_B4) * SRC_S4)
-
-#if WITH_GROUPS == 1
-#define WHT_OFF(x0, x1, x2, x3, x4, x5) \
-    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
-            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
-            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
-            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3 \
-            + ((x4) % WHT_B4) * WHT_SB4 + ((x4) / WHT_B4) * WHT_S4 \
-            + ((x5) % WHT_B5) * WHT_SB5 + ((x5) / WHT_B5) * WHT_S5)
-#else
-#define WHT_OFF(g, x1, x2, x3, x4, x5) \
-    (((x1) % WHT_B0) * WHT_SB0 + ((x1) / WHT_B0) * WHT_S0 \
-            + ((x2) % WHT_B1) * WHT_SB1 + ((x2) / WHT_B1) * WHT_S1 \
-            + ((x3) % WHT_B2) * WHT_SB2 + ((x3) / WHT_B2) * WHT_S2 \
-            + ((x4) % WHT_B3) * WHT_SB3 + ((x4) / WHT_B3) * WHT_S3 \
-            + ((x5) % WHT_B4) * WHT_SB4 + ((x5) / WHT_B4) * WHT_S4)
-#endif
-
-#define DST_OFF(x0, x1, x2, x3, x4) \
-    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
-            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
-            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2 \
-            + ((x3) % DST_B3) * DST_SB3 + ((x3) / DST_B3) * DST_S3 \
-            + ((x4) % DST_B4) * DST_SB4 + ((x4) / DST_B4) * DST_S4)
-#endif
-
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
index 32efb50d630c7f..85b6d03382fa57 100644
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
@@ -676,11 +676,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
                           (input_data.get_dependency(0).get_output_layout().data_type == data_types::u8 ||
                            input_data.get_dependency(0).get_output_layout().data_type == data_types::i8);
 
-            should_fuse |= input_data.is_type<deconvolution>() && quantize_node.get_scale_shift_opt() &&
-                            // fp16/fp32 optimized kernels don't support chaning data type
-                           (input_data.get_dependency(0).get_output_layout().data_type == data_types::u8 ||
-                            input_data.get_dependency(0).get_output_layout().data_type == data_types::i8 ||
-                            input_data.get_output_layout().data_type == out_layout.data_type);
+            should_fuse |= input_data.is_type<deconvolution>() && quantize_node.get_scale_shift_opt();
 
             should_fuse |= input_data.is_type<gather>() && quantize_node.get_scale_shift_opt();
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
index 566bf119e4612c..6d043cf5d5a6ed 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
@@ -445,6 +445,17 @@ class ConvEltwTest : public ::BaseFusingTest<conv_eltw_test_params> {
         network_not_fused.set_input_data("input", input_prim);
 
         compare(network_not_fused, network_fused, p);
+        auto find_prim = [](primitive_info& p) -> bool {
+            // Predicate: matches only the primitive whose original_id is "deconv_prim". Add more ids when needed
+            if (p.original_id == "deconv_prim")
+                return true;
+            return false;
+        };
+
+        auto pi_fused = network_fused.get_primitives_info();  // primitive info entries of the fused network
+        auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_prim);  // first match, or end() if none
+        if (info_fused != pi_fused.end())
+            std::cout << "kernel: " << info_fused->kernel_id << std::endl;  // print the matched entry's kernel_id
     }
 
     layout get_input_layout(conv_eltw_test_params& p) {
@@ -4333,23 +4344,23 @@ TEST_P(deconv_scale_actv_quant_i8, basic) {
 
 INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
     ::testing::ValuesIn(std::vector<deconv_test_params>{
-        deconv_test_params{ CASE_DECONV_FP32_1, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_5, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_6, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_7, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_8, 3, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_1, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_2, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_4, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_5, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_6, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_7, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_8, 2, 5 },
 
-        deconv_test_params{ CASE_DECONV_FP16_1, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_2, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_4, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_5, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_6, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_7, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_8, 3, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_1, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_2, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_4, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_5, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_6, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_7, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_8, 2, 5 },
 
         deconv_test_params{ CASE_DECONV_U8S8_1, 2, 5 },
         deconv_test_params{ CASE_DECONV_U8S8_2, 2, 5 },
@@ -4369,26 +4380,26 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
         deconv_test_params{ CASE_DECONV_S8S8_7, 2, 5 },
         deconv_test_params{ CASE_DECONV_S8S8_8, 2, 5 },
 
-        deconv_test_params{ CASE_DECONV_FP32_3D_1, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_2, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_3, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_4, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_5, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_6, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_7, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 5 },
         // FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
         // add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
         //deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
 
-        deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_3, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_4, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_5, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 5 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 5 },
         //deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
 
         deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
@@ -4444,23 +4455,23 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) {
 
 INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
     ::testing::ValuesIn(std::vector<deconv_test_params>{
-        deconv_test_params{ CASE_DECONV_FP32_1, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_2, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_4, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_5, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_6, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_7, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_8, 4, 9 },
-
-        deconv_test_params{ CASE_DECONV_FP16_1, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_2, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_4, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_5, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_6, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_7, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_8, 4, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_1, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_2, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_4, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_5, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_6, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_7, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_8, 2, 9 },
+
+        deconv_test_params{ CASE_DECONV_FP16_1, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_2, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_4, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_5, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_6, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_7, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_8, 2, 9 },
 
         deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
         deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
@@ -4480,24 +4491,24 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
         deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
         deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
 
-        deconv_test_params{ CASE_DECONV_FP32_3D_1, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_2, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_3, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_4, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_5, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_6, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_7, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP32_3D_8, 4, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 9 },
         // deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
 
-        deconv_test_params{ CASE_DECONV_FP16_3D_1, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_2, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_3, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_4, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_5, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_6, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_7, 4, 9 },
-        deconv_test_params{ CASE_DECONV_FP16_3D_8, 4, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 9 },
+        deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 9 },
         // deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
 
         deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
@@ -4548,14 +4559,14 @@ TEST_P(deconv_scale_activation_quantize_i8_eltwise_quantize_u8, basic) {
 
 INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_activation_quantize_i8_eltwise_quantize_u8,
                         ::testing::ValuesIn(std::vector<conv_eltw_test_params>{
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_1, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_2, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_4, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_5, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_7, 4, 7},
-                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_8, 4, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_1, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_2, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_4, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_5, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_7, 2, 7},
+                                conv_eltw_test_params{CASE_DECONV_ELTW_FP32_8, 2, 7},
 
                                 conv_eltw_test_params{CASE_DECONV_ELTW_i8_1, 2, 7},
                                 conv_eltw_test_params{CASE_DECONV_ELTW_i8_2, 2, 7},