From 0c302b997423a5ec9f966fb734afeafaa432befe Mon Sep 17 00:00:00 2001 From: Lyamin-Roman Date: Wed, 4 Sep 2024 09:28:13 +0900 Subject: [PATCH] [GPU] Added convolution_gpu_b_fs_zyx_fsv16_imad shape agnostic --- .../src/graph/impls/ocl/convolution.cpp | 4 + .../src/graph/impls/ocl/convolution.hpp | 3 +- src/plugins/intel_gpu/src/graph/program.cpp | 25 +--- .../convolution_gpu_b_fs_zyx_fsv16_imad.cl | 7 +- .../intel_gpu/src/kernel_selector/jitter.cpp | 3 +- ...convolution_kernel_b_fs_zyx_fsv16_imad.cpp | 68 +++++++-- .../convolution_kernel_b_fs_zyx_fsv16_imad.h | 17 +-- .../convolution/convolution_kernel_base.h | 15 ++ .../unit/test_cases/convolution_gpu_test.cpp | 138 ++++++++++++++++++ 9 files changed, 233 insertions(+), 47 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index e8043fa9fe90a9..cda7d8f1a4cedc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -32,6 +32,10 @@ struct convolution_impl : typed_primitive_impl_ocl { if (is_dynamic()) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + + const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernelImplParams()); + _kernel_data.params = std::make_shared(get_kernel_params(*impl_params, true)); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.hpp index 69ef9f0f8a2a7c..1771da5a5a63ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.hpp @@ -55,7 +55,8 @@ struct ConvolutionImplementationManager : public ImplementationManager { static const std::vector supported_dyn_formats = { format::bfyx, format::bfzyx, - format::b_fs_yx_fsv16 + format::b_fs_yx_fsv16, + format::b_fs_zyx_fsv16 }; if (!one_of(input_fmt.value, supported_dyn_formats) || !one_of(output_fmt.value, supported_dyn_formats)) diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 07fad4873659cd..7a66d32795c17c 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1796,15 +1796,9 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const { for (auto& impl_id : impl_ids) { std::string type_name = get_node_ptr(impl_id)->get_selected_impl()->m_manager->get_type_info().name; ob << type_name; - if (get_node_ptr(impl_id)->get_selected_impl()->is_onednn()) { - ob << true; - auto params = get_node_ptr(impl_id)->get_kernel_impl_params(); - ob.setKernelImplParams(params.get()); - ob << get_node_ptr(impl_id)->selected_impl; - } else { - ob << false; - ob << get_node_ptr(impl_id)->selected_impl; - } + auto params = get_node_ptr(impl_id)->get_kernel_impl_params(); + ob.setKernelImplParams(params.get()); + ob << get_node_ptr(impl_id)->selected_impl; ob << get_node_ptr(impl_id)->get_selected_impl()->get_cached_kernel_ids(kernels_cache); } } @@ -1930,15 +1924,10 @@ void program::load(cldnn::BinaryInputBuffer& ib) { ib >> type_name; ov::DiscreteTypeInfo type(type_name.c_str()); auto impl_manager = p_node.type()->get(type); - bool is_onednn; - ib >> is_onednn; - if (is_onednn) { - auto params = p_node.get_kernel_impl_params(); - ib.setKernelImplParams(params.get()); - ib >> p_node.selected_impl; - } else { - ib >> p_node.selected_impl; - } + + auto params = p_node.get_kernel_impl_params(); + ib.setKernelImplParams(params.get()); + ib >> p_node.selected_impl; p_node.selected_impl->m_manager = impl_manager.get(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_b_fs_zyx_fsv16_imad.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_b_fs_zyx_fsv16_imad.cl index fc5f2c18fe7efb..07486c1b9a1498 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_b_fs_zyx_fsv16_imad.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_b_fs_zyx_fsv16_imad.cl @@ -54,6 +54,7 @@ REQD_SUB_GROUP_SIZE(SIMD) __attribute__((reqd_work_group_size(1, 1, FEATURE_SLM_SPLIT * SIMD))) KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)( + OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE *conv_input, __global OUTPUT_TYPE *output, const __global FILTER_TYPE *weights @@ -606,11 +607,15 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)( __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) { + #if !IS_DYNAMIC #if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0 if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && ow >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH) break; #endif - + #else + if (OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0 && out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && ow >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH) + break; + #endif if (out_f_g < FILTER_OFM_NUM) { output[dst_index + ow * FSV + oh * OUTPUT_Y_PITCH * FSV + od * OUTPUT_Z_PITCH * FSV] = result[ofb][od][oh][ow]; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp index 33d13429fdcf3f..716e64937ec9e5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp @@ -364,7 +364,8 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { if (_tensor.GetLayout() == DataLayout::bf || _tensor.GetLayout() == DataLayout::bfyx || _tensor.GetLayout() == DataLayout::bfzyx || _tensor.GetLayout() == DataLayout::bfwzyx || _tensor.GetLayout() == DataLayout::bfuwzyx || _tensor.GetLayout() == DataLayout::bfvuwzyx || - _tensor.GetLayout() == DataLayout::b_fs_yx_fsv16 || _tensor.GetLayout() == DataLayout::b_fs_yx_fsv32) { + _tensor.GetLayout() == DataLayout::b_fs_yx_fsv16 || _tensor.GetLayout() == DataLayout::b_fs_yx_fsv32 || + _tensor.GetLayout() == DataLayout::b_fs_zyx_fsv16) { definitions.push_back({_name + "_X_PITCH", "1"}); definitions.push_back({_name + "_Y_PITCH", dims_padded.x()}); definitions.push_back({_name + "_Z_PITCH", toVectorMulString({dims_padded.x(), dims_padded.y()})}); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp index 38ed9e42d3718f..352470913e4751 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp @@ -55,7 +55,10 @@ namespace kernel_selector { Convolution_kernel_b_fs_zyx_fsv16_imad::BlockParams Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params& params) const { - size_t max_block_width = getOutBlock_X(params.outputs[0].X().v, params.stride.x, params.filterSize.x, params.dilation.x); + size_t max_block_width = 1; + if (!params.outputs[0].X().is_dynamic) { + max_block_width = getOutBlock_X(params.outputs[0].X().v, params.stride.x, params.filterSize.x, params.dilation.x); + } size_t max_in_block_width = (max_block_width - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1; size_t block_width = max_block_width; @@ -90,7 +93,9 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params& size_t max_slm_split = params.engineInfo.maxWorkGroupSize / simd; // TGLU exceptions related to SLM usage - if (params.engineInfo.deviceType == dev_type::integrated_gpu && params.engineInfo.computeUnitsCount == 96) { + if (params.is_shape_agnostic) { + max_slm_split = 2; + } else if (params.engineInfo.deviceType == dev_type::integrated_gpu && params.engineInfo.computeUnitsCount == 96) { bool split_exception_1 = params.outputs[0].X().v == 3 && params.outputs[0].Y().v == 3 && params.outputs[0].Z().v == 1 && params.outputs[0].Feature().v == 512; bool split_exception_2 = params.outputs[0].X().v == 5 && params.outputs[0].Y().v == 5 && params.outputs[0].Z().v == 1 @@ -118,13 +123,16 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params& } } + size_t max_d = params.outputs[0].Z().is_dynamic ? 1 : 16; + size_t max_h = params.outputs[0].Y().is_dynamic ? 1 : 16; + for (size_t split = 1; split <= max_slm_split; split *= 2) { for (size_t temp_block_features = simd; temp_block_features <= simd * 2; temp_block_features += simd) { - for (size_t d = 1; d < 16; ++d) { - if (params.outputs[0].Z().v % d) + for (size_t d = 1; d < max_d; ++d) { + if (d != 1 && params.outputs[0].Z().v % d) continue; - for (size_t h = 1; h < 16; ++h) { - if (params.outputs[0].Y().v % h) + for (size_t h = 1; h < max_h; ++h) { + if (h != 1 && params.outputs[0].Y().v % h) continue; bool c_ifm_mul = CeilDiv(params.weights.IFM().v, fsv) % split == 0; @@ -174,6 +182,10 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params& } float Convolution_kernel_b_fs_zyx_fsv16_imad::EstimateBlockParamsRatio(const convolution_params& params, const BlockParams& block) const { + if (params.has_dynamic_outputs()) { + return -10.f; + } + float occupancy_by_logic_size = static_cast(params.outputs[0].LogicalSize() / static_cast(params.engineInfo.maxThreadsPerDevice)); bool increase_max_reg_pressure = occupancy_by_logic_size >= 595.f; bool twice_increase_max_reg_pressure = occupancy_by_logic_size >= 595.f * 2.f; @@ -373,6 +385,7 @@ ParamsKey Convolution_kernel_b_fs_zyx_fsv16_imad::GetSupportedKey() const { k.EnableQuantization(QuantizationType::SYMMETRIC); k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); k.EnableDilation(); + k.EnableDynamicShapesSupport(); return k; } @@ -450,10 +463,15 @@ JitConstants Convolution_kernel_b_fs_zyx_fsv16_imad::GetJitConstants(const convo ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_zyx_fsv16_imad::SetDefault(const convolution_params& params, int) const { + const BlockParams& block_params = GetBlockParams(params); + return CalcDispatchDataWithBlockParams(params, block_params); +} // SetDefault + +ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_zyx_fsv16_imad::CalcDispatchDataWithBlockParams(const convolution_params& params, + const BlockParams& block_params) const { DispatchData dispatchData; const auto& output = params.outputs[0]; const auto& weights = params.weights; - auto block_params = GetBlockParams(params); dispatchData.gws[0] = CeilDiv(output.X().v, block_params.output_block_width); dispatchData.gws[1] = CeilDiv(output.Y().v, block_params.output_block_height) * CeilDiv(output.Z().v, block_params.output_block_depth); @@ -466,17 +484,24 @@ ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_zyx_fsv16_imad::SetD dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; - + dispatchData.blockParams = { block_params.output_block_width, block_params.output_block_height, + block_params.output_block_depth, block_params.output_block_features, + block_params.input_block_width, block_params.input_block_height, + block_params.input_block_depth, block_params.feature_slm_split }; return dispatchData; -} // SetDefault +} KernelsPriority Convolution_kernel_b_fs_zyx_fsv16_imad::GetKernelsPriority(const Params& params) const { const auto& p = static_cast(params); - if (static_cast(p.weights.IFM().v) / static_cast(Align(p.weights.IFM().v, fsv)) < 0.5f) + if (!p.is_shape_agnostic) { + if (static_cast(p.weights.IFM().v) / static_cast(Align(p.weights.IFM().v, fsv)) < 0.5f) + return FORCE_PRIORITY_4; + else + return FORCE_PRIORITY_2; + } else { return FORCE_PRIORITY_4; - else - return FORCE_PRIORITY_2; + } } bool Convolution_kernel_b_fs_zyx_fsv16_imad::Validate(const Params& params) const { @@ -507,4 +532,23 @@ bool Convolution_kernel_b_fs_zyx_fsv16_imad::Validate(const Params& params) cons return true; } + +void Convolution_kernel_b_fs_zyx_fsv16_imad::GetUpdateDispatchDataFunc(KernelData& kd) const { + const auto& prim_params = static_cast(*kd.params); + const auto& dynamicDispatchData = SetDefault(prim_params); + + kd.update_dispatch_data_func = [this, dynamicDispatchData](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + const auto& dispatchData = CalcDispatchDataWithBlockParams(prim_params, dynamicDispatchData.blockParams); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params); + + kd.internalBufferSizes.clear(); + kd.internalBufferSizes.push_back(prim_params.inputs[0].PhysicalSizeInBytes()); + kd.internalBufferDataType = prim_params.inputs[0].GetDType(); + }; +} + } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h index c2da5e66982d11..adb5125873a657 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h @@ -12,6 +12,7 @@ namespace kernel_selector { class Convolution_kernel_b_fs_zyx_fsv16_imad : public ConvolutionKernelBase { public: using Parent = ConvolutionKernelBase; + using BlockParams = DispatchData::BlockParams; Convolution_kernel_b_fs_zyx_fsv16_imad() : ConvolutionKernelBase("convolution_gpu_b_fs_zyx_fsv16_imad") {} virtual ~Convolution_kernel_b_fs_zyx_fsv16_imad() {} @@ -24,6 +25,7 @@ class Convolution_kernel_b_fs_zyx_fsv16_imad : public ConvolutionKernelBase { bool Validate(const Params& params) const override; JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; + void GetUpdateDispatchDataFunc(KernelData& kd) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params& p) const override { return p.groups > 1 ? WeightsLayout::g_os_is_zyx_osv16_isv16 : WeightsLayout::os_is_zyx_osv16_isv16; @@ -35,24 +37,11 @@ class Convolution_kernel_b_fs_zyx_fsv16_imad : public ConvolutionKernelBase { FusedOpType::ACTIVATION }; } - struct BlockParams { - size_t output_block_width; - size_t output_block_height; - size_t output_block_depth; - - size_t output_block_features; - - size_t input_block_width; - size_t input_block_height; - size_t input_block_depth; - - size_t feature_slm_split; - }; - BlockParams GetBlockParams(const convolution_params& params) const; float EstimateBlockParamsRatio(const convolution_params& params, const BlockParams& block) const; float EstimateRegPressure(const convolution_params& params, const BlockParams& block) const; float EstimateOccupancy(const convolution_params& params, const BlockParams& block) const; float EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const; + DispatchData CalcDispatchDataWithBlockParams(const convolution_params& params, const BlockParams& block_params) const; }; } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h index f8d1b3bf0de956..ccdd4941cdd1d0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h @@ -36,8 +36,23 @@ class ConvolutionKernelBase : public WeightBiasKernelBase { size_t globalWorkSizeDZ; }; + struct BlockParams { + size_t output_block_width; + size_t output_block_height; + size_t output_block_depth; + + size_t output_block_features; + + size_t input_block_width; + size_t input_block_height; + size_t input_block_depth; + + size_t feature_slm_split; + }; + CLDNNStyle cldnnStyle; GEMMStyle gemmStyle; + BlockParams blockParams; }; std::string GetAutoTuneOptions(int autoTuneIndex) const; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 4f9c31064e9026..5d01d448dcfc64 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -11056,3 +11056,141 @@ INSTANTIATE_TEST_SUITE_P(smoke, conv_dyn_test, { ov::Shape{1, 32, 91}, ov::Shape{32, 1, 1, 11}, ov::Strides{1}, ov::Strides{1}, ov::CoordinateDiff{0}, ov::CoordinateDiff{0}, 32 }, { ov::Shape{1, 64, 16, 16}, ov::Shape{64, 1, 1, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, 64 }, })); + + +struct conv_dyn_3d_test_params { + ov::Shape in_shape; + ov::Shape wei_shape; + ov::Strides stride; + ov::Strides dilation; + ov::CoordinateDiff pad_begin; + ov::CoordinateDiff pad_end; + uint32_t groups; + bool is_caching_test; +}; + +class conv_dyn_3d_test : public testing::TestWithParam {}; + +TEST_P(conv_dyn_3d_test, convolution_gpu_b_fs_zyx_fsv16_imad_quantized) { + auto& engine = get_test_engine(); + auto p = GetParam(); + + auto calculate_ref = [&](memory::ptr input, memory::ptr weights, + memory::ptr a_zp, memory::ptr compensation, ExecutionConfig config) { + auto in_layout = input->get_layout(); + + topology topology_ref( + input_layout("input", in_layout), + data("weights", weights), + data("a_zp", a_zp), + data("compensation", compensation), + convolution("conv", input_info("input"), "weights", no_bias, "", "a_zp", "compensation", + p.groups, p.stride, p.dilation, p.pad_begin, p.pad_end, false, data_types::f32)); + + network network_ref(engine, topology_ref, config); + network_ref.set_input_data("input", input); + + auto outputs_ref = network_ref.execute(); + + return outputs_ref.at("conv").get_memory(); + }; + + ov::PartialShape dyn_input_pshape; + for (size_t i = 0; i < p.in_shape.size(); ++i) { + dyn_input_pshape.emplace_back(ov::Dimension()); + } + dyn_input_pshape[1] = p.in_shape[1]; + + auto in_layout = layout{dyn_input_pshape, data_types::u8, format::b_fs_zyx_fsv16}; + auto input = engine.allocate_memory({ p.in_shape, data_types::u8, format::b_fs_zyx_fsv16 }); + auto weights = engine.allocate_memory({p.wei_shape, data_types::i8, format::bfzyx}); + + auto a_zp_shape = ov::Shape(p.in_shape.size(), 1); + a_zp_shape[1] = p.in_shape[1]; + auto a_zp = engine.allocate_memory({ a_zp_shape, data_types::u8, format::bfyx }); + + auto compensation = engine.allocate_memory({ a_zp_shape, data_types::f32, format::bfyx }); + + tests::random_generator rg(GET_SUITE_NAME); + VF input_rnd = rg.generate_random_1d(ov::shape_size(p.in_shape), 0, 10); + VF weights_rnd = rg.generate_random_1d(ov::shape_size(p.wei_shape), -5, 5); + VF a_zp_rnd = rg.generate_random_1d(ov::shape_size(a_zp_shape), 1, 5); + VF compensation_rnd = rg.generate_random_1d(ov::shape_size(a_zp_shape), -5, 5); + + set_values(input, input_rnd); + set_values(weights, weights_rnd); + set_values(a_zp, a_zp_rnd); + set_values(compensation, compensation_rnd); + + topology topology( + input_layout("input", in_layout), + data("weights", weights), + data("a_zp", a_zp), + data("compensation", compensation), + convolution("conv", input_info("input"), "weights", no_bias, "", "a_zp", "compensation", + p.groups, p.stride, p.dilation, p.pad_begin, p.pad_end, false, data_types::f32)); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_zyx_fsv16, "convolution_gpu_b_fs_zyx_fsv16_imad", impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::enable_profiling(true)); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), p.is_caching_test); + network->set_input_data("input", input); + + auto inst = network->get_primitive("conv"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network->execute(); + + auto output_memory = outputs.at("conv").get_memory(); + + auto output_memory_ref = calculate_ref(input, weights, a_zp, compensation, config); + + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock output_ptr_ref(output_memory_ref, get_test_stream()); + + ASSERT_EQ(outputs.at("conv").get_layout(), output_memory_ref->get_layout()); + for (size_t i = 0; i < output_ptr.size(); i++) { + ASSERT_EQ(output_ptr[i], output_ptr_ref[i]); + } + + { + // Change original shape for the second run + auto new_shape = p.in_shape; + new_shape[2] += 4; + + auto input = engine.allocate_memory({ new_shape, data_types::u8, format::b_fs_zyx_fsv16 }); + + VF input_rnd = rg.generate_random_1d(ov::shape_size(p.in_shape), 0, 10); + set_values(input, input_rnd); + + network->set_input_data("input", input); + auto outputs = network->execute(); + + auto output_memory = outputs.at("conv").get_memory(); + auto output_memory_ref = calculate_ref(input, weights, a_zp, compensation, config); + + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock output_ptr_ref(output_memory_ref, get_test_stream()); + + ASSERT_EQ(outputs.at("conv").get_layout(), output_memory_ref->get_layout()); + for (size_t i = 0; i < output_ptr.size(); i++) { + ASSERT_EQ(output_ptr[i], output_ptr_ref[i]); + } + } +} + +INSTANTIATE_TEST_SUITE_P(smoke, conv_dyn_3d_test, + testing::ValuesIn(std::vector{ + { ov::Shape{1, 16, 5, 5, 5}, ov::Shape{2, 16, 3, 3, 3}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 1, false}, + { ov::Shape{1, 16, 5, 5, 5}, ov::Shape{2, 16, 3, 3, 3}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 1, true}, + { ov::Shape{2, 32, 30, 30, 30}, ov::Shape{16, 32, 10, 10, 10}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 1, false }, + { ov::Shape{2, 32, 30, 30, 30}, ov::Shape{16, 32, 10, 10, 10}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 1, true }, + { ov::Shape{1, 16, 5, 5, 5}, ov::Shape{2, 16, 3, 3, 3}, ov::Strides{2, 2, 2}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 1, false }, + { ov::Shape{1, 16, 5, 5, 5}, ov::Shape{2, 16, 3, 3, 3}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{1, 1, 1}, ov::CoordinateDiff{1, 1, 1}, 1, false }, + { ov::Shape{1, 16, 5, 5, 5}, ov::Shape{16, 1, 1, 3, 3, 3}, ov::Strides{1, 1, 1}, ov::Strides{1, 1, 1}, ov::CoordinateDiff{0, 0, 0}, ov::CoordinateDiff{0, 0, 0}, 16, false } +}));