diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml
index 7da00df4e46d32..89c39669e67720 100644
--- a/.github/workflows/workflow_rerunner.yml
+++ b/.github/workflows/workflow_rerunner.yml
@@ -3,11 +3,17 @@ name: Rerun Workflow with Known Errors
 on:
   workflow_run:
     workflows:
-      - Linux (Ubuntu 20.04, Python 3.11)
+      - Linux (Ubuntu 20.04, Python 3.9)
+      - Linux (Ubuntu 22.04, Python 3.11)
+      - Linux (Ubuntu 24.04, Python 3.12)
+      - Debian 10 ARM
+      - Android ARM64 with vcpkg
+      - Android x64
       - Linux ARM64 (Ubuntu 20.04, Python 3.11)
       - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)
       - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)
-      - Windows (VS 2019, Python 3.11)
+      - Windows (VS 2019, Python 3.11, Release)
+      - Windows (VS 2019, Python 3.11, Debug)
       - Windows Conditional Compilation (VS 2022, Python 3.11)
     types:
       - completed
@@ -56,6 +62,10 @@ jobs:
         if: ${{ env.PIPELINE_RETRIGGERED == 'true' }}
         run: echo "Rerun retriggered for ${{ github.event.workflow_run.html_url }} with ticket ${{ env.FOUND_ERROR_TICKET }}"

+      - name: ${{ github.event.workflow_run.html_url }}
+        if: ${{ env.PIPELINE_RETRIGGERED == 'true' }}
+        run: echo "Step for statistics gathering"
+
   rerunner_tests:
     name: Rerunner Tests
     if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }}
diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
index bb26683cd9e579..c82e0a8d5995f7 100644
--- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
+++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
@@ -122,7 +122,7 @@ def process_coveo_meta(meta, url, link):
         namespace_element = ET.SubElement(url, namespace)

         for tag_name, tag_value in values.items():
-            if tag_name == 'ovcategory':
+            if tag_name == 'ovdoctype':
                 processed_link = process_link(link)
                 ET.SubElement(namespace_element, tag_name).text = processed_link
             else:
diff --git a/docs/sphinx_setup/_static/js/custom.js b/docs/sphinx_setup/_static/js/custom.js
index 52962cf0f7c7e6..3cfe907382c314 100644
--- a/docs/sphinx_setup/_static/js/custom.js
+++ b/docs/sphinx_setup/_static/js/custom.js
@@ -417,6 +417,7 @@ document.addEventListener('DOMContentLoaded', function () {
     await searchInterfaceSa.initialize({
       accessToken: "xx1f2aebd3-4307-4632-aeea-17c13378b237",
       organizationId: "intelcorporationnonproduction2ybdyblf7",
+      organizationEndpoints: await searchInterface.getOrganizationEndpoints('intelcorporationnonproduction2ybdyblf7')
     });
     searchInterfaceSa.executeFirstSearch();
   }
@@ -424,6 +425,7 @@ document.addEventListener('DOMContentLoaded', function () {
     await searchInterface.initialize({
       accessToken: "xx1f2aebd3-4307-4632-aeea-17c13378b237",
       organizationId: "intelcorporationnonproduction2ybdyblf7",
+      organizationEndpoints: await searchInterface.getOrganizationEndpoints('intelcorporationnonproduction2ybdyblf7')
     });
     searchInterface.executeFirstSearch();
   }
diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py
index 148309ccbafe96..def41af5943b3c 100644
--- a/docs/sphinx_setup/conf.py
+++ b/docs/sphinx_setup/conf.py
@@ -84,7 +84,7 @@
 ov_sitemap_meta = [
     ('coveo:metadata', {
         'ovversion': version_name,
-        'ovcategory': 'null'
+        'ovdoctype': 'null'
     })
 ]
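Note on the sitemap change above: the metadata key is renamed from `ovcategory` to `ovdoctype` in both the extension and `conf.py`, and the renamed branch is the one that writes the processed page link. A minimal, standalone sketch of the loop's effect follows; `process_link` is stubbed as a hypothetical stand-in, and the exact element layout is an assumption based on the visible fragment.

```python
import xml.etree.ElementTree as ET

def process_link(link):
    # Hypothetical stand-in for the extension's process_link() helper.
    return link.rstrip('/')

url = ET.Element('url')
ET.SubElement(url, 'loc').text = 'https://docs.openvino.ai/2024/index.html'

# Mirrors the renamed branch: 'ovdoctype' carries the processed link,
# any other key (e.g. 'ovversion') keeps its literal value.
meta = {'ovversion': '2024.5', 'ovdoctype': 'null'}
namespace_element = ET.SubElement(url, 'coveo:metadata')
for tag_name, tag_value in meta.items():
    if tag_name == 'ovdoctype':
        ET.SubElement(namespace_element, tag_name).text = process_link('https://docs.openvino.ai/2024/index.html')
    else:
        ET.SubElement(namespace_element, tag_name).text = tag_value

print(ET.tostring(url, encoding='unicode'))
```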
diff --git a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js
index cfa5fd27b0fa4e..58cc6b3b3cf450 100644
--- a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js
+++ b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js
@@ -1,11 +1,39 @@
 const { app } = require('electron');
 const { addon: ov } = require('openvino-node');

-app.whenReady().then(() => {
-  console.log('Creating OpenVINO Runtime Core');
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
-  const core = new ov.Core();
-  console.log('Created OpenVINO Runtime Core');
+const epsilon = 0.5; // To avoid very small numbers
+const pathToModel = '../tests/unit/test_models/test_model_fp32.xml';
+
+main();
+
+async function main() {
+  await app.whenReady();
+
+  try {
+    console.log('Creating OpenVINO Runtime Core');
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    const core = new ov.Core();
+    console.log('Created OpenVINO Runtime Core');
+
+    const model = await core.readModel(pathToModel);
+    console.log('Model read successfully:', model);
+    const compiledModel = await core.compileModel(model, 'CPU');
+    const inferRequest = compiledModel.createInferRequest();
+    console.log('Infer request created:', inferRequest);
+
+    const tensorData = Float32Array.from(
+      { length: 3072 },
+      () => Math.random() + epsilon,
+    );
+    const tensor = new ov.Tensor(ov.element.f32, [1, 3, 32, 32], tensorData);
+    console.log('Tensor created:', tensor);
+
+    const result = await inferRequest.inferAsync([tensor]);
+    console.log('Infer request result:', result);
+  } catch (error) {
+    console.error('Error:', error);
+    app.exit(1);
+  }

   app.exit(0);
-});
+}
diff --git a/src/bindings/js/node/tests/e2e/electron-app.test.js b/src/bindings/js/node/tests/e2e/electron-app.test.js
index 01e84dea884502..98982a5f941263 100644
--- a/src/bindings/js/node/tests/e2e/electron-app.test.js
+++ b/src/bindings/js/node/tests/e2e/electron-app.test.js
@@ -1,24 +1,17 @@
 /* global describe, it, before, after */

 const fs = require('node:fs');
+const util = require('node:util');
 const assert = require('node:assert');
 const { exec } = require('child_process');
+const execPromise = util.promisify(exec);
+const { testModels, downloadTestModel } = require('../unit/utils.js');

 describe('E2E testing for OpenVINO as an Electron dependency.', function() {
   this.timeout(50000);

-  before((done) => {
-    exec(
-      'cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project',
-      (error) => {
-        if (error) {
-          console.error(`exec error: ${error}`);
-
-          return done(error);
-        }
-
-        done();
-      },
-    );
+  before(async () => {
+    await downloadTestModel(testModels.testModelFP32);
+    await execPromise('cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project');
   });

   it('should install dependencies', (done) => {
@@ -37,7 +30,7 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() {
   });

   it('should run electron package and verify output', (done) => {
-    exec('cd demo-electron-app-project && npm start', (error, stdout) => {
+    exec(`cd demo-electron-app-project && npm start`, (error, stdout) => {
       if (error) {
         console.error(`exec error: ${error}`);

@@ -48,6 +41,14 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() {
         stdout.includes('Created OpenVINO Runtime Core'),
         'Check that openvino-node operates fine',
       );
+      assert(
+        stdout.includes('Model read successfully: ModelWrap {}'),
+        'Check that model is read successfully',
+      );
+      assert(
+        stdout.includes('Infer request result: { fc_out: TensorWrap {} }'),
+        'Check that infer request result is successful',
+      );
       done();
     });
   });
diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt
index b3a8267e4c1f14..65ce94d71b385e 100644
--- a/src/bindings/python/constraints.txt
+++ b/src/bindings/python/constraints.txt
@@ -1,5 +1,5 @@
 # used in multiple components
-numpy>=1.16.6,<2.1.0  # Python bindings, frontends
+numpy>=1.16.6,<2.2.0  # Python bindings, frontends

 # pytest
 pytest>=5.0,<8.4
diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt
index e311c6ed6438db..a2d63161fe764c 100644
--- a/src/bindings/python/requirements.txt
+++ b/src/bindings/python/requirements.txt
@@ -1,3 +1,3 @@
-numpy>=1.16.6,<2.1.0
+numpy>=1.16.6,<2.2.0
 openvino-telemetry>=2023.2.1
 packaging
diff --git a/src/plugins/intel_cpu/src/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp
similarity index 100%
rename from src/plugins/intel_cpu/src/utils/print_model.hpp
rename to src/common/transformations/include/transformations/utils/print_model.hpp
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
index c0e1e96547cec7..7f5f968b10c3fe 100644
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -549,6 +549,7 @@ int get_model_prefer_threads(const int num_streams,
             break;
         case dnnl::cpu_isa::avx512_core_vnni:
         case dnnl::cpu_isa::avx2_vnni:
+        case dnnl::cpu_isa::avx2_vnni_2:
             isaSpecificThreshold = 2.0f;
             break;
         case dnnl::cpu_isa::avx512_core_amx:
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index e45b6379d1e968..e98045bd32dbbf 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -100,7 +100,7 @@
 #include "transformations/rt_info/keep_const_precision.hpp"
 #include "transformations/transpose_sinking/ts_shape_of.hpp"
 #include "utils/ngraph_transformation.hpp"
-#include "utils/print_model.hpp"
+#include "transformations/utils/print_model.hpp"

 // LPT transformations
 #include "low_precision/add.hpp"
diff --git a/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp b/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp
index ca55337fc3d6b8..d8516d9ce8cf39 100644
--- a/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp
+++ b/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp
@@ -17,7 +17,7 @@
 #include 

 #include "common_test_utils/ov_test_utils.hpp"
-#include "utils/print_model.hpp"
+#include "transformations/utils/print_model.hpp"

 using namespace testing;
 using namespace ov;
diff --git a/src/plugins/intel_cpu/thirdparty/ComputeLibrary b/src/plugins/intel_cpu/thirdparty/ComputeLibrary
index f1929dc994d8e5..c61bd3387403b7 160000
--- a/src/plugins/intel_cpu/thirdparty/ComputeLibrary
+++ b/src/plugins/intel_cpu/thirdparty/ComputeLibrary
@@ -1 +1 @@
-Subproject commit f1929dc994d8e5afae5c77ca66446344119a8592
+Subproject commit c61bd3387403b76d618915ccebf5e9585f52a071
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl
index 57545b0df37cff..70c55bfb73b8f5 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl
@@ -952,6 +952,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)(
         // Calculate zero-point and scale only for DECOMPRESSION_SCALE_POST_OP enabled
         // Calculate weight : w = (w - dzp) * ds
+        // if DECOMPRESSION_ZP_TERM is not enabled, then dzp is ACCUMULATOR_VAL_ZERO.
         #if DECOMPRESSION_ZP_TERM
             #if DECOMPRESSION_ZP_SCALAR
                 DQ_SLM_FILTER_UNPACKED_VEC dzp = (DQ_SLM_FILTER_UNPACKED_VEC)(DECOMPRESSION_ZP_VALUE);
@@ -976,8 +977,6 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)(
                     }
                 }
             #endif
-        #else
-            DQ_SLM_FILTER_UNPACKED_VEC dzp = (DQ_SLM_FILTER_UNPACKED_VEC)(ACCUMULATOR_VAL_ZERO);
         #endif

         #if FILTER_LOAD_BLOCK_SIZE == 2
@@ -1026,7 +1025,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)(

         weights_offset += TILE_K_OFM_PACKED * TILE_OFM_PER_OSV_SIZE * SIMD;

-        #if DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE > DECOMPRESSION_SCALE_GROUP_SIZE)
+        #if DQ_DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE > DECOMPRESSION_SCALE_GROUP_SIZE)
         unroll_for (uint bi = 0; bi < TILE_B; ++bi) {
             unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) {
                 const uint offset_ofm = out_f + fi*SIMD + sglid;
@@ -1046,7 +1045,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)(
         #endif
     }  // Whole tile_k elements of each iteration : ki

-    #if DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE <= DECOMPRESSION_SCALE_GROUP_SIZE)
+    #if DQ_DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE <= DECOMPRESSION_SCALE_GROUP_SIZE)
     // Dynamic-quantizing group size set to same or smaller than scale group size
     if ((ni % NUM_LOOP_IN_DYN_QUAN_GROUP) == (NUM_LOOP_IN_DYN_QUAN_GROUP - 1)) {
         const uint ni_offset = ((ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH;
@@ -1175,7 +1174,7 @@ KERNEL(fc)(
 #endif
 ) {
 #if USE_SLM
-    #if DYNAMIC_QUANTIZE && (TILE_OFM == 2)
+    #if DYNAMIC_QUANTIZE
         __local int dq_wei_local_mem[SIMD * TILE_OFM * SIMD];
     #else
         __local ACCUMULATOR_TYPE wei_local_mem[TILE_IFM * SIMD * TILE_OFM * SIMD];
@@ -1317,7 +1316,7 @@ KERNEL(fc)(
     #endif
         );
     } else {
-        #if USE_SLM && DYNAMIC_QUANTIZE && (TILE_OFM == 2)
+        #if USE_SLM && DYNAMIC_QUANTIZE
             FUNC_CALL(fc_bf_tiled_kernel_dyn_quan)(
                 OPTIONAL_SHAPE_INFO_TENSOR
                 input,
@@ -1364,7 +1363,7 @@ KERNEL(fc)(
     #endif
     }
 #else
-    #if USE_SLM && DYNAMIC_QUANTIZE && (TILE_OFM == 2)
+    #if USE_SLM && DYNAMIC_QUANTIZE
         FUNC_CALL(fc_bf_tiled_kernel_dyn_quan)(
             OPTIONAL_SHAPE_INFO_TENSOR
             input,
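The new kernel comment above spells out the dequantization rule: w = (w - dzp) * ds, with dzp falling back to ACCUMULATOR_VAL_ZERO (i.e. 0) when DECOMPRESSION_ZP_TERM is off. A minimal NumPy reference of that formula, with hypothetical shapes and group size chosen for illustration:

```python
import numpy as np

def dequantize_weights(w_q, scales, group_size, zero_points=None):
    """Reference for the kernel comment: w = (w - dzp) * ds.

    w_q: quantized weights, shape (OFM, IFM).
    scales / zero_points: per-group values, shape (OFM, IFM // group_size).
    When the zero-point term is disabled, dzp is 0 (ACCUMULATOR_VAL_ZERO).
    """
    ofm, ifm = w_q.shape
    w = w_q.astype(np.float32).reshape(ofm, ifm // group_size, group_size)
    dzp = 0.0 if zero_points is None else zero_points[..., None].astype(np.float32)
    ds = scales[..., None].astype(np.float32)
    return ((w - dzp) * ds).reshape(ofm, ifm)

w_q = np.random.randint(0, 16, (32, 64), dtype=np.uint8)   # e.g. u4 values stored as u8
scales = np.random.rand(32, 2).astype(np.float32)          # group_size = 32 -> 2 groups
print(dequantize_weights(w_q, scales, group_size=32).shape)  # (32, 64)
```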
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
index b26b11ce97df6a..9c95345e0900b5 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
@@ -375,6 +375,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params,
     if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4) {
         if (!params.is_shape_agnostic && batch == 1) {
+            if (should_dynamic_quantize(params))
+                return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT));
+
             // Tuning for Meteor Lake
             if (is_weight_vertical(params, output_f)) {
                 if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) {
@@ -616,7 +619,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para
     // Validated perf gain, Dynamic quantize force enable SCALE_POST_OP for char type multiplication
     if (should_dynamic_quantize(params)) {
         jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 1));
-        jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1));
+        jit.AddConstant(MakeJitConstant("DQ_DECOMPRESSION_SCALE_POST_OP", 1));
         jit.AddConstant(MakeJitConstant("DQ_TYPE", "char"));
         jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", quantize_grp_size));
     } else {
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp
index 885da895b91166..315a93190fdc90 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp
@@ -24,7 +24,7 @@
 namespace ov {
 namespace intel_gpu {

-ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8) {
+ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() {
     using namespace ov::pass::pattern;

     auto compressed_constant = [](const ov::Output& output) {
@@ -81,6 +81,12 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
         bool has_transpose = pattern_map.count(transpose_m);
         auto scale_shape = pattern_map.at(mul_const_m).get_shape();
         bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1;
+        bool sub_with_convert = (pattern_map.count(sub_with_convert_m) > 0) ? true : false;
+
+        auto weight_ptr = std::dynamic_pointer_cast(pattern_map.at(weights_m).get_node_shared_ptr());
+        bool weight_u8 = false;
+        if (weight_ptr->get_element_type() == ov::element::u8 || weight_ptr->get_element_type() == ov::element::i8)
+            weight_u8 = true;

         auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) {
             auto constant = std::dynamic_pointer_cast(node);
@@ -97,11 +103,17 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
             return std::make_shared(*constant, new_shape);
         };

-        auto convert_u4const_to_u8 = [convert_u4zp_to_u8](std::shared_ptr node) {
+        auto convert_const_to_u8 = [&](std::shared_ptr node) {
             auto constant = std::dynamic_pointer_cast(node);
-            if (constant->get_element_type() != ov::element::u4 || !convert_u4zp_to_u8)
+            // Convert ZP to u8
+            if (constant->get_element_type() == ov::element::u8)
                 return std::dynamic_pointer_cast(constant);
-            return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8));
+            if (constant->get_element_type() == ov::element::u4)
+                return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8));
+            if (weight_u8 && sub_with_convert)
+                return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8));
+
+            return std::dynamic_pointer_cast(constant);
         };

@@ -111,8 +123,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
         const bool with_zero_point = pattern_map.count(sub_no_convert_m) > 0 || pattern_map.count(sub_with_convert_m) > 0;
         if (with_zero_point) {
-            // WA: Convert ZP to u8 for OneDNN case to avoid u4 reorder
-            optional_zero_point = convert_u4const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()));
+            optional_zero_point = convert_const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()));
         }

         std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr());
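For orientation, the new `convert_const_to_u8` lambda replaces the `convert_u4zp_to_u8` flag with an unconditional policy on the zero-point constant. A simplified Python sketch of that decision (the type names are shorthand; this is a reading of the lambda, not its literal port):

```python
def zero_point_target_type(zp_type, weight_type, sub_with_convert):
    """Which element type the zero-point constant ends up with."""
    if zp_type == 'u8':          # already u8: reuse the constant as-is
        return 'u8'
    if zp_type == 'u4':          # u4 ZP is always promoted to u8
        return 'u8'
    if weight_type in ('u8', 'i8') and sub_with_convert:
        return 'u8'              # ZP reached through a Convert, with u8/i8 weights
    return zp_type               # otherwise keep the original type

assert zero_point_target_type('u4', 'u4', False) == 'u8'
assert zero_point_target_type('f16', 'u8', True) == 'u8'
assert zero_point_target_type('f16', 'u4', False) == 'f16'
```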
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp
index 641f55ead5fdaf..d2bc71a91f1285 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp
@@ -12,7 +12,7 @@ namespace intel_gpu {
 class ConvertFullyConnectedToFullyConnectedCompressed: public ov::pass::MatcherPass {
 public:
     OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0");
-    ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8 = false);
+    ConvertFullyConnectedToFullyConnectedCompressed();
 };

 }   // namespace intel_gpu
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index b75519ac40e678..f97b7fae126b47 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -810,7 +810,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
-        manager.register_pass(device_info.supports_immad);
+        manager.register_pass();

         bool disable_horizontal_fc_fusion = false;
         GPU_DEBUG_GET_INSTANCE(debug_config);
@@ -819,10 +819,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) {

         if (!disable_horizontal_fc_fusion)
             manager.register_pass();
+
+        // ZP should not be folded for FC. But still, ZP should be folded for Gather.
+        // Therefore, run MarkDequantizationSubgraph again to fold ZP constant.
+        manager.register_pass(supported_woq_types, true);
         if (device_info.supports_immad) {
-            // For OneDNN, ZP should not be folded for FC. But still, ZP should be folded for Gather.
-            // Therefore, run MarkDequantizationSubgraph again to fold ZP constant.
-            manager.register_pass(supported_woq_types, true);
             if (disable_horizontal_fc_fusion)
                 manager.register_pass();
         }
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp
index 12398c8221f4b7..ada1c1314da040 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp
@@ -37,7 +37,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) {
         auto convert = std::make_shared(weights_const, ov::element::f32);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 });
         auto scale = std::make_shared(convert, scale_const);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto fc = std::make_shared(input1, scale, no_bias);

         model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
@@ -46,7 +46,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) {
     {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 });
         auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const);

@@ -63,7 +63,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) {
         auto sub = std::make_shared(convert, zp_const);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto fc = std::make_shared(input1, scale, no_bias);

         model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
@@ -72,7 +72,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) {
     {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 });
         auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 });
         auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const);
@@ -92,7 +92,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) {
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 });
         auto reshape = std::make_shared(scale, reshape_const, false);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto fc = std::make_shared(input1, reshape, no_bias);

         model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
@@ -101,7 +101,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) {
     {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 });
         auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 });
         auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const);
@@ -115,13 +115,14 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 4, 4 }, { 1 });
         auto convert = std::make_shared(weights_const, ov::element::f32);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 });
-        auto sub = std::make_shared(convert, zp_const);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f32);
+        auto sub = std::make_shared(convert, zp_convert);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4, 1 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 });
         auto reshape = std::make_shared(scale, reshape_const, false);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto fc = std::make_shared(input1, reshape, no_bias);

         model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
@@ -130,9 +131,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) {
     {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 16 }, { 1 });
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 });
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1 }, { 1 });
         auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const);

         model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 });
@@ -144,15 +145,16 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 });
         auto convert = std::make_shared(weights_const, ov::element::f32);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 });
-        auto sub = std::make_shared(convert, zp_const);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f32);
+        auto sub = std::make_shared(convert, zp_convert);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 });
         auto reshape = std::make_shared(scale, reshape_const, false);
         auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose = std::make_shared(reshape, transpose_const);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto fc = std::make_shared(input1, transpose, no_bias);

         model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
@@ -163,11 +165,11 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) {
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 });
         auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_weights = std::make_shared(weights_const, transpose_weights_const);
-        auto no_bias = std::make_shared();
+        auto no_bias = std::make_shared();
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_scale = std::make_shared(scale_const, transpose_scale_const);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1 }, { 1 });
         auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, zp_const);

         model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 });
@@ -179,8 +181,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) {
         auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 });
         auto convert = std::make_shared(weights_const, ov::element::f32);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 });
-        auto sub = std::make_shared(convert, zp_const);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f32);
+        auto sub = std::make_shared(convert, zp_convert);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 });
@@ -202,7 +205,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) {
         auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_scale = std::make_shared(scale_const, transpose_scale_const);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_zp = std::make_shared(zp_const, transpose_zp_const);
         auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp);
@@ -216,8 +219,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) {
         auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 });
         auto convert = std::make_shared(weights_const, ov::element::f16);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 });
-        auto sub = std::make_shared(convert, zp_const);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f16);
+        auto sub = std::make_shared(convert, zp_convert);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 });
@@ -239,7 +243,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) {
         auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_scale = std::make_shared(scale_const, transpose_scale_const);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_zp = std::make_shared(zp_const, transpose_zp_const);
         auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp);
@@ -332,8 +336,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) {
     {
         auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 });
         auto convert = std::make_shared(weights_const, ov::element::f16);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 });
-        auto sub = std::make_shared(convert, zp_const);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f16);
+        auto sub = std::make_shared(convert, zp_convert);
         auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 });
         auto scale = std::make_shared(sub, scale_const);
         auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 });
@@ -373,7 +378,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) {
         auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 });
         auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_scale = std::make_shared(scale_const, transpose_scale_const);
-        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 });
+        auto zp_convert = std::make_shared(zp_const, ov::element::f16);
         auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
         auto transpose_zp = std::make_shared(zp_const, transpose_zp_const);
@@ -402,6 +408,33 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) {
 }

 TEST_F(TransformationTestsF, ConvertFCToCompressed9) {
+    {
+        auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 });
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
+        auto convert = std::make_shared(weights_const, ov::element::f16);
+        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
+        auto sub = std::make_shared(convert, zp_const);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
+        auto scale = std::make_shared(sub, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 });
+        manager.register_pass();
+    }
+    {
+        auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 });
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
+        auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 });
+        auto no_bias = std::make_shared();
+        auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const);
+
+        model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 });
+    }
+}
+
+TEST_F(TransformationTestsF, ConvertFCToCompressed10) {
     {
         auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 });
         auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 });
diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp
index a76c07622f40a5..c8183d7dbf2798 100644
--- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp
+++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp
@@ -21,7 +21,7 @@ align_input_info(const std::shared_ptr& model,
                  const std::map& in_info_ref,
                  const std::unordered_map& matched_op);

-// get set nodes of subgraph after start_node 
+// get set nodes of subgraph after start_node
 void get_subgraph_set_node(std::unordered_set>& nodes_to_check,
                            const std::shared_ptr& node);

@@ -51,14 +51,12 @@ generate_model(ov::NodeVector& nodes,
         auto orig_node_name = node->get_friendly_name();
         cloned_node_map.insert({ orig_node_name,
                                  clone_node(node, is_copy_constants, false, orig_node_name) });
-
+        // create temporary vector to fill node output indexes
         std::vector out_ports(node->outputs().size());
         std::iota(out_ports.begin(), out_ports.end(), 0);
         // fill by all nodes with output ports
-        model_output_nodes.insert({
-            orig_node_name,
-            std::unordered_set(out_ports.begin(), out_ports.end()) });
+        model_output_nodes.insert({orig_node_name, std::unordered_set(out_ports.begin(), out_ports.end())});
         if (!ov::op::util::is_output(node) &&
             !ov::op::util::is_constant(node) &&
             !ov::op::util::is_parameter(node)) {
@@ -83,7 +81,7 @@ generate_model(ov::NodeVector& nodes,
             if (orig_node_to_check.get_node()->shared_from_this() == node) {
                 auto orig_in_node_name = orig_in_node->get_friendly_name();
                 auto cloned_in_node = cloned_node->get_input_node_shared_ptr(in_idx);
-                // if op input node is in subgraph replace parameters 
+                // if op input node is in subgraph replace parameters
                 // in cloned node by other nodes from the map
                 if (cloned_node_map.count(orig_in_node_name)) {
                     auto orig_in_node = cloned_node_map[orig_in_node_name];
@@ -192,17 +190,14 @@ generate_model(ov::NodeVector& nodes,
     }
     auto h1 = std::hash{}(string_to_hash);
     model->set_friendly_name(std::to_string(h1));
-    {
-        auto it = nodes.begin();
-        while (it != nodes.end()) {
-            if (cloned_node_map.count((*it)->get_friendly_name())) {
-                nodes.erase(it);
-            } else {
-                ++it;
-            }
+    for (auto it = nodes.begin(); it != nodes.end();) {
+        if (cloned_node_map.count((*it)->get_friendly_name())) {
+            it = nodes.erase(it);
+        } else {
+            ++it;
         }
     }
-    
+
     return { model, model_input_info };
 }
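Aside on the `generate_model` cleanup above: the old loop called `nodes.erase(it)` without reassigning the returned iterator, which invalidates `it`; the rewrite uses the standard `it = nodes.erase(it)` idiom. A small Python analog of the same safe in-place deletion pattern (hypothetical data, only advance the index when nothing was deleted):

```python
cloned = {'a', 'b'}          # names already cloned into the subgraph
nodes = ['a', 'b', 'c', 'd']

i = 0
while i < len(nodes):
    if nodes[i] in cloned:
        del nodes[i]         # like vector::erase: the tail shifts left,
    else:                    # so the index must not advance here
        i += 1

print(nodes)  # ['c', 'd']
```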
diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py
index c0c8fd9dada071..04695d5ba7c723 100644
--- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py
+++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py
@@ -42,6 +42,8 @@ def create_keras_emb_net(self, input_names, input_shapes, input_type, input_dim,
     @pytest.mark.precommit
     def test_keras_emb_float32(self, params, ie_device, precision, ir_version, temp_dir,
                                use_legacy_frontend):
+        if ie_device == 'CPU':
+            pytest.skip('155622: OpenVINO runtime timeout on CPU')
         self._test(*self.create_keras_emb_net(**params, ir_version=ir_version),
                    ie_device, precision, temp_dir=temp_dir, ir_version=ir_version,
                    use_legacy_frontend=use_legacy_frontend, **params)
@@ -62,6 +64,8 @@ def test_keras_emb_float32(self, params, ie_device, precision, ir_version, temp_
     @pytest.mark.precommit
     def test_keras_emb_without_zero_mask_float32(self, params, ie_device, precision, ir_version,
                                                  temp_dir, use_legacy_frontend):
+        if ie_device == 'CPU':
+            pytest.skip('155622: OpenVINO runtime timeout on CPU')
         self._test(*self.create_keras_emb_net(**params, ir_version=ir_version),
                    ie_device, precision, temp_dir=temp_dir, ir_version=ir_version,
                    use_legacy_frontend=use_legacy_frontend, **params)
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py
index 11c1d2a2ffed17..103f99e3f4043f 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py
@@ -126,6 +126,8 @@ def test_bias_add_placeholder_const_4D(self, params, ie_device, precision, ir_ve
     @pytest.mark.nightly
     def test_bias_add_2_consts_4D(self, params, ie_device, precision, ir_version, temp_dir,
                                   use_legacy_frontend):
+        if ie_device == 'CPU':
+            pytest.skip('155622: OpenVINO runtime timeout on CPU')
         self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version,
                                                       use_legacy_frontend=use_legacy_frontend),
                    ie_device, precision, ir_version, temp_dir=temp_dir,