From 345c87234533a2be8262523f2ba40b886ebbecc5 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 6 Aug 2024 15:30:49 +0400 Subject: [PATCH 01/24] [GPU] Fixed moving a temporary object (#25924) ### Details: - *Fixed extra `std::move()` warning-error from https://github.com/openvinotoolkit/openvino/pull/25886* ### Tickets: - *N/A* --- src/plugins/intel_gpu/src/graph/kv_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 42972dfc7b7879..1927054faa9bf0 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -38,7 +38,7 @@ std::vector kv_cache_inst::calc_output_layouts(kv_cache_node const& /*no if (desc->num_outputs > 1) input_shapes.push_back(impl_param.get_input_layout(2).get()); - std::vector output_shapes = std::move(shape_infer(&op, input_shapes)); + std::vector output_shapes = shape_infer(&op, input_shapes); static const std::map ports_map = {{0, 0}, {1, 2}}; From d35911031932a436b6164584d82b29ac283d94c4 Mon Sep 17 00:00:00 2001 From: Ujjayant Kadian <118752727+ujjayant-kadian@users.noreply.github.com> Date: Tue, 6 Aug 2024 12:40:03 +0100 Subject: [PATCH 02/24] NPUW: Removing unnecessary std::moves from the decompression patterns (#25925) ### Details: - Cleaning up unnecessary std::moves from the patterns. ### Tickets: - *ticket-id* --- .../npuw/partitioning/patterns/dcoff.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp index 4654c9e18b2e72..156f22e59514b4 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp @@ -207,7 +207,7 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { LOG_DEBUG("Matched: " << matched_paramB << " - parameter to remove..."); // Record mapping from the Scale coeff paramter to the Real weight parameter - m_params_to.get().scales[matched_paramB] = std::move(matched_paramA); + m_params_to.get().scales[matched_paramB] = matched_paramA; // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -220,8 +220,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(std::move(matched_mulply)); - drop_outputs(std::move(matched_convrt)); + drop_outputs(matched_mulply); + drop_outputs(matched_convrt); LOG_DEBUG("Reconnecting the root..."); reconnect_root_to_convert(m); @@ -352,8 +352,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { // it can be probably eliminated as well) // Record mapping from the Scale coeff paramter to the Real weight parameter - m_params_to.get().zerops[matched_paramA] = std::move(matched_valueB); - m_params_to.get().scales[matched_paramC] = std::move(matched_paramA); + m_params_to.get().zerops[matched_paramA] = matched_valueB; + m_params_to.get().scales[matched_paramC] = matched_paramA; // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -366,8 +366,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(std::move(matched_mulply)); - 
drop_outputs(std::move(matched_convrt)); + drop_outputs(matched_mulply); + drop_outputs(matched_convrt); LOG_DEBUG("Reconnecting the root..."); reconnect_root(m); @@ -484,8 +484,8 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco // Reshape will be reconnected to Convert directly // Record mapping from the Scale coeff parameter to the Real weight parameter - pref.get().zerops[matched_paramA] = std::move(matched_valueB); - pref.get().scales[matched_paramC] = std::move(matched_paramA); + pref.get().zerops[matched_paramA] = matched_valueB; + pref.get().scales[matched_paramC] = matched_paramA; // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -498,7 +498,7 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(std::move(matched_mulply)); + drop_outputs(matched_mulply); drop_outputs(matched_convrt); LOG_DEBUG("Reconnecting the Root..."); From bc6daa3075c16a17cec4be07ed96fcaf76adf582 Mon Sep 17 00:00:00 2001 From: Nashez Zubair <35090095+nashez@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:59:11 +0530 Subject: [PATCH 03/24] [OV JS] Expose Model.clone to Node.js Api (#25763) ### Details: Changes as part of this PR include: * Add a ModelWrap::clone function: Calls the underlying Model.clone function * Add a ModelWrap::Wrap function to return the cloned model as a Napi::Value * Update the addon.ts file with the clone method * Add unit tests for the clone Api resolves #25402 ### Tickets: - #25402 --------- Signed-off-by: Nashez Zubair --- src/bindings/js/node/include/model_wrap.hpp | 7 +++++++ src/bindings/js/node/lib/addon.ts | 4 ++++ src/bindings/js/node/src/model_wrap.cpp | 15 +++++++++++++++ src/bindings/js/node/tests/unit/model.test.js | 18 ++++++++++++++++++ 4 files changed, 44 insertions(+) diff --git a/src/bindings/js/node/include/model_wrap.hpp b/src/bindings/js/node/include/model_wrap.hpp index 42da58aa76f5e0..1d8aaf5afdd421 100644 --- a/src/bindings/js/node/include/model_wrap.hpp +++ b/src/bindings/js/node/include/model_wrap.hpp @@ -116,6 +116,13 @@ class ModelWrap : public Napi::ObjectWrap { */ Napi::Value get_output_element_type(const Napi::CallbackInfo& info); + /** + * @brief Returns a cloned model for the current model + * @param info Contains information about the environment and passed arguments + * @return Napi::Value Cloned model returned from the API + */ + Napi::Value clone(const Napi::CallbackInfo& info); + private: std::shared_ptr _model; ov::Core _core; diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index 3c07b95455c5c6..88bd874210dbcc 100644 --- a/src/bindings/js/node/lib/addon.ts +++ b/src/bindings/js/node/lib/addon.ts @@ -214,6 +214,10 @@ interface CoreConstructor { * A user-defined model read by {@link Core.readModel}. */ interface Model { + /** + * It returns a cloned model. + */ + clone(): Model; /** * It gets the friendly name for a model. If a friendly name is not set * via {@link Model.setFriendlyName}, a unique model name is returned. 
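The addon.ts change above documents the new public API. For reference, a minimal usage sketch of the new binding from Node.js (the package import and the model path are illustrative placeholders, not part of this patch):

    const { addon: ov } = require('openvino-node');

    const core = new ov.Core();
    const model = core.readModelSync('model.xml');   // read an IR model
    const clonedModel = model.clone();               // copies the underlying ov::Model
    console.log(clonedModel instanceof ov.Model);    // true
    console.log(clonedModel.getFriendlyName());      // the clone keeps the source model's metadata

Since the clone is an independent Model instance, transformations applied to it do not affect the original model.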
diff --git a/src/bindings/js/node/src/model_wrap.cpp b/src/bindings/js/node/src/model_wrap.cpp index b53170c5246f80..88baf9da021b74 100644 --- a/src/bindings/js/node/src/model_wrap.cpp +++ b/src/bindings/js/node/src/model_wrap.cpp @@ -27,6 +27,7 @@ Napi::Function ModelWrap::get_class(Napi::Env env) { InstanceMethod("getFriendlyName", &ModelWrap::get_friendly_name), InstanceMethod("getOutputShape", &ModelWrap::get_output_shape), InstanceMethod("getOutputElementType", &ModelWrap::get_output_element_type), + InstanceMethod("clone", &ModelWrap::clone), InstanceAccessor<&ModelWrap::get_inputs>("inputs"), InstanceAccessor<&ModelWrap::get_outputs>("outputs")}); } @@ -189,3 +190,17 @@ Napi::Value ModelWrap::get_output_element_type(const Napi::CallbackInfo& info) { return info.Env().Undefined(); } } + +Napi::Value ModelWrap::clone(const Napi::CallbackInfo& info) { + std::vector allowed_signatures; + try { + if (ov::js::validate(info, allowed_signatures)) { + return cpp_to_js(info.Env(), _model->clone()); + } else { + OPENVINO_THROW("'clone'", ov::js::get_parameters_error_msg(info, allowed_signatures)); + } + } catch (const std::exception& e) { + reportError(info.Env(), e.what()); + return info.Env().Undefined(); + } +} diff --git a/src/bindings/js/node/tests/unit/model.test.js b/src/bindings/js/node/tests/unit/model.test.js index d5ac4f163367bb..7728f13a25dce9 100644 --- a/src/bindings/js/node/tests/unit/model.test.js +++ b/src/bindings/js/node/tests/unit/model.test.js @@ -9,6 +9,7 @@ const { getModelPath } = require('./utils.js'); const testXml = getModelPath().xml; const core = new ov.Core(); const model = core.readModelSync(testXml); +const clonedModel = model.clone(); describe('Node.js Model.isDynamic()', () => { it('should return a boolean value indicating if the model is dynamic', () => { @@ -157,3 +158,20 @@ describe('Model.getOutputElementType()', () => { ); }); }); + +describe('Model.clone()', () => { + it('should return an object of type model', () => { + assert.ok(clonedModel instanceof ov.Model, 'clone() should return a model'); + }); + + it('should return a model that is a clone of the calling model', () => { + assert.deepStrictEqual(clonedModel, model, "Cloned Model should be exactly equal to the calling model"); + }); + + it('should not accept any arguments', () => { + assert.throws( + () => model.clone("Unexpected argument").then(), + /'clone' method called with incorrect parameters./ + ); + }); +}); From 32f6d8a1045bad82f91e84a62a5a118f615b3913 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:04:58 +0400 Subject: [PATCH 04/24] Bump urllib3 from 1.26.18 to 1.26.19 in /docs (#25084) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.18 to 1.26.19.
Release notes (sourced from urllib3's releases):

1.26.19 (2024-06-17)
Changes

  • Added the Proxy-Authorization header to the list of headers to strip from requests when redirecting to a different host. As before, different headers can be set via Retry.remove_headers_on_redirect.
  • Fixed handling of OpenSSL 3.2.0's new error message for misconfiguring an HTTP proxy as HTTPS. ([#3405](https://github.com/urllib3/urllib3/issues/3405))

Full Changelog: https://github.com/urllib3/urllib3/compare/1.26.18...1.26.19

Note: due to an issue with urllib3's release automation, no multiple.intoto.jsonl file is available for this release.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 98328772f48c60..bec013e9997f50 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -45,5 +45,5 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.9 toml==0.10.2 -urllib3==1.26.18 +urllib3==1.26.19 zipp==3.4.1 \ No newline at end of file From 07b6cb06f85c197bf034bb3a5f9f5b0d8c95744b Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Tue, 6 Aug 2024 15:42:46 +0200 Subject: [PATCH 05/24] DOCS Highlight selected option (#25935) Changed sidebar.js for bolding main menu items Changed css for highligting selected option --- .../benchmarks_files/OV-benchmark-data.csv | 2 +- docs/sphinx_setup/_static/css/custom.css | 17 ++++++++++++++++- docs/sphinx_setup/_static/js/open_sidebar.js | 5 +++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv b/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv index dfdd15997dc38d..8422cd5fd89e90 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv @@ -526,4 +526,4 @@ stable-diffusion-v2-1,OV-2024.3.0,accel,Intel® Arc™ A770M dGPU,,,,,,321,150,1 stable-diffusion-v2-1,OV-2024.3.0,accel,Intel® Data Center GPU Flex 140 dGPU,,,,,,1900,75,1,1900,75,13.59,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",13.31,,,,, stable-diffusion-v2-1,OV-2024.3.0,core-iGPU,Intel® Core™ i7-1360P iGPU-only,,,,,,480,28,1,480,28,20.85,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",19.95,,,,, stable-diffusion-v2-1,OV-2024.3.0,core-iGPU,Intel® Core™ Ultra 7 processor 165H iGPU-only,,,,,,460,28,1,460,28,12.98,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",12.41,,,,, -end_rec,,,,,,,,,,,,,,,,,,,,,,,, +end_rec,,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css index 18586f01964db9..aac910c5ecd04d 100644 --- a/docs/sphinx_setup/_static/css/custom.css +++ b/docs/sphinx_setup/_static/css/custom.css @@ -116,6 +116,10 @@ a#wap_dns {display: none;} font-weight: 600; } +.bold { + font-weight: 700; +} + /* Underline width */ a:hover, .toc-entry a.nav-link:hover, @@ -126,7 +130,6 @@ nav.bd-links li > a:hover { #bd-docs-nav div ul a:hover { color: white; - text-decoration: underline } ul#navbar-main-elements > li:hover { @@ -140,6 +143,18 @@ nav.bd-links .current>a { color: black; } +a.current { + background-color: #76CEFF!important; +} + +.bd-sidebar-primary label.toctree-toggle:hover { + background: #76CEFF!important; +} + +a.current svg:not(:host).svg-inline--fa, svg:not(:root).svg-inline--fa { + color: black; +} + .bd-header .navbar-nav li a.nav-link { color: #fff; font-size: 1rem; diff --git a/docs/sphinx_setup/_static/js/open_sidebar.js b/docs/sphinx_setup/_static/js/open_sidebar.js index 6f01a7b2d49614..66ddf98c97403e 100644 --- a/docs/sphinx_setup/_static/js/open_sidebar.js +++ b/docs/sphinx_setup/_static/js/open_sidebar.js @@ -3,4 +3,9 @@ $(document).ready(function() { for(let i = 0; i < labels.length; i++){ labels[i].classList.remove("rotate"); } + + const menus = $( "ul.bd-sidenav > li > a" 
); + for(let i = 0; i < menus.length; i++){ + menus[i].classList.add("bold"); + } }) \ No newline at end of file From 7abac66881771f2f7f327d516ea95622a3c47008 Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Tue, 6 Aug 2024 16:12:44 +0200 Subject: [PATCH 06/24] DOCS Port Added button to select number of rows to master (#25939) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- docs/sphinx_setup/_static/js/openVinoDataTables.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/sphinx_setup/_static/js/openVinoDataTables.js b/docs/sphinx_setup/_static/js/openVinoDataTables.js index 0934d10ed90353..59e750220e20e6 100644 --- a/docs/sphinx_setup/_static/js/openVinoDataTables.js +++ b/docs/sphinx_setup/_static/js/openVinoDataTables.js @@ -2,9 +2,14 @@ $(document).ready(function () { var table = $('table.modeldata').DataTable({ "autoWidth": false, stateSave: true, + lengthMenu: [ + [10, 25, 50, -1], + ['10 rows', '25 rows', '50 rows', 'Show all rows'] + ], layout: { topStart: { buttons: [ + 'pageLength', 'colvis', { extend: 'colvisGroup', @@ -18,7 +23,6 @@ $(document).ready(function () { columns: ':visible' } } - ] } } From 95f104fe01a6d5aedeadb6c30a30ebfc23fad0d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:28:12 +0000 Subject: [PATCH 07/24] Bump certifi from 2023.7.22 to 2024.7.4 in /docs (#25411) Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.7.22 to 2024.7.4.
> **Note** > Automatic rebases have been disabled on this pull request as it has been open for over 30 days. Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ilya Lavrenov --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index bec013e9997f50..5703503a9ba158 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,7 +4,7 @@ attrs==22.1.0 Babel==2.11.0 beautifulsoup4==4.9.3 breathe==4.35.0 -certifi==2023.7.22 +certifi==2024.7.4 colorama==0.4.6 Cython==0.29.33 docutils==0.20 From 515c2130006935d6bcdd1bda603fd4551c6bb302 Mon Sep 17 00:00:00 2001 From: KianYong Gan Date: Tue, 6 Aug 2024 23:01:24 +0800 Subject: [PATCH 08/24] [NPU] Free serialized model earlier to reduce peak mem (#25684) Free serialized model earlier to reduce the memory consumption This screenshot showing the benchmark running in local, using `unet-camvid-onnx-0001.xml` ![image](https://github.com/user-attachments/assets/45667db9-2aab-43c2-ab5a-c63e852616ee) print_memory_usage() ![image](https://github.com/user-attachments/assets/08eb9243-316f-4e72-99e3-84902efdcb83) ### Details: - *item1* - *...* ### Tickets: - *ticket-147118* --- .../include/zero_compiler_in_driver.hpp | 19 +++ .../compiler/src/zero_compiler_in_driver.cpp | 139 ++++++++++++------ 2 files changed, 112 insertions(+), 46 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index 24ccf0353f7c0a..9885699c4a2623 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -58,6 +58,11 @@ class LevelZeroCompilerInDriver final : public ICompiler { NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override final; + ze_result_t seriazlideIRModelAndCreateGraph(const std::shared_ptr& model, + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + ze_graph_handle_t& graphHandle) const; + NetworkMetadata parse(const std::vector& network, const Config& config) const override final; std::vector process_profiling_output(const std::vector& profData, @@ -140,11 +145,25 @@ class LevelZeroCompilerInDriver final : public ICompiler { IONodeDescriptorMap& results, IONodeDescriptorMap& state) const; + template = true> + ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + const ze_device_handle_t& _deviceHandle, + ze_graph_query_network_handle_t& hGraphQueryNetwork) const; + // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) template = true> std::unordered_set queryImpl(const std::shared_ptr& model, const Config& config) const; + template = true> + ze_result_t seriazlideIRModelAndQueryNetworkCreateV1(const std::shared_ptr& model, + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + const ze_device_handle_t& _deviceHandle, + ze_graph_query_network_handle_t& hGraphQueryNetwork) const; + // ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, // pfnQueryNetworkGetSupportedLayers) template = true> diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp 
b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index c61d5ab0760ac3..0cdf2c961925b9 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -562,24 +562,15 @@ std::unordered_set LevelZeroCompilerInDriver::query return std::unordered_set(); } -// For ext version == 1.3 && == 1.4, query is supported, calling querynetwork api in _graphDdiTableExt +// For ext version == 1.3 && == 1.4 template template > -std::unordered_set LevelZeroCompilerInDriver::queryImpl( +ze_result_t LevelZeroCompilerInDriver::seriazlideIRModelAndQueryNetworkCreateV1( const std::shared_ptr& model, - const Config& config) const { - _logger.debug("queryImpl - Calling queryNetwork of 1.3 version."); - - ze_device_graph_properties_t deviceGraphProperties{}; - auto result = _graphDdiTableExt->pfnDeviceGetGraphProperties(_deviceHandle, &deviceGraphProperties); - if (ZE_RESULT_SUCCESS != result) { - OPENVINO_THROW("L0 pfnDeviceGetGraphProperties", - " result: ", - ze_result_to_string(result), - ", code 0x", - std::hex, - uint64_t(result)); - } + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + const ze_device_handle_t& _deviceHandle, + ze_graph_query_network_handle_t& hGraphQueryNetwork) const { ze_graph_compiler_version_info_t& compilerVersion = deviceGraphProperties.compilerVersion; auto serializedIR = serializeIR(model, compilerVersion); @@ -594,21 +585,20 @@ std::unordered_set LevelZeroCompilerInDriver::query serializedIR.first, serializedIR.second.get(), buildFlags.c_str()}; - ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; // Create querynetwork handle - result = _graphDdiTableExt->pfnQueryNetworkCreate(_context, _deviceHandle, &desc, &hGraphQueryNetwork); + ze_result_t result = _graphDdiTableExt->pfnQueryNetworkCreate(_context, _deviceHandle, &desc, &hGraphQueryNetwork); - return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); + return result; } -// For ext version >= 1.5 +// For ext version == 1.3 && == 1.4, query is supported, calling querynetwork api in _graphDdiTableExt template -template > +template > std::unordered_set LevelZeroCompilerInDriver::queryImpl( const std::shared_ptr& model, const Config& config) const { - _logger.debug("queryImpl - Calling queryNetwork of 1.5 version."); + _logger.debug("queryImpl - Calling queryNetwork of 1.3 version."); ze_device_graph_properties_t deviceGraphProperties{}; auto result = _graphDdiTableExt->pfnDeviceGetGraphProperties(_deviceHandle, &deviceGraphProperties); @@ -620,6 +610,27 @@ std::unordered_set LevelZeroCompilerInDriver::query std::hex, uint64_t(result)); } + + ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; + + result = seriazlideIRModelAndQueryNetworkCreateV1(model, + config, + deviceGraphProperties, + _deviceHandle, + hGraphQueryNetwork); + + return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); +} + +// For ext version >= 1.5 +template +template > +ze_result_t LevelZeroCompilerInDriver::seriazlideIRModelAndQueryNetworkCreateV2( + const std::shared_ptr& model, + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + const ze_device_handle_t& _deviceHandle, + ze_graph_query_network_handle_t& hGraphQueryNetwork) const { ze_graph_compiler_version_info_t& compilerVersion = deviceGraphProperties.compilerVersion; auto serializedIR = serializeIR(model, compilerVersion); @@ -636,10 +647,38 @@ std::unordered_set LevelZeroCompilerInDriver::query 
buildFlags.c_str(), ZE_GRAPH_FLAG_NONE}; + // Create querynetwork handle + ze_result_t result = _graphDdiTableExt->pfnQueryNetworkCreate2(_context, _deviceHandle, &desc, &hGraphQueryNetwork); + + return result; +} + +// For ext version >= 1.5 +template +template > +std::unordered_set LevelZeroCompilerInDriver::queryImpl( + const std::shared_ptr& model, + const Config& config) const { + _logger.debug("queryImpl - Calling queryNetwork of 1.5 version."); + + ze_device_graph_properties_t deviceGraphProperties{}; + auto result = _graphDdiTableExt->pfnDeviceGetGraphProperties(_deviceHandle, &deviceGraphProperties); + if (ZE_RESULT_SUCCESS != result) { + OPENVINO_THROW("L0 pfnDeviceGetGraphProperties", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result)); + } + ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; - // Create querynetwork handle - result = _graphDdiTableExt->pfnQueryNetworkCreate2(_context, _deviceHandle, &desc, &hGraphQueryNetwork); + result = seriazlideIRModelAndQueryNetworkCreateV2(model, + config, + deviceGraphProperties, + _deviceHandle, + hGraphQueryNetwork); return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); } @@ -759,6 +798,36 @@ ze_result_t LevelZeroCompilerInDriver::createGraph(const ze_grap // Create querynetwork handle return _graphDdiTableExt->pfnCreate2(_context, _deviceHandle, &desc, graph); } +template +ze_result_t LevelZeroCompilerInDriver::seriazlideIRModelAndCreateGraph( + const std::shared_ptr& model, + const Config& config, + ze_device_graph_properties_t deviceGraphProperties, + ze_graph_handle_t& graphHandle) const { + const ze_graph_compiler_version_info_t& compilerVersion = deviceGraphProperties.compilerVersion; + auto serializedIR = serializeIR(model, compilerVersion); + + ze_graph_format_t format = ZE_GRAPH_FORMAT_NGRAPH_LITE; + + std::string buildFlags; + + buildFlags += serializeIOInfo(model); + buildFlags += " "; + buildFlags += serializeConfig(config, const_cast(compilerVersion)); + + _logger.debug("compileIR Build flags : %s", buildFlags.c_str()); + + // If OV cache is enabled, disable driver caching + uint32_t flags = ZE_GRAPH_FLAG_NONE; + const auto set_cache_dir = config.get(); + if (!set_cache_dir.empty()) { + flags = flags | ZE_GRAPH_FLAG_DISABLE_CACHING; + } + + _logger.info("compileIR Using extension version: %s", typeid(TableExtension).name()); + ze_result_t result = createGraph(format, serializedIR, buildFlags, flags, &graphHandle); + return result; +} template NetworkDescription LevelZeroCompilerInDriver::compile(const std::shared_ptr& model, @@ -776,33 +845,11 @@ NetworkDescription LevelZeroCompilerInDriver::compile(const std: std::hex, uint64_t(result)); } - ze_graph_compiler_version_info_t& compilerVersion = deviceGraphProperties.compilerVersion; - - auto serializedIR = serializeIR(model, compilerVersion); - - ze_graph_format_t format = ZE_GRAPH_FORMAT_NGRAPH_LITE; - - std::string buildFlags; - - buildFlags += serializeIOInfo(model); - buildFlags += " "; - buildFlags += serializeConfig(config, compilerVersion); - - _logger.debug("compile Build flags : %s", buildFlags.c_str()); - // TODO #-30202 Store graph_handle inside NetworkDesc instead of blob. But this will require changes in zeroAPI // Graph handle should be used only in scope of compile / parse functions. 
ze_graph_handle_t graphHandle; - // If OV cache is enabled, disable driver caching - uint32_t flags = ZE_GRAPH_FLAG_NONE; - const auto set_cache_dir = config.get(); - if (!set_cache_dir.empty()) { - flags = flags | ZE_GRAPH_FLAG_DISABLE_CACHING; - } - - _logger.info("compile Using extension version: %s", typeid(TableExtension).name()); - result = createGraph(format, serializedIR, buildFlags, flags, &graphHandle); + result = seriazlideIRModelAndCreateGraph(model, config, deviceGraphProperties, graphHandle); OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, "Failed to compile network. L0 createGraph", From 37f6374bc0454d90351f4aa472856639bdad0076 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 6 Aug 2024 19:15:58 +0400 Subject: [PATCH 09/24] [TF FE] Support LookupTableSizeV2 operation (#25892) **Details:** This operation is required for four TensorFlow Hub models: universal-sentence-encoder and random-nnlm **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow/docs/supported_ops.md | 4 +- .../frontend/tensorflow/hash_table.hpp | 4 - .../tensorflow/src/op/lookup_table_size.cpp | 50 ++++++++++++ src/frontends/tensorflow/src/op_table.cpp | 3 + .../test_tf_LookupTableSize.py | 76 +++++++++++++++++++ 5 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 src/frontends/tensorflow/src/op/lookup_table_size.cpp create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_LookupTableSize.py diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index cced96c6122685..014becd0d62bdd 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -601,8 +601,8 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | LookupTableInsert | YES | | | LookupTableInsertV2 | YES | | | LookupTableRemoveV2 | NO | | -| LookupTableSize | NO | | -| LookupTableSizeV2 | NO | | +| LookupTableSize | YES | | +| LookupTableSizeV2 | YES | | | LoopCond | YES | | | LowerBound | NO | | | Lu | NO | | diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp index 131055369fcd3e..beecb75e733f56 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp @@ -35,10 +35,6 @@ class HashTable : public Variable { : HashTable(other) { m_keys = keys; m_values = values; - // reset names of tensor corresponding to variable value - // that is because variable can have multiple values during inference - m_keys.set_names({}); - m_values.set_names({}); m_is_initialized = true; ++m_init_counter; } diff --git a/src/frontends/tensorflow/src/op/lookup_table_size.cpp b/src/frontends/tensorflow/src/op/lookup_table_size.cpp new file mode 100644 index 00000000000000..42a52d8319d426 --- /dev/null +++ b/src/frontends/tensorflow/src/op/lookup_table_size.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/frontend/tensorflow/hash_table.hpp" +#include "openvino/frontend/tensorflow/node_context.hpp" +#include "openvino/frontend/tensorflow/variable.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" + +using namespace std; +using namespace ov; +using 
namespace ov::op; +using namespace ov::frontend::tensorflow; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_lookup_table_size_op(const NodeContext& node) { + default_op_checks(node, 1, {"LookupTableSize", "LookupTableSizeV2"}); + auto table_handle = as_type_ptr(node.get_input_by_reference(0).get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + table_handle, + "[TensorFlow Frontend] internal error: LookupTableSize operation expects table_handle by the first input"); + + auto all_keys = table_handle->get_keys(); + + // reshape all keys to 1D tensor to work it further + auto target_shape = make_shared(element::i32, Shape{1}, -1); + all_keys = make_shared(all_keys, target_shape, false); + + // compute size of records in HashTable + // table size must be a scalar + ov::Output table_size = make_shared(all_keys, element::i64); + auto squeeze_axis = make_shared(element::i32, Shape{1}, 0); + table_size = make_shared(table_size, squeeze_axis); + set_node_name(node.get_name(), table_size.get_node_shared_ptr()); + + return {table_size}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index f62d55a05fc520..ea0e4bd2643d39 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -97,6 +97,7 @@ TF_OP_CONVERTER(translate_iterator_get_next_op); TF_OP_CONVERTER(translate_iterator_op); TF_OP_CONVERTER(translate_lookup_table_import_op); TF_OP_CONVERTER(translate_lookup_table_find_op); +TF_OP_CONVERTER(translate_lookup_table_size_op); TF_OP_CONVERTER(translate_loop_cond_op); TF_OP_CONVERTER(translate_merge_op); TF_OP_CONVERTER(translate_mergev2checkpoint_op); @@ -301,6 +302,8 @@ const std::map get_supported_ops() { {"LookupTableImportV2", CreatorFunction(translate_lookup_table_import_op)}, {"LookupTableInsert", CreatorFunction(translate_no_op)}, {"LookupTableInsertV2", CreatorFunction(translate_no_op)}, + {"LookupTableSize", CreatorFunction(translate_lookup_table_size_op)}, + {"LookupTableSizeV2", CreatorFunction(translate_lookup_table_size_op)}, {"LRN", CreatorFunction(translate_lrn_op)}, {"MatMul", CreatorFunction(translate_mat_mul_op)}, {"MatrixBandPart", CreatorFunction(translate_matrix_band_part_op)}, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableSize.py b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableSize.py new file mode 100644 index 00000000000000..e0050c245f1321 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableSize.py @@ -0,0 +1,76 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import platform +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest +from common.utils.tf_utils import mix_array_with_value, run_in_jenkins + +rng = np.random.default_rng() + + +class TestLookupTableSizeOps(CommonTFLayerTest): + def _prepare_input(self, _): + inputs_data = {} + inputs_data['all_keys:0'] = np.array(self.all_keys).astype(self.keys_type) + return inputs_data + + def create_lookup_table_size_net(self, hash_table_type, keys_type, values_type, + all_keys, all_values): + hash_table_op = tf.raw_ops.HashTable if hash_table_type == 0 else tf.raw_ops.HashTableV2 + import_table_op = tf.raw_ops.LookupTableImport if hash_table_type == 0 else tf.raw_ops.LookupTableImportV2 + size_table_op = 
tf.raw_ops.LookupTableSize if hash_table_type == 0 else tf.raw_ops.LookupTableSizeV2 + + self.keys_type = keys_type + self.all_keys = all_keys + if keys_type == str: + keys_type = tf.string + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + all_keys = tf.compat.v1.placeholder(keys_type, [len(all_keys)], 'all_keys') + all_values = tf.constant(all_values, dtype=values_type) + hash_table = hash_table_op(key_dtype=keys_type, value_dtype=values_type) + import_hash_table = import_table_op(table_handle=hash_table, keys=all_keys, + values=all_values) + with tf.control_dependencies([import_hash_table]): + size_table_op(table_handle=hash_table, name='LookupTableSize') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data = [ + dict(keys_type=np.int32, values_type=np.float32, all_keys=[0, 1, 2, 3, 4, 5], + all_values=[2.0, 13.0, -2.0, 0.0, 3.0, 1.0]), + dict(keys_type=np.int64, values_type=np.int32, all_keys=[0, 1, 2, 3, 4, 5], + all_values=[2, 13, -2, 0, 3, 1]), + dict(keys_type=np.int32, values_type=np.float32, all_keys=[2, 0, 3, -2, 4, 10], + all_values=[2.0, 13.0, -2.0, 0.0, 3.0, 1.0]), + dict(keys_type=np.int64, values_type=np.float32, all_keys=[2, 0, 3, -2, 4, 10], + all_values=[2.0, 13.0, -2.0, 0.0, 3.0, 1.0]), + dict(keys_type=np.int32, values_type=tf.string, all_keys=[20, 10, 33, -22, 44, 11], + all_values=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO']), + dict(keys_type=str, values_type=np.int64, + all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], + all_values=[200, 100, 0, -3, 10, 1]), + dict(keys_type=str, values_type=np.int32, + all_keys=['First sentence', 'Second one', '', 'Third', 'Fourth Sentence', 'etc.'], + all_values=[-1, 2, 0, -3, 0, 1]), + ] + + @pytest.mark.parametrize("hash_table_type", [0, 1]) + @pytest.mark.parametrize("params", test_data) + @pytest.mark.precommit + @pytest.mark.nightly + def test_lookup_table_size(self, hash_table_type, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + keys_type = params['keys_type'] + if ie_device == 'GPU' and keys_type == str: + pytest.skip("148921: Segmentation fault on GPU") + self._test(*self.create_lookup_table_size_net(hash_table_type=hash_table_type, **params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) From 7bc728359e760eb2c17532e04226b4ec5eba33f9 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Tue, 6 Aug 2024 16:27:50 +0100 Subject: [PATCH 10/24] NPUW: Handle fp16 in constant fusion routines (#25919) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../intel_npu/src/plugin/npuw/partitioning/partitioning.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp index 954c868e4bc887..efd61140e080b2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp @@ -1226,6 +1226,7 @@ void Partitioner::saveRepeatedConstants(const std::string& func_name) { HANDLE_CASE(u4, uint8_t); HANDLE_CASE(i32, int); HANDLE_CASE(i64, int64_t); + HANDLE_CASE(f16, uint16_t); HANDLE_CASE(f32, float); #undef HANDLE_CASE default: @@ -1246,7 +1247,8 @@ void Partitioner::saveRepeatedConstants(const std::string& func_name) { if ((((proto_shape.size() 
== 0 || (proto_shape.size() == 1 && proto_shape[0] <= 10)) && proto_node->output(0).get_element_type().is_integral()) || - (proto_node->output(0).get_element_type() == ov::element::f32 && + ((proto_node->output(0).get_element_type() == ov::element::f32 || + proto_node->output(0).get_element_type() == ov::element::f16) && std::accumulate(proto_shape.begin(), proto_shape.end(), size_t{1}, std::multiplies()) == 1)) && std::all_of(instances.begin(), instances.end(), [&](const CTPtr& other_node) -> bool { From f19282fe8106bf0f90c229d957020330dee560e8 Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 6 Aug 2024 23:45:40 +0800 Subject: [PATCH 11/24] [dGPU] avoid strided_slice to be executed in cpu (#25601) ### Details: - Big input tensor to StrideSlice primitive is executed in CPU will lead to huge performance drop. - *...* ### Tickets: - CVS-147088 --------- Co-authored-by: Pavel Durandin --- .../src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index aec9e8b5f497e6..3599e68301da29 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -7,6 +7,7 @@ #include "reshape_inst.h" #include "eltwise_inst.h" #include "select_inst.h" +#include "strided_slice_inst.h" #include "gather_inst.h" #include "pass_manager.h" @@ -78,6 +79,13 @@ bool mark_shape_of_subgraphs::can_mark_node(const program_node& node) { return false; } + // Exclude stride_slice primitive if it's input is big const ternsor, else CPU reference implementation + // will lead to huge performance drop. 
+ if (node.is_type() && node.get_dependency(0).is_constant() && + node.get_dependency(0).get_output_layout().count() > 1024 * 1024) { + return false; + } + auto available_impls = node.type()->get_available_impls(node); auto cpu_impl_found = available_impls.find(impl_types::cpu) != available_impls.end(); From 5ec4375e291247d93836a49cd33659459dba31f6 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Tue, 6 Aug 2024 18:47:48 +0200 Subject: [PATCH 12/24] Fix docs code snippets (#25864) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .github/workflows/code_snippets.yml | 9 +- .../snippets/{ => gpu}/context_sharing.cpp | 0 .../snippets/{ => gpu}/context_sharing_va.cpp | 0 .../{ => gpu}/context_sharing_va_c.cpp | 0 .../preprocessing_nv12_single_plane.cpp | 0 .../{ => gpu}/preprocessing_nv12_to_gray.cpp | 0 .../preprocessing_nv12_two_planes.cpp | 0 .../preprocessing_nv12_two_planes_c.cpp | 0 .../snippets/{ => gpu}/queue_sharing.cpp | 0 .../{ => gpu}/remote_objects_creation.cpp | 0 .../{ => gpu}/remote_objects_creation_c.cpp | 0 .../assets/snippets/multi_threading.cpp | 2 +- .../snippets/npu_remote_objects_creation.cpp | 2 +- .../assets/snippets/ov_dynamic_shapes.c | 36 +++--- .../assets/snippets/ov_patterns.cpp | 110 +++++++----------- .../ov_sparse_weights_decompression.cpp | 2 +- docs/snippets/CMakeLists.txt | 10 +- 17 files changed, 75 insertions(+), 96 deletions(-) rename docs/articles_en/assets/snippets/{ => gpu}/context_sharing.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/context_sharing_va.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/context_sharing_va_c.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/preprocessing_nv12_single_plane.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/preprocessing_nv12_to_gray.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/preprocessing_nv12_two_planes.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/preprocessing_nv12_two_planes_c.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/queue_sharing.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/remote_objects_creation.cpp (100%) rename docs/articles_en/assets/snippets/{ => gpu}/remote_objects_creation_c.cpp (100%) diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index af6758bafc11fc..ae5f9ee25624d3 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -46,4 +46,11 @@ jobs: run: cmake -DCMAKE_BUILD_TYPE=Release -DTHREADING=SEQ -B build - name: Build snippets - run: cmake --build build --target openvino_docs_snippets --parallel + if: ${{ runner.os == 'Linux' || runner.os == 'macOS'}} + run: cmake --build build --target openvino_docs_snippets --parallel $(nproc) + + - name: Build snippets Windows + if: ${{ runner.os == 'Windows'}} + shell: pwsh + run: cmake --build build --target openvino_docs_snippets --parallel $ENV:NUMBER_OF_PROCESSORS + diff --git a/docs/articles_en/assets/snippets/context_sharing.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing.cpp rename to docs/articles_en/assets/snippets/gpu/context_sharing.cpp diff --git a/docs/articles_en/assets/snippets/context_sharing_va.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing_va.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing_va.cpp rename to docs/articles_en/assets/snippets/gpu/context_sharing_va.cpp diff --git 
a/docs/articles_en/assets/snippets/context_sharing_va_c.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing_va_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing_va_c.cpp rename to docs/articles_en/assets/snippets/gpu/context_sharing_va_c.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp diff --git a/docs/articles_en/assets/snippets/queue_sharing.cpp b/docs/articles_en/assets/snippets/gpu/queue_sharing.cpp similarity index 100% rename from docs/articles_en/assets/snippets/queue_sharing.cpp rename to docs/articles_en/assets/snippets/gpu/queue_sharing.cpp diff --git a/docs/articles_en/assets/snippets/remote_objects_creation.cpp b/docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp similarity index 100% rename from docs/articles_en/assets/snippets/remote_objects_creation.cpp rename to docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp diff --git a/docs/articles_en/assets/snippets/remote_objects_creation_c.cpp b/docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/remote_objects_creation_c.cpp rename to docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp diff --git a/docs/articles_en/assets/snippets/multi_threading.cpp b/docs/articles_en/assets/snippets/multi_threading.cpp index eae2b2e6326945..6b1db124ec6020 100644 --- a/docs/articles_en/assets/snippets/multi_threading.cpp +++ b/docs/articles_en/assets/snippets/multi_threading.cpp @@ -18,7 +18,7 @@ int main() { auto compiled_model_1 = core.compile_model(model, device, ov::inference_num_threads(1)); // Use logical processors of Efficient-cores for inference on hybrid platform - auto compiled_model_2 = core.compile_model(model, device, ov::hint::scheduling_core_type(ECORE_ONLY)); + auto compiled_model_2 = core.compile_model(model, device, ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)); // Use one logical processor per CPU core for inference when hyper threading is on auto compiled_model_3 = core.compile_model(model, device, ov::hint::enable_hyper_threading(false)); diff --git a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp 
b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp index 75eb50839ca117..e8267e5d44cb4c 100644 --- a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp +++ b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp @@ -42,7 +42,7 @@ int main() { { //! [wrap_dmabuf_fd] - int32_t fd_heap; // create the DMA-BUF System Heap file descriptor + int32_t fd_heap = 0; // create the DMA-BUF System Heap file descriptor auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, fd_heap); //! [wrap_dmabuf_fd] } diff --git a/docs/articles_en/assets/snippets/ov_dynamic_shapes.c b/docs/articles_en/assets/snippets/ov_dynamic_shapes.c index 7e720dfc5dc457..fa1f3158365ddf 100644 --- a/docs/articles_en/assets/snippets/ov_dynamic_shapes.c +++ b/docs/articles_en/assets/snippets/ov_dynamic_shapes.c @@ -61,14 +61,14 @@ ov_model_t* model = NULL; ov_core_read_model(core, "model.xml", NULL, &model); //! [ov_dynamic_shapes:print_dynamic] -ov_output_port_t* output_port = NULL; -ov_output_port_t* input_port = NULL; +ov_output_const_port_t* output_port = NULL; +ov_output_const_port_t* input_port = NULL; ov_partial_shape_t partial_shape; -char * str_partial_shape = NULL; +const char * str_partial_shape = NULL; // Print output partial shape { -ov_model_output(model, &output_port); +ov_model_const_output(model, &output_port); ov_port_get_partial_shape(output_port, &partial_shape); str_partial_shape = ov_partial_shape_to_string(partial_shape); printf("The output partial shape: %s", str_partial_shape); @@ -76,7 +76,7 @@ printf("The output partial shape: %s", str_partial_shape); // Print input partial shape { -ov_model_input(model, &input_port); +ov_model_const_input(model, &input_port); ov_port_get_partial_shape(input_port, &partial_shape); str_partial_shape = ov_partial_shape_to_string(partial_shape); printf("The input partial shape: %s", str_partial_shape); @@ -85,8 +85,8 @@ printf("The input partial shape: %s", str_partial_shape); // free allocated resource ov_free(str_partial_shape); ov_partial_shape_free(&partial_shape); -ov_output_port_free(output_port); -ov_output_port_free(input_port); +ov_output_const_port_free(output_port); +ov_output_const_port_free(input_port); //! [ov_dynamic_shapes:print_dynamic] ov_model_free(model); ov_core_free(core); @@ -98,15 +98,15 @@ ov_core_create(&core); //! [ov_dynamic_shapes:detect_dynamic] ov_model_t* model = NULL; -ov_output_port_t* input_port = NULL; -ov_output_port_t* output_port = NULL; +ov_output_const_port_t* input_port = NULL; +ov_output_const_port_t* output_port = NULL; ov_partial_shape_t partial_shape; ov_core_read_model(core, "model.xml", NULL, &model); // for input { -ov_model_input_by_index(model, 0, &input_port); +ov_model_const_input_by_index(model, 0, &input_port); ov_port_get_partial_shape(input_port, &partial_shape); if (ov_partial_shape_is_dynamic(partial_shape)) { // input is dynamic @@ -115,7 +115,7 @@ if (ov_partial_shape_is_dynamic(partial_shape)) { // for output { -ov_model_output_by_index(model, 0, &output_port); +ov_model_const_output_by_index(model, 0, &output_port); ov_port_get_partial_shape(output_port, &partial_shape); if (ov_partial_shape_is_dynamic(partial_shape)) { // output is dynamic @@ -124,8 +124,8 @@ if (ov_partial_shape_is_dynamic(partial_shape)) { // free allocated resource ov_partial_shape_free(&partial_shape); -ov_output_port_free(input_port); -ov_output_port_free(output_port); +ov_output_const_port_free(input_port); +ov_output_const_port_free(output_port); //! 
[ov_dynamic_shapes:detect_dynamic] ov_model_free(model); ov_core_free(core); @@ -147,8 +147,8 @@ ov_infer_request_t* infer_request = NULL; ov_compiled_model_create_infer_request(compiled_model, &infer_request); //! [ov_dynamic_shapes:set_input_tensor] -ov_output_port_t* input_port = NULL; -ov_element_type_e* type = NULL; +ov_output_const_port_t* input_port = NULL; +ov_element_type_e type = UNDEFINED; ov_shape_t input_shape_1; ov_tensor_t* input_tensor_1 = NULL; ov_tensor_t* output_tensor = NULL; @@ -163,8 +163,8 @@ void* data_2 = NULL; // Create tensor compatible with the model input // Shape {1, 128} is compatible with any reshape statements made in previous examples { -ov_model_input(model, &input_port); -ov_port_get_element_type(input_port, type); +ov_model_const_input(model, &input_port); +ov_port_get_element_type(input_port, &type); int64_t dims[2] = {1, 128}; ov_shape_create(2, dims, &input_shape_1); ov_tensor_create(type, input_shape_1, &input_tensor_1); @@ -214,7 +214,7 @@ ov_tensor_get_shape(output_tensor, &output_shape_2); // ... read values in data_2 according to the shape output_shape_2 // free resource -ov_output_port_free(input_port); +ov_output_const_port_free(input_port); ov_shape_free(&input_shape_1); ov_tensor_free(input_tensor_1); ov_shape_free(&output_shape_1); diff --git a/docs/articles_en/assets/snippets/ov_patterns.cpp b/docs/articles_en/assets/snippets/ov_patterns.cpp index 0382468a01c0e7..ee52c733019d39 100644 --- a/docs/articles_en/assets/snippets/ov_patterns.cpp +++ b/docs/articles_en/assets/snippets/ov_patterns.cpp @@ -2,9 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // ! [ov:imports] -#include - -#include "common_test_utils/matcher.hpp" #include "openvino/op/abs.hpp" #include "openvino/op/add.hpp" #include "openvino/op/matmul.hpp" @@ -22,7 +19,7 @@ using namespace std; // ! [ov:imports] // ! [ov:create_simple_model_and_pattern] -TEST(pattern, simple_model_and_pattern) { +void create_simple_model_and_pattern() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -39,17 +36,13 @@ TEST(pattern, simple_model_and_pattern) { auto pattern_abs = std::make_shared(pattern_mul->output(0)); auto pattern_relu = std::make_shared(pattern_abs->output(0)); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:create_simple_model_and_pattern] // ! [ov:create_simple_model_and_pattern_wrap_type] -TEST(pattern, simple_model_and_pattern_wrap_type) { +void create_simple_model_and_pattern_wrap_type() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -66,17 +59,13 @@ TEST(pattern, simple_model_and_pattern_wrap_type) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:create_simple_model_and_pattern_wrap_type] // ! 
[ov:wrap_type_list] -TEST(pattern, wrap_type_list) { +void wrap_type_list() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -95,45 +84,42 @@ TEST(pattern, wrap_type_list) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // The same pattern perfectly matches 2 different nodes - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); - ASSERT_TRUE(tm.match(pattern_relu, model_sig)); + // pattern_relu should perfectly matches model_relu and model_sig } // ! [ov:wrap_type_list] void patterns_misc() { -// ! [ov:any_input] - auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input(), pattern::any_input()}); - auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); - auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); -// ! [ov:any_input] - -// ! [ov:wrap_type_predicate] - ov::pass::pattern::wrap_type({pattern::any_input()}, pattern::consumers_count(2)); -// ! [ov:wrap_type_predicate] - - -// ! [ov:any_input_predicate] - auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input([](const Output& value){ - return value.get_shape().size() == 4;}), - pattern::any_input([](const Output& value){ - return value.get_shape().size() == 4;})}); - auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); - auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); -// ! [ov:any_input_predicate] - - -// ! [ov:optional_predicate] - auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu, pattern::consumers_count(2)); -// ! [ov:optional_predicate] +{ + // ! [ov:any_input] + auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input(), pattern::any_input()}); + auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); + auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); + // ! [ov:any_input] + + // ! [ov:wrap_type_predicate] + ov::pass::pattern::wrap_type({pattern::any_input()}, pattern::consumers_count(2)); + // ! [ov:wrap_type_predicate] +} +{ + // ! [ov:any_input_predicate] + auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input([](const Output& value){ + return value.get_shape().size() == 4;}), + pattern::any_input([](const Output& value){ + return value.get_shape().size() == 4;})}); + auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); + auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); + // ! [ov:any_input_predicate] + + + // ! [ov:optional_predicate] + auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu, pattern::consumers_count(2)); + // ! [ov:optional_predicate] +} } // ! [ov:pattern_or] -TEST(pattern, pattern_or) { +void pattern_or() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -158,17 +144,13 @@ TEST(pattern, pattern_or) { // Create Or node auto pattern_or = std::make_shared(OutputVector{red_pattern_sigmoid->output(0), blue_pattern_relu->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // The same pattern perfectly matches 2 different nodes - ASSERT_TRUE(tm.match(pattern_or, model_relu)); + // pattern_or should perfectly matches model_relu } // ! [ov:pattern_or] // ! 
[ov:pattern_optional_middle] -TEST(pattern, pattern_optional_middle) { +void pattern_optional_middle() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -186,17 +168,13 @@ TEST(pattern, pattern_optional_middle) { auto pattern_sig_opt = ov::pass::pattern::optional({pattern_abs->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_sig_opt->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:pattern_optional_middle] // ! [ov:pattern_optional_top] -TEST(pattern, pattern_optional_top) { +void pattern_optional_top() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -214,17 +192,13 @@ TEST(pattern, pattern_optional_top) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:pattern_optional_top] // ! [ov:pattern_optional_root] -TEST(pattern, pattern_optional_root) { +void pattern_optional_root() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -242,10 +216,6 @@ TEST(pattern, pattern_optional_root) { auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:pattern_optional_root] \ No newline at end of file diff --git a/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp b/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp index 61fc1d05ac8d42..81822a8366d96c 100644 --- a/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp +++ b/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp @@ -11,7 +11,7 @@ int main() { ov::AnyMap config; //! [ov:intel_cpu:sparse_weights_decompression:part0] ov::Core core; // Step 1: create ov::Core object - core.set_property(ov::intel_cpu::sparse_weights_decompression_rate(0.8)); // Step 1b: Enable sparse weights decompression feature + core.set_property(ov::intel_cpu::sparse_weights_decompression_rate(0.8f)); // Step 1b: Enable sparse weights decompression feature auto model = core.read_model(modelPath); // Step 2: Read Model //... // Step 3: Prepare inputs/outputs //... 
// Step 4: Set device configuration
diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt
index e21443b7782137..f853d07328373b 100644
--- a/docs/snippets/CMakeLists.txt
+++ b/docs/snippets/CMakeLists.txt
@@ -18,12 +18,14 @@ endif()
file(GLOB SOURCES
    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
-file(GLOB GPU_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp")
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/*.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/*.c")
+file(GLOB GPU_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/gpu/*.cpp")

# add GPU snippets if OpenCL has been found
if(TARGET OpenCL::OpenCL)
-    list(APPEND SOURCES ${GPU_SOURCES})
+    list(APPEND SOURCES ${GPU_SOURCES})
endif()

# try to find VA libraries
@@ -38,7 +40,7 @@ endif()
# remove OpenCV related sources
find_package(OpenCV QUIET COMPONENTS core imgcodecs)
if(NOT OpenCV_FOUND OR NOT OpenCV_VERSION VERSION_GREATER_EQUAL 3)
-    list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ShapeInference.cpp")
+    list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/ShapeInference.cpp")
endif()

# requires mfxFrameSurface1 and MSS API

From decdac6fff3fe849560bdb91ca17b2b2ad64f6f5 Mon Sep 17 00:00:00 2001
From: Damian Kurek
Date: Tue, 6 Aug 2024 18:49:54 +0200
Subject: [PATCH 13/24] [GPU] Use parallel sum reduction in RMS BFYX OPT kernel (#25896)

### Details:
- Use parallel sum reduction for RMS BFYX OPT kernel
- Improve heuristics

### Tickets:
- 148937

Co-authored-by: Pavel Durandin
---
 .../kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl | 13 ++++++++-----
 .../kernels/rms/rms_kernel_bfyx_opt.cpp | 2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl
index 14a1fa76a29986..eb8d6d823ad39c 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl
@@ -92,12 +92,15 @@ KERNEL(rms_gpu_bfyx_opt)(
     slm_buf[get_sub_group_id()] = rms;

     barrier(CLK_LOCAL_MEM_FENCE);
-    if (in_data_idx == 0) {
-        for (uint i = 1; i < get_num_sub_groups(); ++i)
-        {
-            rms += slm_buf[i];
+    for (uint offset = get_num_sub_groups() / 2; offset > 0; offset /= 2) {
+        if (in_data_idx < offset) {
+            slm_buf[in_data_idx] += slm_buf[in_data_idx + offset];
         }
-        rms = rms / data_size;
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    if (in_data_idx == 0) {
+        rms = slm_buf[0] / data_size;
         slm_buf[0] = native_powr(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1);
     }
     barrier(CLK_LOCAL_MEM_FENCE);
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
index 6c86863c61aeb7..8d9cda302cf277 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp
@@ -120,7 +120,7 @@ RMSKernelBase::DispatchData RMSKernelBfyxOpt::SetDefault(const rms_params& param
     dispatchData.itemsNum = dispatchData.dataSize;

     // Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory reads
-    while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) {
+    while ((dispatchData.itemsNum > 8 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) {
         dispatchData.lws[0] *= 2;
         dispatchData.itemsNum /= 2;
     }
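The kernel change above replaces a serial accumulation done by work item 0 with a tree reduction over the subgroup partial sums in `slm_buf`: each pass halves the active range and a local barrier keeps the buffer consistent, so N partial sums fold in log2(N) steps instead of N-1 serial additions. A host-side C++ sketch of the same folding, illustrative only, assuming a power-of-two size standing in for `get_num_sub_groups()`:

```cpp
#include <cstddef>
#include <vector>

// Sketch of the tree reduction the kernel now performs in local memory.
// Each pass folds the upper half of the active range into the lower half;
// after log2(N) passes the total sum sits in buf[0].
float tree_reduce(std::vector<float>& buf) {  // buf.size() must be a power of two
    for (std::size_t offset = buf.size() / 2; offset > 0; offset /= 2) {
        for (std::size_t i = 0; i < offset; ++i) {
            buf[i] += buf[i + offset];  // in the kernel, each i is a work item
        }
        // the kernel places barrier(CLK_LOCAL_MEM_FENCE) here; the serial
        // loop makes that synchronization implicit
    }
    return buf[0];
}
```
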
From a04986a82ae7f4817fe32fb6b7b6f6093b03ecee Mon Sep 17 00:00:00 2001
From: Egor Tyuvaev
Date: Tue, 6 Aug 2024 22:48:45 +0200
Subject: [PATCH 14/24] Fix ONNX frontend code generation (#25937)

### Details:
The ONNX frontend runs code generation when `BUILD_SHARED_LIBS=OFF`, and the generated header used to be written into the source directory at configure time. If two configurations run at the same time (e.g. `vcpkg` configures the debug and release builds concurrently), both `cmake` instances append lines to the same file in the source directory, which corrupts its content and produces cryptic build errors.

This change makes the script write the generated header to a directory inside the build directory, so that two different configuration processes don't interfere with each other.
---
 src/frontends/onnx/frontend/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt
index 0ceeec8f7606a3..80fd16e2ed6483 100644
--- a/src/frontends/onnx/frontend/CMakeLists.txt
+++ b/src/frontends/onnx/frontend/CMakeLists.txt
@@ -4,7 +4,7 @@

 if(NOT BUILD_SHARED_LIBS)
     file(GLOB_RECURSE op_list "src/op/*.cpp")
-    set(static_reg_file "src/static_reg.hpp")
+    set(static_reg_file ${CMAKE_CURRENT_BINARY_DIR}/static_reg.hpp)
     file(WRITE ${static_reg_file} "// Copyright (C) 2018-2024 Intel Corporation\n// SPDX-License-Identifier: Apache-2.0\n// Auto generated file, DO NOT EDIT INLINE\n\n")
     file(APPEND ${static_reg_file} "#include \"core/operator_set.hpp\"\n\n")
     file(APPEND ${static_reg_file} "#define ONNX_DECL_OP(op) extern ov::OutputVector op(const Node&)\n\n")

From 1dfb571b8ceef0e988694bd0e6b0bc58d0822eb0 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 7 Aug 2024 10:45:03 +0400
Subject: [PATCH 15/24] [GPU] Minor refactoring (#25907)

### Details:
- Move the update of shape info and dispatch data into a single `update` method, to avoid filling shape_info for non-OCL impls
- Move the `implementation_map.hpp` header to the impls/registry folder
- Return device info by reference instead of by copy
- Small headers cleanup
- Added missing default constructors for some primitives
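The first bullet shows up throughout the diff below: `update_dispatch_data(const kernel_impl_params&)` becomes `update(primitive_inst&, const kernel_impl_params&)`. A minimal sketch of the consolidated interface, with the surrounding class details elided and only the signature taken from this diff:

```cpp
namespace cldnn {
struct primitive_inst;      // defined in primitive_inst.h
struct kernel_impl_params;  // defined elsewhere in the GPU plugin

// Sketch only: the real primitive_impl carries much more state.
struct primitive_impl {
    virtual ~primitive_impl() = default;
    // One virtual now covers both filling the shape_info buffer and
    // recomputing dispatch data, so impls that need neither (CPU and
    // common ones) can override it with an empty body:
    //     void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {}
    virtual void update(primitive_inst& inst, const kernel_impl_params& impl_param) = 0;
};
}  // namespace cldnn
```
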
---
 .../intel_gpu/primitives/implementation_desc.hpp | 1 -
 .../intel_gpu/primitives/non_max_suppression.hpp | 2 ++
 .../include/intel_gpu/primitives/unique.hpp | 4 ++++
 .../intel_gpu/include/intel_gpu/runtime/device.hpp | 2 +-
 .../intel_gpu/include/intel_gpu/runtime/engine.hpp | 2 +-
 .../graph/graph_optimizer/post_optimize_weights.cpp | 2 +-
 .../prepare_primitive_fusing_through.cpp | 1 -
 .../intel_gpu/src/graph/impls/common/condition.cpp | 2 +-
 .../intel_gpu/src/graph/impls/common/loop.cpp | 3 +--
 .../src/graph/impls/common/wait_for_events.cpp | 4 ++--
 .../intel_gpu/src/graph/impls/cpu/activation.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/assign.cpp | 3 +--
 .../intel_gpu/src/graph/impls/cpu/broadcast.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/concat.cpp | 6 ++----
 src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp | 6 ++----
 .../src/graph/impls/cpu/detection_output.cpp | 2 +-
 .../intel_gpu/src/graph/impls/cpu/eltwise.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/gather.cpp | 6 ++----
 .../src/graph/impls/cpu/non_max_suppression.cpp | 2 +-
 .../intel_gpu/src/graph/impls/cpu/proposal.cpp | 7 ++-----
 src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/read_value.cpp | 3 +--
 .../intel_gpu/src/graph/impls/cpu/reduce.cpp | 4 ++--
 .../intel_gpu/src/graph/impls/cpu/reorder.cpp | 6 ++----
 .../src/graph/impls/cpu/scatter_update.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/select.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/shape_of.cpp | 6 ++----
 .../intel_gpu/src/graph/impls/cpu/strided_slice.cpp | 6 ++----
 src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp | 6 ++----
 .../src/graph/impls/ocl/multi_stage_primitive.hpp | 13 ++++++++++++-
 .../src/graph/impls/ocl/primitive_base.hpp | 13 ++++++++++++-
 .../src/graph/impls/onednn/concatenation_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/convolution_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/deconvolution_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/eltwise_onednn.cpp | 2 +-
 .../graph/impls/onednn/fully_connected_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/gemm_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/pooling_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/reduction_onednn.cpp | 2 +-
 .../src/graph/impls/onednn/reorder_onednn.cpp | 2 +-
 .../registry}/implementation_map.hpp | 0
 .../intel_gpu/src/graph/include/primitive_inst.h | 8 ++++----
 .../src/graph/include/primitive_type_base.h | 2 +-
 .../intel_gpu/src/graph/include/to_string_utils.h | 2 --
 src/plugins/intel_gpu/src/graph/input_layout.cpp | 1 -
 .../intel_gpu/src/graph/layout_optimizer.cpp | 1 -
 src/plugins/intel_gpu/src/graph/loop.cpp | 1 -
 src/plugins/intel_gpu/src/graph/multinomial.cpp | 1 -
 src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 7 ++-----
 .../src/graph/scaled_dot_product_attention.cpp | 1 -
 src/plugins/intel_gpu/src/runtime/engine.cpp | 2 +-
 .../intel_gpu/src/runtime/ocl/ocl_device.hpp | 8 +-------
 .../src/runtime/ocl/ocl_device_detector.cpp | 3 +--
 .../tests/unit/module_tests/device_test.cpp | 2 +-
 .../module_tests/weights_reorder_factory_test.cpp | 2 +-
 55 files changed, 92 insertions(+), 115 deletions(-)
 rename src/plugins/intel_gpu/src/graph/{include => impls/registry}/implementation_map.hpp (100%)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp
index eb51b1dfb37307..4e5c53d6b37e3e 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp
@@ -9,7 +9,6 @@
 #include "openvino/core/except.hpp"

 #include "intel_gpu/primitives/primitive.hpp"
-#include "intel_gpu/runtime/tensor.hpp"

 namespace cldnn {

diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp
index 2a0b81b2aba20d..b2497c6d711d7b 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp
@@ -160,6 +160,8 @@ struct non_max_suppression : public primitive_base {
 struct non_max_suppression_gather : primitive_base {
     CLDNN_DECLARE_PRIMITIVE(non_max_suppression_gather)

+    non_max_suppression_gather() : primitive_base("", {}) {}
+
     /// @brief Constructs non_max_suppression_gather primitive.
     /// @param id This primitive id.
     /// @param inputs Input primitives ids.
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp index 5dc7e61bc21734..5563bf8acf54d5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp @@ -13,6 +13,8 @@ namespace cldnn { struct unique_count : primitive_base { CLDNN_DECLARE_PRIMITIVE(unique_count) + unique_count() : primitive_base("", {}) {} + /// @brief Constructs unique_count primitive. /// @param id This primitive id. /// @param input Input primitive id. @@ -45,6 +47,8 @@ struct unique_count : primitive_base { struct unique_gather : primitive_base { CLDNN_DECLARE_PRIMITIVE(unique_gather) + unique_gather() : primitive_base("", {}) {} + /// @brief Constructs unique_gather primitive. /// @param id This primitive id. /// @param inputs Input primitives ids. diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp index d0105b0e83a028..63f0311f675123 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp @@ -18,7 +18,7 @@ const uint32_t INTEL_VENDOR_ID = 0x8086; struct device { public: using ptr = std::shared_ptr; - virtual device_info get_info() const = 0; + virtual const device_info& get_info() const = 0; virtual memory_capabilities get_mem_caps() const = 0; virtual bool is_same(const device::ptr other) = 0; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 7e77ceb6785cb5..79e37d1890b78d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -94,7 +94,7 @@ class engine { bool supports_allocation(allocation_type type) const; /// Returns device structure which represents stores device capabilities - device_info get_device_info() const; + const device_info& get_device_info() const; /// Returns device object associated with the engine const device::ptr get_device() const; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index f7b4db99afa092..5441d4a7930a51 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -4,7 +4,7 @@ #include "pass_manager.h" #include "program_helpers.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "convolution_inst.h" #include "deconvolution_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 64895c4b6f2814..f63f1bf4efbe21 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/runtime/error_handler.hpp" #include "pass_manager.h" #include "program_helpers.h" #include "strided_slice_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index 49b2ab5aa38c0b..300d93bc96f708 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -4,7 +4,7 @@ #include "condition_inst.h" #include "data_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index b11fb675f76196..f8aac08a07af04 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // #include "loop_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "mutable_data_inst.h" #include "input_layout_inst.h" -#include "intel_gpu/runtime/error_handler.hpp" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp index d8cce52a0aa167..35b433933d1295 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp @@ -6,7 +6,7 @@ #include "data_inst.h" #include "prior_box_inst.h" #include "input_layout_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include @@ -54,7 +54,7 @@ class wait_for_events_impl : public primitive_impl { return make_unique(prior_box); } - void update_dispatch_data(const kernel_impl_params& impl_param) override { } + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override { } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp index 16084f47efea1f..e750303b955d77 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp @@ -5,9 +5,7 @@ #include "openvino/core/type/element_type_traits.hpp" #include "register.hpp" #include "activation_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" @@ -290,7 +288,7 @@ struct activation_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const activation_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp index 7d11374f178c23..d03c49fb28efbe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp @@ -3,9 +3,8 @@ // #include "assign_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" -#include "intel_gpu/runtime/error_handler.hpp" namespace cldnn { namespace cpu { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp index 515615f700a847..79a6b77f442cba 100644 
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "broadcast_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/broadcast.hpp" @@ -124,7 +122,7 @@ struct broadcast_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const broadcast_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp index 85fd52fa3a24b6..6b7a483bae7d8c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "concatenation_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/concat.hpp" @@ -111,7 +109,7 @@ struct concatenation_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const concatenation_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp index 7b58dcdb20010c..6633bca02da8d2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp @@ -6,9 +6,7 @@ #include "register.hpp" #include "crop_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/slice.hpp" @@ -113,7 +111,7 @@ struct crop_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const crop_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 364ac62d1d1510..c2a01b56c63740 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -3,7 +3,7 @@ // #include "detection_output_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "cpu_impl_helpers.hpp" diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp index 67fd065412fe12..eb10f340d2656b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "eltwise_inst.h" -#include 
"implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/add.hpp" #include "openvino/op/multiply.hpp" @@ -205,7 +203,7 @@ struct eltwise_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const eltwise_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp index 5a6de3fd749e4f..242273a23dd000 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "gather_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/gather.hpp" @@ -114,7 +112,7 @@ struct gather_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const gather_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index f38efcd5c0d30c..4783159d501404 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -6,7 +6,7 @@ #include "primitive_inst.h" #include "register.hpp" #include "cpu_impl_helpers.hpp" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index 6e89d4d25d8106..e49cb3a832f8ae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -4,8 +4,7 @@ #include "proposal_inst.h" #include "intel_gpu/runtime/engine.hpp" -#include "implementation_map.hpp" -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include @@ -457,9 +456,7 @@ struct proposal_impl : typed_primitive_impl { // - image_info[3] = { img_height, img_width, img_depth } // - image_info[4] = { img_height, img_width, scale_min_bbox_y, scale_min_bbox_x } // - image_info[6] = { img_height, img_width, img_depth, scale_min_bbox_y, scale_min_bbox_x, scale_depth_index } - if (count != 3 && count != 4 && count != 6) { - CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items"); - } + OPENVINO_ASSERT(one_of(count, {3, 4, 6}), arg.id(), "image_info must have either 3, 4 or 6 items"); } return make_unique(arg); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp index 26515c0a35cd92..83142812f29e8b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "range_inst.h" 
-#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/range.hpp" @@ -91,7 +89,7 @@ struct range_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const range_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 20a8a4afa0e8e3..6c16618ac816d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -3,9 +3,8 @@ // #include "read_value_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" -#include "intel_gpu/runtime/error_handler.hpp" namespace cldnn { namespace cpu { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp index 80bd72f74528b2..5a3867f9d1582a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp @@ -4,7 +4,7 @@ #include "register.hpp" #include "reduce_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/reduce_max.hpp" #include "openvino/op/reduce_sum.hpp" @@ -149,7 +149,7 @@ struct reduce_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const reduce_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp index 98c5d618aebcfa..1b6f145c4ceb2d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "reorder_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/convert.hpp" @@ -84,7 +82,7 @@ struct reorder_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const reorder_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp index 13b97cdf818726..1a329ea495ef82 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "scatter_update_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/scatter_update.hpp" @@ 
-106,7 +104,7 @@ struct scatter_update_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const scatter_update_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp index 47728050f9731c..9c9ab75f64ad59 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "select_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/select.hpp" @@ -101,7 +99,7 @@ struct select_impl : public typed_primitive_impl