diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index af6758bafc11fc..ae5f9ee25624d3 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -46,4 +46,11 @@ jobs: run: cmake -DCMAKE_BUILD_TYPE=Release -DTHREADING=SEQ -B build - name: Build snippets - run: cmake --build build --target openvino_docs_snippets --parallel + if: ${{ runner.os == 'Linux' || runner.os == 'macOS'}} + run: cmake --build build --target openvino_docs_snippets --parallel $(nproc) + + - name: Build snippets Windows + if: ${{ runner.os == 'Windows'}} + shell: pwsh + run: cmake --build build --target openvino_docs_snippets --parallel $ENV:NUMBER_OF_PROCESSORS + diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml similarity index 98% rename from .github/workflows/linux_cpu_dev.yml rename to .github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 447a8c52968044..a8bac1b208044c 100644 --- a/.github/workflows/linux_cpu_dev.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -1,9 +1,9 @@ -name: Linux developer workflow for CPU plugin (Ubuntu 20.04) +name: Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04) on: workflow_dispatch: pull_request: paths: - - '.github/workflows/linux_cpu_dev.yml' + - '.github/workflows/dev_cpu_linux_snippets_libxsmm.yml' - 'src/common/snippets/**' - 'src/plugins/intel_cpu/src/nodes/subgraph.cpp' - 'src/plugins/intel_cpu/src/nodes/subgraph.h' diff --git a/docs/articles_en/assets/images/deployment_full.svg b/docs/articles_en/assets/images/deployment_full.svg index 0e059f4b626a7e..e3b4e02a393c8e 100644 --- a/docs/articles_en/assets/images/deployment_full.svg +++ b/docs/articles_en/assets/images/deployment_full.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f4f556048b744609002b58ad550ddc1bca91fa9e04d496cdabb73187d1681b2 -size 59990 +oid 
sha256:a4739c6e6de67cc82e1fb06f463c542209ed589dfb844cc2348de25dccafcb68 +size 83675 diff --git a/docs/articles_en/assets/images/deployment_simplified.svg b/docs/articles_en/assets/images/deployment_simplified.svg index c8b48412d576a1..735c557034dc55 100644 --- a/docs/articles_en/assets/images/deployment_simplified.svg +++ b/docs/articles_en/assets/images/deployment_simplified.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68d5003431670cea03abc68eba89ffc9c566e08782ae6f5a80dd4a2a20766847 -size 21883 +oid sha256:2c85d8be8a526aef8e200cfef35ae08f86a46bf5366957f312620d36d3d5403a +size 34874 diff --git a/docs/articles_en/assets/snippets/compile_model_gpu.cpp b/docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp similarity index 100% rename from docs/articles_en/assets/snippets/compile_model_gpu.cpp rename to docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp diff --git a/docs/articles_en/assets/snippets/compile_model_gpu.py b/docs/articles_en/assets/snippets/gpu/compile_model_gpu.py similarity index 100% rename from docs/articles_en/assets/snippets/compile_model_gpu.py rename to docs/articles_en/assets/snippets/gpu/compile_model_gpu.py diff --git a/docs/articles_en/assets/snippets/context_sharing.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing.cpp rename to docs/articles_en/assets/snippets/gpu/context_sharing.cpp diff --git a/docs/articles_en/assets/snippets/context_sharing_va.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing_va.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing_va.cpp rename to docs/articles_en/assets/snippets/gpu/context_sharing_va.cpp diff --git a/docs/articles_en/assets/snippets/context_sharing_va_c.cpp b/docs/articles_en/assets/snippets/gpu/context_sharing_va_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/context_sharing_va_c.cpp rename to 
docs/articles_en/assets/snippets/gpu/context_sharing_va_c.cpp diff --git a/docs/articles_en/assets/snippets/custom_kernels_api.cpp b/docs/articles_en/assets/snippets/gpu/custom_kernels_api.cpp similarity index 100% rename from docs/articles_en/assets/snippets/custom_kernels_api.cpp rename to docs/articles_en/assets/snippets/gpu/custom_kernels_api.cpp diff --git a/docs/articles_en/assets/snippets/custom_kernels_api.py b/docs/articles_en/assets/snippets/gpu/custom_kernels_api.py similarity index 100% rename from docs/articles_en/assets/snippets/custom_kernels_api.py rename to docs/articles_en/assets/snippets/gpu/custom_kernels_api.py diff --git a/docs/articles_en/assets/snippets/dynamic_batch.cpp b/docs/articles_en/assets/snippets/gpu/dynamic_batch.cpp similarity index 100% rename from docs/articles_en/assets/snippets/dynamic_batch.cpp rename to docs/articles_en/assets/snippets/gpu/dynamic_batch.cpp diff --git a/docs/articles_en/assets/snippets/dynamic_batch.py b/docs/articles_en/assets/snippets/gpu/dynamic_batch.py similarity index 100% rename from docs/articles_en/assets/snippets/dynamic_batch.py rename to docs/articles_en/assets/snippets/gpu/dynamic_batch.py diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp 
similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.py b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.py similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.py rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.py diff --git a/docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp b/docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp rename to docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp diff --git a/docs/articles_en/assets/snippets/queue_sharing.cpp b/docs/articles_en/assets/snippets/gpu/queue_sharing.cpp similarity index 100% rename from docs/articles_en/assets/snippets/queue_sharing.cpp rename to docs/articles_en/assets/snippets/gpu/queue_sharing.cpp diff --git a/docs/articles_en/assets/snippets/remote_objects_creation.cpp b/docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp similarity index 100% rename from docs/articles_en/assets/snippets/remote_objects_creation.cpp rename to docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp diff --git a/docs/articles_en/assets/snippets/remote_objects_creation_c.cpp b/docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp similarity index 100% rename from docs/articles_en/assets/snippets/remote_objects_creation_c.cpp rename to docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp diff --git a/docs/articles_en/assets/snippets/multi_threading.cpp b/docs/articles_en/assets/snippets/multi_threading.cpp index eae2b2e6326945..6b1db124ec6020 100644 --- a/docs/articles_en/assets/snippets/multi_threading.cpp +++ 
b/docs/articles_en/assets/snippets/multi_threading.cpp @@ -18,7 +18,7 @@ int main() { auto compiled_model_1 = core.compile_model(model, device, ov::inference_num_threads(1)); // Use logical processors of Efficient-cores for inference on hybrid platform - auto compiled_model_2 = core.compile_model(model, device, ov::hint::scheduling_core_type(ECORE_ONLY)); + auto compiled_model_2 = core.compile_model(model, device, ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)); // Use one logical processor per CPU core for inference when hyper threading is on auto compiled_model_3 = core.compile_model(model, device, ov::hint::enable_hyper_threading(false)); diff --git a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp index 75eb50839ca117..e8267e5d44cb4c 100644 --- a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp +++ b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp @@ -42,7 +42,7 @@ int main() { { //! [wrap_dmabuf_fd] - int32_t fd_heap; // create the DMA-BUF System Heap file descriptor + int32_t fd_heap = 0; // create the DMA-BUF System Heap file descriptor auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, fd_heap); //! [wrap_dmabuf_fd] } diff --git a/docs/articles_en/assets/snippets/ov_dynamic_shapes.c b/docs/articles_en/assets/snippets/ov_dynamic_shapes.c index 7e720dfc5dc457..fa1f3158365ddf 100644 --- a/docs/articles_en/assets/snippets/ov_dynamic_shapes.c +++ b/docs/articles_en/assets/snippets/ov_dynamic_shapes.c @@ -61,14 +61,14 @@ ov_model_t* model = NULL; ov_core_read_model(core, "model.xml", NULL, &model); //! 
[ov_dynamic_shapes:print_dynamic] -ov_output_port_t* output_port = NULL; -ov_output_port_t* input_port = NULL; +ov_output_const_port_t* output_port = NULL; +ov_output_const_port_t* input_port = NULL; ov_partial_shape_t partial_shape; -char * str_partial_shape = NULL; +const char * str_partial_shape = NULL; // Print output partial shape { -ov_model_output(model, &output_port); +ov_model_const_output(model, &output_port); ov_port_get_partial_shape(output_port, &partial_shape); str_partial_shape = ov_partial_shape_to_string(partial_shape); printf("The output partial shape: %s", str_partial_shape); @@ -76,7 +76,7 @@ printf("The output partial shape: %s", str_partial_shape); // Print input partial shape { -ov_model_input(model, &input_port); +ov_model_const_input(model, &input_port); ov_port_get_partial_shape(input_port, &partial_shape); str_partial_shape = ov_partial_shape_to_string(partial_shape); printf("The input partial shape: %s", str_partial_shape); @@ -85,8 +85,8 @@ printf("The input partial shape: %s", str_partial_shape); // free allocated resource ov_free(str_partial_shape); ov_partial_shape_free(&partial_shape); -ov_output_port_free(output_port); -ov_output_port_free(input_port); +ov_output_const_port_free(output_port); +ov_output_const_port_free(input_port); //! [ov_dynamic_shapes:print_dynamic] ov_model_free(model); ov_core_free(core); @@ -98,15 +98,15 @@ ov_core_create(&core); //! 
[ov_dynamic_shapes:detect_dynamic] ov_model_t* model = NULL; -ov_output_port_t* input_port = NULL; -ov_output_port_t* output_port = NULL; +ov_output_const_port_t* input_port = NULL; +ov_output_const_port_t* output_port = NULL; ov_partial_shape_t partial_shape; ov_core_read_model(core, "model.xml", NULL, &model); // for input { -ov_model_input_by_index(model, 0, &input_port); +ov_model_const_input_by_index(model, 0, &input_port); ov_port_get_partial_shape(input_port, &partial_shape); if (ov_partial_shape_is_dynamic(partial_shape)) { // input is dynamic @@ -115,7 +115,7 @@ if (ov_partial_shape_is_dynamic(partial_shape)) { // for output { -ov_model_output_by_index(model, 0, &output_port); +ov_model_const_output_by_index(model, 0, &output_port); ov_port_get_partial_shape(output_port, &partial_shape); if (ov_partial_shape_is_dynamic(partial_shape)) { // output is dynamic @@ -124,8 +124,8 @@ if (ov_partial_shape_is_dynamic(partial_shape)) { // free allocated resource ov_partial_shape_free(&partial_shape); -ov_output_port_free(input_port); -ov_output_port_free(output_port); +ov_output_const_port_free(input_port); +ov_output_const_port_free(output_port); //! [ov_dynamic_shapes:detect_dynamic] ov_model_free(model); ov_core_free(core); @@ -147,8 +147,8 @@ ov_infer_request_t* infer_request = NULL; ov_compiled_model_create_infer_request(compiled_model, &infer_request); //! 
[ov_dynamic_shapes:set_input_tensor] -ov_output_port_t* input_port = NULL; -ov_element_type_e* type = NULL; +ov_output_const_port_t* input_port = NULL; +ov_element_type_e type = UNDEFINED; ov_shape_t input_shape_1; ov_tensor_t* input_tensor_1 = NULL; ov_tensor_t* output_tensor = NULL; @@ -163,8 +163,8 @@ void* data_2 = NULL; // Create tensor compatible with the model input // Shape {1, 128} is compatible with any reshape statements made in previous examples { -ov_model_input(model, &input_port); -ov_port_get_element_type(input_port, type); +ov_model_const_input(model, &input_port); +ov_port_get_element_type(input_port, &type); int64_t dims[2] = {1, 128}; ov_shape_create(2, dims, &input_shape_1); ov_tensor_create(type, input_shape_1, &input_tensor_1); @@ -214,7 +214,7 @@ ov_tensor_get_shape(output_tensor, &output_shape_2); // ... read values in data_2 according to the shape output_shape_2 // free resource -ov_output_port_free(input_port); +ov_output_const_port_free(input_port); ov_shape_free(&input_shape_1); ov_tensor_free(input_tensor_1); ov_shape_free(&output_shape_1); diff --git a/docs/articles_en/assets/snippets/ov_patterns.cpp b/docs/articles_en/assets/snippets/ov_patterns.cpp index 0382468a01c0e7..ee52c733019d39 100644 --- a/docs/articles_en/assets/snippets/ov_patterns.cpp +++ b/docs/articles_en/assets/snippets/ov_patterns.cpp @@ -2,9 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // ! [ov:imports] -#include - -#include "common_test_utils/matcher.hpp" #include "openvino/op/abs.hpp" #include "openvino/op/add.hpp" #include "openvino/op/matmul.hpp" @@ -22,7 +19,7 @@ using namespace std; // ! [ov:imports] // ! 
[ov:create_simple_model_and_pattern] -TEST(pattern, simple_model_and_pattern) { +void create_simple_model_and_pattern() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -39,17 +36,13 @@ TEST(pattern, simple_model_and_pattern) { auto pattern_abs = std::make_shared(pattern_mul->output(0)); auto pattern_relu = std::make_shared(pattern_abs->output(0)); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:create_simple_model_and_pattern] // ! [ov:create_simple_model_and_pattern_wrap_type] -TEST(pattern, simple_model_and_pattern_wrap_type) { +void create_simple_model_and_pattern_wrap_type() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -66,17 +59,13 @@ TEST(pattern, simple_model_and_pattern_wrap_type) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:create_simple_model_and_pattern_wrap_type] // ! 
[ov:wrap_type_list] -TEST(pattern, wrap_type_list) { +void wrap_type_list() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -95,45 +84,42 @@ TEST(pattern, wrap_type_list) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // The same pattern perfectly matches 2 different nodes - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); - ASSERT_TRUE(tm.match(pattern_relu, model_sig)); + // pattern_relu should perfectly matches model_relu and model_sig } // ! [ov:wrap_type_list] void patterns_misc() { -// ! [ov:any_input] - auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input(), pattern::any_input()}); - auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); - auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); -// ! [ov:any_input] - -// ! [ov:wrap_type_predicate] - ov::pass::pattern::wrap_type({pattern::any_input()}, pattern::consumers_count(2)); -// ! [ov:wrap_type_predicate] - - -// ! [ov:any_input_predicate] - auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input([](const Output& value){ - return value.get_shape().size() == 4;}), - pattern::any_input([](const Output& value){ - return value.get_shape().size() == 4;})}); - auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); - auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); -// ! [ov:any_input_predicate] - - -// ! [ov:optional_predicate] - auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu, pattern::consumers_count(2)); -// ! [ov:optional_predicate] +{ + // ! 
[ov:any_input] + auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input(), pattern::any_input()}); + auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); + auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); + // ! [ov:any_input] + + // ! [ov:wrap_type_predicate] + ov::pass::pattern::wrap_type({pattern::any_input()}, pattern::consumers_count(2)); + // ! [ov:wrap_type_predicate] +} +{ + // ! [ov:any_input_predicate] + auto pattern_mul = ov::pass::pattern::wrap_type({pattern::any_input([](const Output& value){ + return value.get_shape().size() == 4;}), + pattern::any_input([](const Output& value){ + return value.get_shape().size() == 4;})}); + auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); + auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); + // ! [ov:any_input_predicate] + + + // ! [ov:optional_predicate] + auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu, pattern::consumers_count(2)); + // ! [ov:optional_predicate] +} } // ! [ov:pattern_or] -TEST(pattern, pattern_or) { +void pattern_or() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -158,17 +144,13 @@ TEST(pattern, pattern_or) { // Create Or node auto pattern_or = std::make_shared(OutputVector{red_pattern_sigmoid->output(0), blue_pattern_relu->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // The same pattern perfectly matches 2 different nodes - ASSERT_TRUE(tm.match(pattern_or, model_relu)); + // pattern_or should perfectly matches model_relu } // ! [ov:pattern_or] // ! 
[ov:pattern_optional_middle] -TEST(pattern, pattern_optional_middle) { +void pattern_optional_middle() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -186,17 +168,13 @@ TEST(pattern, pattern_optional_middle) { auto pattern_sig_opt = ov::pass::pattern::optional({pattern_abs->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_sig_opt->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:pattern_optional_middle] // ! [ov:pattern_optional_top] -TEST(pattern, pattern_optional_top) { +void pattern_optional_top() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -214,17 +192,13 @@ TEST(pattern, pattern_optional_top) { auto pattern_abs = ov::pass::pattern::wrap_type({pattern_mul->output(0)}); auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! [ov:pattern_optional_top] // ! [ov:pattern_optional_root] -TEST(pattern, pattern_optional_root) { +void pattern_optional_root() { // Create a sample model PartialShape shape{2, 2}; auto model_param1 = std::make_shared(element::i32, shape); @@ -242,10 +216,6 @@ TEST(pattern, pattern_optional_root) { auto pattern_relu = ov::pass::pattern::wrap_type({pattern_abs->output(0)}); auto pattern_sig_opt = ov::pass::pattern::optional(pattern_relu); - // Create a matcher and try to match the nodes - TestMatcher tm; - - // Should perfectly match - ASSERT_TRUE(tm.match(pattern_relu, model_relu)); + // pattern_relu should perfectly match model_relu } // ! 
[ov:pattern_optional_root] \ No newline at end of file diff --git a/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp b/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp index 61fc1d05ac8d42..81822a8366d96c 100644 --- a/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp +++ b/docs/articles_en/assets/snippets/ov_sparse_weights_decompression.cpp @@ -11,7 +11,7 @@ int main() { ov::AnyMap config; //! [ov:intel_cpu:sparse_weights_decompression:part0] ov::Core core; // Step 1: create ov::Core object - core.set_property(ov::intel_cpu::sparse_weights_decompression_rate(0.8)); // Step 1b: Enable sparse weights decompression feature + core.set_property(ov::intel_cpu::sparse_weights_decompression_rate(0.8f)); // Step 1b: Enable sparse weights decompression feature auto model = core.read_model(modelPath); // Step 2: Read Model //... // Step 3: Prepare inputs/outputs //... // Step 4: Set device configuration diff --git a/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst b/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst index 015c416ac5c258..97cf7314476076 100644 --- a/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst @@ -30,14 +30,14 @@ There are two options for using the custom operation configuration file: .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/custom_kernels_api.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/custom_kernels_api.py :language: python :fragment: [part0] .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/custom_kernels_api.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/custom_kernels_api.cpp :language: cpp :fragment: [part0] diff --git a/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst b/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst index 629b6646a7a80e..4d05172abac96b 100644 --- a/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst +++ b/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst @@ -41,9 +41,6 @@ to optimally saturate devices with computations. If your application is in C language, you need to additionally include the ``openvino_c`` library. -The ``plugins.xml`` file with information about inference devices must also be taken as a support file for ``openvino``. - - Libraries for Pluggable Components ################################## @@ -58,6 +55,7 @@ For each inference device, OpenVINO Runtime has its own plugin library: - ``openvino_intel_cpu_plugin`` for :doc:`Intel® CPU devices <../running-inference/inference-devices-and-modes/cpu-device>` - ``openvino_intel_gpu_plugin`` for :doc:`Intel® GPU devices <../running-inference/inference-devices-and-modes/gpu-device>` +- ``openvino_intel_npu_plugin`` for :doc:`Intel® NPU devices <../running-inference/inference-devices-and-modes/npu-device>` - ``openvino_arm_cpu_plugin`` for :doc:`ARM CPU devices <../running-inference/inference-devices-and-modes/cpu-device>` Depending on which devices are used in the app, the corresponding libraries should be included in the distribution package. 
@@ -80,6 +78,8 @@ Refer to the table below for details: | | | cache.json | | ``.\runtime\bin\intel64\Release\cache.json`` or | | | | | | ``.\runtime\bin\intel64\Debug\cache.json`` | +--------------+-------------------------+-------------------------------------------------------+ + | NPU | — | — | + +--------------+-------------------------+-------------------------------------------------------+ | Arm® CPU | — | — | +--------------+-------------------------+-------------------------------------------------------+ @@ -103,6 +103,8 @@ Refer to the table below for details: | GPU | | libOpenCL.so | | ``/usr/lib/x86_64-linux-gnu/libOpenCL.so.1`` | | | | cache.json | | ``./runtime/lib/intel64/cache.json`` | +--------------+-------------------------+-------------------------------------------------------+ + | NPU | — | — | + +--------------+-------------------------+-------------------------------------------------------+ .. tab-item:: macOS arm64 :sync: macos-arm-64 diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 1b9c5b89eff8bc..6104998c4beae8 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -59,14 +59,14 @@ Then, the device name can be passed to the ``ov::Core::compile_model()`` method, .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_default_gpu .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_default_gpu @@ -77,14 +77,14 @@ Then, the device name can be passed to the ``ov::Core::compile_model()`` method, .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_gpu_with_id .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_gpu_with_id @@ -95,14 +95,14 @@ Then, the device name can be passed to the ``ov::Core::compile_model()`` method, .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_gpu_with_id_and_tile .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_gpu_with_id_and_tile @@ -152,14 +152,14 @@ It is done by specifying ``AUTO:GPU.1,GPU.0`` as a target device, and adding the .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_auto .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_auto @@ -183,14 +183,14 @@ Alternatively, it can be enabled explicitly via the device notion, for example ` .. 
tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_batch_plugin .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_batch_plugin @@ -201,14 +201,14 @@ Alternatively, it can be enabled explicitly via the device notion, for example ` .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.py :language: Python :fragment: compile_model_auto_batch .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/compile_model_gpu.cpp :language: cpp :fragment: compile_model_auto_batch @@ -275,14 +275,14 @@ The code snippet below demonstrates examples of a bounded dynamic batch: .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/dynamic_batch.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/dynamic_batch.py :language: Python :fragment: dynamic_batch .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/dynamic_batch.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/dynamic_batch.cpp :language: cpp :fragment: dynamic_batch @@ -351,14 +351,14 @@ The GPU plugin has the following additional preprocessing options: .. tab-item:: Python :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.py + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.py :language: Python :fragment: init_preproc .. tab-item:: C++ :sync: cpp - .. 
doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp :fragment: init_preproc diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin.rst index 9af801ae9861c2..f865c3b7813f5c 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin.rst @@ -61,21 +61,21 @@ of ``ov::RemoteContext`` derived classes. .. tab-item:: Create from cl_context :sync: create-from-cl-context - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_cl_context] .. tab-item:: Create from cl_queue :sync: create-from-cl-queue - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_cl_queue] .. tab-item:: Create from ID3D11Device :sync: create-from-id3d11device - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_d3d_device] @@ -87,21 +87,21 @@ of ``ov::RemoteContext`` derived classes. .. tab-item:: Create from cl_context :sync: create-from-cl-context - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_cl_context] .. tab-item:: Create from cl_queue :sync: create-from-cl-queue - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_cl_queue] .. tab-item:: Create from ID3D11Device :sync: create-from-id3d11device - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_d3d_device] @@ -113,21 +113,21 @@ of ``ov::RemoteContext`` derived classes. .. tab-item:: Create from cl_context :sync: create-from-cl-context - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_cl_context] .. tab-item:: Create from cl_queue :sync: create-from-cl-queue - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_cl_queue] .. tab-item:: Create from VADisplay :sync: create-from-vadisplay - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [context_from_va_display] @@ -139,21 +139,21 @@ of ``ov::RemoteContext`` derived classes. .. tab-item:: Create from cl_context :sync: create-from-cl-context - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_cl_context] .. 
tab-item:: Create from cl_queue :sync: create-from-cl-queue - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_cl_queue] .. tab-item:: Create from VADisplay :sync: create-from-vadisplay - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [context_from_va_display] @@ -177,14 +177,14 @@ To request the current default context of the plugin, use one of the following m .. tab-item:: Get context from Core :sync: get-context-core - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [default_context_from_core] .. tab-item:: Get context from compiled model :sync: get-context-compiled-model - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [default_context_from_model] @@ -196,14 +196,14 @@ To request the current default context of the plugin, use one of the following m .. tab-item:: Get context from Core :sync: get-context-core - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [default_context_from_core] .. tab-item:: Get context from compiled model :sync: get-context-compiled-model - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [default_context_from_model] @@ -231,35 +231,35 @@ For more details, see the code snippets below: .. tab-item:: USM pointer :sync: usm-pointer - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [wrap_usm_pointer] .. tab-item:: cl_mem :sync: cl-mem - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [wrap_cl_mem] .. tab-item:: cl::Buffer :sync: buffer - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [wrap_cl_buffer] .. tab-item:: cl::Image2D :sync: image2D - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [wrap_cl_image] .. tab-item:: biplanar NV12 surface :sync: biplanar-nv12-surface - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [wrap_nv12_surface] @@ -271,21 +271,21 @@ For more details, see the code snippets below: .. tab-item:: USM host memory :sync: usm-host-memory - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [allocate_usm_host] .. tab-item:: USM device memory :sync: usm-device-memory - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [allocate_usm_device] .. tab-item:: cl::Buffer :sync: buffer - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation.cpp :language: cpp :fragment: [allocate_cl_buffer] @@ -299,35 +299,35 @@ For more details, see the code snippets below: .. tab-item:: USM pointer :sync: usm-pointer - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [wrap_usm_pointer] .. tab-item:: cl_mem :sync: cl-mem - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [wrap_cl_mem] .. tab-item:: cl::Buffer :sync: buffer - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [wrap_cl_buffer] .. tab-item:: cl::Image2D :sync: image2D - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [wrap_cl_image] .. tab-item:: biplanar NV12 surface :sync: biplanar-nv12-surface - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [create_nv12_surface] @@ -339,14 +339,14 @@ For more details, see the code snippets below: .. tab-item:: USM host memory :sync: usm-host-memory - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [allocate_usm_host] .. tab-item:: USM device memory :sync: usm-device-memory - .. doxygensnippet:: docs/articles_en/assets/snippets/remote_objects_creation_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/remote_objects_creation_c.cpp :language: c :fragment: [allocate_usm_device] @@ -379,28 +379,28 @@ should be added before model compilation: .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp :fragment: [init_preproc] .. tab-item:: C :sync: c - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp :language: c :fragment: [init_preproc] .. tab-item:: single-plane :sync: single-plane - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp :fragment: [init_preproc] .. tab-item:: NV12 to Grey :sync: nv12-grey - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp :fragment: [init_preproc] @@ -424,28 +424,28 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp :fragment: [single_batch] .. tab-item:: C :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes_c.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes_c.cpp :language: c :fragment: [single_batch] .. tab-item:: single-plane :sync: single-plane - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp :fragment: [single_batch] .. tab-item:: NV12 to Grey :sync: nv12-grey - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp :fragment: [single_batch] @@ -457,21 +457,21 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. tab-item:: two-plane :sync: two-plane - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_two_planes.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp :fragment: [batched_case] .. tab-item:: single-plane :sync: single-plane - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_single_plane.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp :fragment: [batched_case] .. tab-item:: NV12 to Grey :sync: nv12-grey - .. doxygensnippet:: docs/articles_en/assets/snippets/preprocessing_nv12_to_gray.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp :fragment: [batched_case] @@ -492,7 +492,7 @@ on waiting for the completion of inference. The pseudo-code may look as follows: .. dropdown:: Queue and context sharing example - .. doxygensnippet:: docs/articles_en/assets/snippets/queue_sharing.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/queue_sharing.cpp :language: cpp :fragment: [queue_sharing] @@ -533,13 +533,13 @@ To see pseudo-code of usage examples, refer to the sections below. 
This example uses the OpenCL context obtained from a compiled model object. - .. doxygensnippet:: docs/articles_en/assets/snippets/context_sharing.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/context_sharing.cpp :language: cpp :fragment: [context_sharing_get_from_ov] .. dropdown:: Running GPU Plugin Inference within User-Supplied Shared Context - .. doxygensnippet:: docs/articles_en/assets/snippets/context_sharing.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/context_sharing.cpp :language: cpp :fragment: [context_sharing_user_handle] @@ -550,14 +550,14 @@ To see pseudo-code of usage examples, refer to the sections below. .. tab-item:: C++ :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/context_sharing_va.cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/gpu/context_sharing_va.cpp :language: cpp :fragment: [context_sharing_va] .. tab-item:: C :sync: c - .. doxygensnippet:: docs/articles_en/assets/snippets/context_sharing_va_c.cpp + .. 
doxygensnippet:: docs/articles_en/assets/snippets/gpu/context_sharing_va_c.cpp :language: c :fragment: [context_sharing_va] diff --git a/docs/requirements.txt b/docs/requirements.txt index 98328772f48c60..5703503a9ba158 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,7 +4,7 @@ attrs==22.1.0 Babel==2.11.0 beautifulsoup4==4.9.3 breathe==4.35.0 -certifi==2023.7.22 +certifi==2024.7.4 colorama==0.4.6 Cython==0.29.33 docutils==0.20 @@ -45,5 +45,5 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.9 toml==0.10.2 -urllib3==1.26.18 +urllib3==1.26.19 zipp==3.4.1 \ No newline at end of file diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt index e21443b7782137..f853d07328373b 100644 --- a/docs/snippets/CMakeLists.txt +++ b/docs/snippets/CMakeLists.txt @@ -18,12 +18,14 @@ endif() file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c") -file(GLOB GPU_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/*.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/*.c") +file(GLOB GPU_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/gpu/*.cpp") # add GPU snippets if OpenCL has been found if(TARGET OpenCL::OpenCL) - list(APPEND SOURCES ${GPU_SOURCES}) + list(APPEND SOURCES ${GPU_SOURCES}) endif() # try to find VA libraries @@ -38,7 +40,7 @@ endif() # remove OpenCV related sources find_package(OpenCV QUIET COMPONENTS core imgcodecs) if(NOT OpenCV_FOUND OR NOT OpenCV_VERSION VERSION_GREATER_EQUAL 3) - list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ShapeInference.cpp") + list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../articles_en/assets/snippets/ShapeInference.cpp") endif() # requires mfxFrameSurface1 and MSS API diff --git 
a/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv b/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv index dfdd15997dc38d..8422cd5fd89e90 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/OV-benchmark-data.csv @@ -526,4 +526,4 @@ stable-diffusion-v2-1,OV-2024.3.0,accel,Intel® Arc™ A770M dGPU,,,,,,321,150,1 stable-diffusion-v2-1,OV-2024.3.0,accel,Intel® Data Center GPU Flex 140 dGPU,,,,,,1900,75,1,1900,75,13.59,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",13.31,,,,, stable-diffusion-v2-1,OV-2024.3.0,core-iGPU,Intel® Core™ i7-1360P iGPU-only,,,,,,480,28,1,480,28,20.85,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",19.95,,,,, stable-diffusion-v2-1,OV-2024.3.0,core-iGPU,Intel® Core™ Ultra 7 processor 165H iGPU-only,,,,,,460,28,1,460,28,12.98,"Generation time, sec.",Generation time/$,Generation time/TDP,"Generation time, sec.",12.41,,,,, -end_rec,,,,,,,,,,,,,,,,,,,,,,,, +end_rec,,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css index 18586f01964db9..aac910c5ecd04d 100644 --- a/docs/sphinx_setup/_static/css/custom.css +++ b/docs/sphinx_setup/_static/css/custom.css @@ -116,6 +116,10 @@ a#wap_dns {display: none;} font-weight: 600; } +.bold { + font-weight: 700; +} + /* Underline width */ a:hover, .toc-entry a.nav-link:hover, @@ -126,7 +130,6 @@ nav.bd-links li > a:hover { #bd-docs-nav div ul a:hover { color: white; - text-decoration: underline } ul#navbar-main-elements > li:hover { @@ -140,6 +143,18 @@ nav.bd-links .current>a { color: black; } +a.current { + background-color: #76CEFF!important; +} + +.bd-sidebar-primary label.toctree-toggle:hover { + background: #76CEFF!important; +} + +a.current svg:not(:host).svg-inline--fa, svg:not(:root).svg-inline--fa { + color: black; +} + .bd-header 
.navbar-nav li a.nav-link { color: #fff; font-size: 1rem; diff --git a/docs/sphinx_setup/_static/js/openVinoDataTables.js b/docs/sphinx_setup/_static/js/openVinoDataTables.js index 0934d10ed90353..59e750220e20e6 100644 --- a/docs/sphinx_setup/_static/js/openVinoDataTables.js +++ b/docs/sphinx_setup/_static/js/openVinoDataTables.js @@ -2,9 +2,14 @@ $(document).ready(function () { var table = $('table.modeldata').DataTable({ "autoWidth": false, stateSave: true, + lengthMenu: [ + [10, 25, 50, -1], + ['10 rows', '25 rows', '50 rows', 'Show all rows'] + ], layout: { topStart: { buttons: [ + 'pageLength', 'colvis', { extend: 'colvisGroup', @@ -18,7 +23,6 @@ $(document).ready(function () { columns: ':visible' } } - ] } } diff --git a/docs/sphinx_setup/_static/js/open_sidebar.js b/docs/sphinx_setup/_static/js/open_sidebar.js index 6f01a7b2d49614..66ddf98c97403e 100644 --- a/docs/sphinx_setup/_static/js/open_sidebar.js +++ b/docs/sphinx_setup/_static/js/open_sidebar.js @@ -3,4 +3,9 @@ $(document).ready(function() { for(let i = 0; i < labels.length; i++){ labels[i].classList.remove("rotate"); } + + const menus = $( "ul.bd-sidenav > li > a" ); + for(let i = 0; i < menus.length; i++){ + menus[i].classList.add("bold"); + } }) \ No newline at end of file diff --git a/src/bindings/js/node/include/model_wrap.hpp b/src/bindings/js/node/include/model_wrap.hpp index 42da58aa76f5e0..1d8aaf5afdd421 100644 --- a/src/bindings/js/node/include/model_wrap.hpp +++ b/src/bindings/js/node/include/model_wrap.hpp @@ -116,6 +116,13 @@ class ModelWrap : public Napi::ObjectWrap { */ Napi::Value get_output_element_type(const Napi::CallbackInfo& info); + /** + * @brief Returns a cloned model for the current model + * @param info Contains information about the environment and passed arguments + * @return Napi::Value Cloned model returned from the API + */ + Napi::Value clone(const Napi::CallbackInfo& info); + private: std::shared_ptr _model; ov::Core _core; diff --git 
a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index 3c07b95455c5c6..88bd874210dbcc 100644 --- a/src/bindings/js/node/lib/addon.ts +++ b/src/bindings/js/node/lib/addon.ts @@ -214,6 +214,10 @@ interface CoreConstructor { * A user-defined model read by {@link Core.readModel}. */ interface Model { + /** + * It returns a cloned model. + */ + clone(): Model; /** * It gets the friendly name for a model. If a friendly name is not set * via {@link Model.setFriendlyName}, a unique model name is returned. diff --git a/src/bindings/js/node/src/model_wrap.cpp b/src/bindings/js/node/src/model_wrap.cpp index b53170c5246f80..88baf9da021b74 100644 --- a/src/bindings/js/node/src/model_wrap.cpp +++ b/src/bindings/js/node/src/model_wrap.cpp @@ -27,6 +27,7 @@ Napi::Function ModelWrap::get_class(Napi::Env env) { InstanceMethod("getFriendlyName", &ModelWrap::get_friendly_name), InstanceMethod("getOutputShape", &ModelWrap::get_output_shape), InstanceMethod("getOutputElementType", &ModelWrap::get_output_element_type), + InstanceMethod("clone", &ModelWrap::clone), InstanceAccessor<&ModelWrap::get_inputs>("inputs"), InstanceAccessor<&ModelWrap::get_outputs>("outputs")}); } @@ -189,3 +190,17 @@ Napi::Value ModelWrap::get_output_element_type(const Napi::CallbackInfo& info) { return info.Env().Undefined(); } } + +Napi::Value ModelWrap::clone(const Napi::CallbackInfo& info) { + std::vector allowed_signatures; + try { + if (ov::js::validate(info, allowed_signatures)) { + return cpp_to_js(info.Env(), _model->clone()); + } else { + OPENVINO_THROW("'clone'", ov::js::get_parameters_error_msg(info, allowed_signatures)); + } + } catch (const std::exception& e) { + reportError(info.Env(), e.what()); + return info.Env().Undefined(); + } +} diff --git a/src/bindings/js/node/tests/unit/model.test.js b/src/bindings/js/node/tests/unit/model.test.js index d5ac4f163367bb..7728f13a25dce9 100644 --- a/src/bindings/js/node/tests/unit/model.test.js +++ 
b/src/bindings/js/node/tests/unit/model.test.js @@ -9,6 +9,7 @@ const { getModelPath } = require('./utils.js'); const testXml = getModelPath().xml; const core = new ov.Core(); const model = core.readModelSync(testXml); +const clonedModel = model.clone(); describe('Node.js Model.isDynamic()', () => { it('should return a boolean value indicating if the model is dynamic', () => { @@ -157,3 +158,20 @@ describe('Model.getOutputElementType()', () => { ); }); }); + +describe('Model.clone()', () => { + it('should return an object of type model', () => { + assert.ok(clonedModel instanceof ov.Model, 'clone() should return a model'); + }); + + it('should return a model that is a clone of the calling model', () => { + assert.deepStrictEqual(clonedModel, model, "Cloned Model should be exactly equal to the calling model"); + }); + + it('should not accept any arguments', () => { + assert.throws( + () => model.clone("Unexpected argument").then(), + /'clone' method called with incorrect parameters./ + ); + }); +}); diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index a3d7143340f44c..b05da86fc3515d 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -11,6 +11,7 @@ #include "snippets_isa.hpp" #include "snippets/lowered/linear_ir.hpp" +#include "snippets/kernel_executor_table.hpp" #include "snippets/shape_types.hpp" #include "target_machine.hpp" @@ -32,7 +33,8 @@ class LoweringResult { std::vector> m_saved_emitters{}; public: - std::shared_ptr compiled_snippet = nullptr; + CompiledSnippetPtr compiled_snippet = nullptr; + KernelExecutorTablePtr kernel_executor_table = nullptr; }; /** diff --git a/src/common/snippets/include/snippets/kernel_executor_table.hpp b/src/common/snippets/include/snippets/kernel_executor_table.hpp index af797e4c80422a..2d4b1185ffc5d7 100644 --- a/src/common/snippets/include/snippets/kernel_executor_table.hpp +++ 
b/src/common/snippets/include/snippets/kernel_executor_table.hpp @@ -43,7 +43,7 @@ class KernelExecutorBase { * @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary. * This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr */ - virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) = 0; + virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir) = 0; /** * @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary. * This method should be called to restore a saved state of the executor, that was configured using update_by_expression(). @@ -70,7 +70,7 @@ class KernelExecutor : public KernelExecutorBase { explicit KernelExecutor(Conf c) : KernelExecutorBase(), m_config{std::move(c)} {} // Note: override when final is redundant, but needed to avoid warnings on some compilers - void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) override final { // NOLINT + void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir) override final { // NOLINT update_config(expr, linear_ir, m_config); OPENVINO_ASSERT(m_config.is_completed(), "Failed to update kernel config in update_by_expression"); update_kernel(m_config, m_kernel); @@ -103,7 +103,7 @@ class KernelExecutor : public KernelExecutorBase { protected: /*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). 
*/ - virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir, Conf& config) const = 0; + virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir, Conf& config) const = 0; /*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is * performed if necessary. */ virtual void update_kernel(const Conf& c, std::shared_ptr& kernel) const = 0; @@ -122,17 +122,26 @@ class KernelExecutorTable { typename std::enable_if::value, bool>::type = true> std::shared_ptr register_kernel(const lowered::ExpressionPtr& expr, C... args) { const auto& instance = std::make_shared(args...); - OPENVINO_ASSERT(m_table.insert({expr, instance}).second, "This expression already has an alterable kernel"); + OPENVINO_ASSERT(m_table.insert({expr->get_exec_num(), instance}).second, "This expression execution number already has an alterable kernel"); return instance; } - const std::shared_ptr& get_kernel_executor(const lowered::ExpressionPtr& expr) const { - OPENVINO_ASSERT(m_table.count(expr), "This expression doesn't have a registered kernel executor"); - return m_table.at(expr); + + const std::shared_ptr& get_kernel_executor(const lowered::ExpressionPtr& expr) const { + return get_kernel_executor(expr->get_exec_num()); + } + const std::shared_ptr& get_kernel_executor(double expr_exec_num) const { + OPENVINO_ASSERT(m_table.count(expr_exec_num), "This expression execution number doesn't have a registered kernel executor"); + return m_table.at(expr_exec_num); } + /*** Updates every registered KernelExecutor in accordance with the corresponding expression */ - void update_state(const lowered::LinearIRPtr& linear_ir) const { - for (const auto& record : m_table) - record.second->update_by_expression(record.first, linear_ir); + void update_state(const lowered::LinearIRCPtr& linear_ir) const { + for (const auto& expr : *linear_ir) { + const auto& found = 
m_table.find(expr->get_exec_num()); + if (found != m_table.end()) { + found->second->update_by_expression(expr, linear_ir); + } + } } /*** Returns lambda function that contains current state of the table, and restores this state when called */ @@ -141,19 +150,12 @@ class KernelExecutorTable { return [=]() { reset_state(current_state); }; } - /** - * @brief Replace originally registered ExpressionPtr with a new value. - * Note that code emission is performed on a copy of LIR, so all expression pointers visible from emitters won't - * be accessible from RuntimeConfigurator. In order to replace these cloned ExpressionPtrs with the original ones, - * we need to call this method. - */ - void replace_key_expression(const lowered::ExpressionPtr& from, const lowered::ExpressionPtr& to); - virtual ~KernelExecutorTable() = default; protected: - std::unordered_map> m_table{}; - typedef std::vector>> ExecTableState; + std::unordered_map> m_table {}; + + typedef std::vector>> ExecTableState; /*** Restore the table state previously obtained by get_state() */ void reset_state(const ExecTableState& state); diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index f2e45f8af68e17..55afd2c9ccd7ab 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -284,6 +284,7 @@ class LinearIR { size_t m_static_buffer_scratchpad_size = 0; }; using LinearIRPtr = std::shared_ptr; +using LinearIRCPtr = std::shared_ptr; template iterator LinearIR::find(iterator begin, iterator end, const ExpressionPtr& target) const { diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index 7837625f6e3e3c..84b66ce4d5306c 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -116,6 +116,7 @@ class Subgraph : 
public ov::op::util::SubGraphOp { std::shared_ptr clone() const; + const std::shared_ptr& get_runtime_configurator() const; const std::shared_ptr& update_runtime_config() const; static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; diff --git a/src/common/snippets/include/snippets/runtime_configurator.hpp b/src/common/snippets/include/snippets/runtime_configurator.hpp index 058eca59716d1b..a0c7d8336c5cd1 100644 --- a/src/common/snippets/include/snippets/runtime_configurator.hpp +++ b/src/common/snippets/include/snippets/runtime_configurator.hpp @@ -61,28 +61,36 @@ class RuntimeConfigurator { * @param linear_ir LinearIR * @return updated config */ - const std::shared_ptr& get_updated_config(const lowered::LinearIRPtr& linear_ir); - /*** Returns pointer to KernelExecutorTable owned by the config */ + const std::shared_ptr& get_updated_config(const lowered::LinearIRCPtr& linear_ir); + /** + * @brief Returns pointer to KernelExecutorTable owned by the config + * @return updated KernelExecutorTable + */ const std::shared_ptr& get_kernel_executor_table() const { return m_config->kernel_executor_table; } + /** + * @brief Set new KernelExecutorTable to the config + * @param table new KernelExecutorTable + */ + void set_kernel_executor_table(std::shared_ptr table) const; protected: /** * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - virtual void update(const lowered::LinearIRPtr& linear_ir); + virtual void update(const lowered::LinearIRCPtr& linear_ir); /** * @brief Allocate and intialize fields in RuntimeConfig and RuntimeConfigurator * @param linear_ir LinearIR */ - virtual void initialization(const lowered::LinearIRPtr& linear_ir); + virtual void initialization(const lowered::LinearIRCPtr& linear_ir); /** * @brief Initializes input and data information of LinearIR: * descriptors (that contains shapes and layouts) and data_sizes * @param linear_ir LinearIR */ - void init_data_info(const lowered::LinearIRPtr& 
linear_ir); + void init_data_info(const lowered::LinearIRCPtr& linear_ir); /** * @brief Initializes information of buffers: * - static buffer_scratchpad_size @@ -90,23 +98,23 @@ class RuntimeConfigurator { * - clusters with dynamic buffers (`m_dynamic_buffer_clusters`) for the quick access in `update()` * @param linear_ir LinearIR */ - void init_buffer_info(const lowered::LinearIRPtr& linear_ir); + void init_buffer_info(const lowered::LinearIRCPtr& linear_ir); /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - virtual void init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const; + virtual void init_tensor_rank(const lowered::LinearIRCPtr& linear_ir) const; /** * @brief Update Loop informations in LinearIR: Unified and ExpandedLoopInfo * @param linear_ir LinearIR */ - void update_loop_info(const lowered::LinearIRPtr& linear_ir) const; + void update_loop_info(const lowered::LinearIRCPtr& linear_ir) const; /** * @brief Update Buffer scratchpad size and offsets if needed * Note: `update_loop_info` must be called before * @param linear_ir LinearIR */ - void update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const; + void update_buffer_scratchpad_size(const lowered::LinearIRCPtr& linear_ir) const; /** * @brief Calculate data offsets of LinearIR and update these values in RuntimeConfig */ diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 29d9e066b153af..c01685e6531eb6 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -5,6 +5,7 @@ #include "snippets/generator.hpp" #include "snippets/itt.hpp" +#include "snippets/runtime_configurator.hpp" #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/expression.hpp" #include "snippets/op/kernel.hpp" @@ -46,6 +47,7 @@ LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const void* com result.m_saved_emitters.emplace_back(emitter); } result.compiled_snippet = 
target->get_snippet(); + result.kernel_executor_table = target->get_runtime_configurator()->get_kernel_executor_table(); return result; } diff --git a/src/common/snippets/src/kernel_executor_table.cpp b/src/common/snippets/src/kernel_executor_table.cpp index 964ed736f13dd0..9b43c901f55edb 100644 --- a/src/common/snippets/src/kernel_executor_table.cpp +++ b/src/common/snippets/src/kernel_executor_table.cpp @@ -7,21 +7,13 @@ namespace ov { namespace snippets { -void KernelExecutorTable::replace_key_expression(const snippets::lowered::ExpressionPtr& from, const snippets::lowered::ExpressionPtr& to) { - const auto& found = m_table.find(from); - if (found != m_table.end()) { - OPENVINO_ASSERT(m_table.count(to) == 0, "Attempt to replace a value that is already in the KernelExecutorTable"); - m_table.insert({to, found->second}); - m_table.erase(found); - } -} - void KernelExecutorTable::reset_state(const ExecTableState& state) { OPENVINO_ASSERT(state.size() == m_table.size(), "Invalid state in restore_state: size mismatch"); auto state_it = state.begin(); for (const auto& table_record : m_table) { const auto& state_record = *state_it++; - OPENVINO_ASSERT(table_record.first == state_record.first, "Invalid state in restore_state: expressions mismatch"); + OPENVINO_ASSERT(table_record.first == state_record.first, + "Invalid state in restore_state: expression execution numbers mismatched"); table_record.second->update_by_config(*state_record.second); } } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 4ede0b58a66cf0..55fd4acb2fa315 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -544,22 +544,21 @@ snippets::Schedule Subgraph::generate(const void* compile_params) const { } auto lowering_result = m_generator->generate(linear_ir, compile_params); - - // Note: Since the code emission is performed on a copy of LIR, but RuntimeConfigurator works with the initial instance, - // 
we need to replace cloned expression pointers to original ones in the KernelExecutorTable. Ticket: 129772 - const auto& exec_table = m_generator->get_target_machine()->get_runtime_configurator()->get_kernel_executor_table(); - for (const auto& expr : *m_linear_ir) - exec_table->replace_key_expression(expression_map.at(expr.get()), expr); // Some kernel executors might've been registered during code emission. // We need to update them, so appropriate kernels will be compiled. + const auto& exec_table = get_runtime_configurator()->get_kernel_executor_table(); exec_table->update_state(m_linear_ir); return {std::move(lowering_result)}; } -const std::shared_ptr& Subgraph::update_runtime_config() const { +const std::shared_ptr& Subgraph::get_runtime_configurator() const { OPENVINO_ASSERT(m_generator, "Generator has not been inited!"); + return m_generator->get_target_machine()->get_runtime_configurator(); +} + +const std::shared_ptr& Subgraph::update_runtime_config() const { OPENVINO_ASSERT(m_linear_ir, "LoweredLinearIR has not been inited!"); - return m_generator->get_target_machine()->get_runtime_configurator()->get_updated_config(m_linear_ir); + return get_runtime_configurator()->get_updated_config(m_linear_ir); } void Subgraph::print() const { diff --git a/src/common/snippets/src/runtime_configurator.cpp b/src/common/snippets/src/runtime_configurator.cpp index ec1db44f074766..062b3a2d86fbb2 100644 --- a/src/common/snippets/src/runtime_configurator.cpp +++ b/src/common/snippets/src/runtime_configurator.cpp @@ -35,7 +35,7 @@ RuntimeConfigurator::RuntimeConfigurator(std::shared_ptr c) : OPENVINO_ASSERT(m_config, "Runtime config is nullptr!"); } -const std::shared_ptr& RuntimeConfigurator::get_updated_config(const lowered::LinearIRPtr& linear_ir) { +const std::shared_ptr& RuntimeConfigurator::get_updated_config(const lowered::LinearIRCPtr& linear_ir) { // First initialization if (m_io_num == 0) initialization(linear_ir); @@ -44,7 +44,7 @@ const std::shared_ptr& 
RuntimeConfigurator::get_updated_config(co return m_config; } -void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir) { +void RuntimeConfigurator::initialization(const lowered::LinearIRCPtr& linear_ir) { init_data_info(linear_ir); init_tensor_rank(linear_ir); init_buffer_info(linear_ir); @@ -55,7 +55,7 @@ void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir) m_config->tile_rank = linear_ir->get_config().m_loop_depth; } -void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) { +void RuntimeConfigurator::update(const lowered::LinearIRCPtr& linear_ir) { if (linear_ir->is_dynamic()) { update_loop_info(linear_ir); update_buffer_scratchpad_size(linear_ir); @@ -67,11 +67,11 @@ void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) { update_latest_shapes(); } -void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const { +void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRCPtr& linear_ir) const { m_config->tensor_rank = linear_ir->get_master_shape().size(); } -void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir) { +void RuntimeConfigurator::init_data_info(const lowered::LinearIRCPtr& linear_ir) { const auto& parameters = linear_ir->get_parameters(); const auto& results = linear_ir->get_results(); m_in_num = parameters.size(); @@ -113,7 +113,7 @@ void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir) } } -void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir) { +void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRCPtr& linear_ir) { std::map> dynamic_buffer_clusters, static_buffer_clusters; // All needed checks are in Validate pass @@ -143,7 +143,7 @@ void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir m_dynamic_buffer_clusters = std::move(dynamic_buffer_clusters); } -void RuntimeConfigurator::update_loop_info(const 
lowered::LinearIRPtr& linear_ir) const { +void RuntimeConfigurator::update_loop_info(const lowered::LinearIRCPtr& linear_ir) const { // Initialized UnifiedLoopInfo struct CurrentUnifiedLoopInfo { size_t current_work_amount = 0; @@ -202,7 +202,7 @@ void RuntimeConfigurator::update_loop_info(const lowered::LinearIRPtr& linear_ir } } -void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const { +void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRCPtr& linear_ir) const { const auto& loop_manager = linear_ir->get_loop_manager(); m_config->buffer_scratchpad_size = linear_ir->get_static_buffer_scratchpad_size(); @@ -278,5 +278,10 @@ void RuntimeConfigurator::update_latest_shapes() { } } +void RuntimeConfigurator::set_kernel_executor_table(std::shared_ptr table) const { + OPENVINO_ASSERT(table, "Failed to update Kernel Executor Table: passed table is missing"); + m_config->kernel_executor_table = std::move(table); +} + } // namespace snippets } // namespace ov diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index 0ceeec8f7606a3..80fd16e2ed6483 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -4,7 +4,7 @@ if(NOT BUILD_SHARED_LIBS) file(GLOB_RECURSE op_list "src/op/*.cpp") - set(static_reg_file "src/static_reg.hpp") + set(static_reg_file ${CMAKE_CURRENT_BINARY_DIR}/static_reg.hpp) file(WRITE ${static_reg_file} "// Copyright (C) 2018-2024 Intel Corporation\n// SPDX-License-Identifier: Apache-2.0\n// Auto generated file, DO NOT EDIT INLINE\n\n") file(APPEND ${static_reg_file} "#include \"core/operator_set.hpp\"\n\n") file(APPEND ${static_reg_file} "#define ONNX_DECL_OP(op) extern ov::OutputVector op(const Node&)\n\n") diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index cced96c6122685..014becd0d62bdd 100644 --- 
a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -601,8 +601,8 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | LookupTableInsert | YES | | | LookupTableInsertV2 | YES | | | LookupTableRemoveV2 | NO | | -| LookupTableSize | NO | | -| LookupTableSizeV2 | NO | | +| LookupTableSize | YES | | +| LookupTableSizeV2 | YES | | | LoopCond | YES | | | LowerBound | NO | | | Lu | NO | | diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp index 131055369fcd3e..beecb75e733f56 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp @@ -35,10 +35,6 @@ class HashTable : public Variable { : HashTable(other) { m_keys = keys; m_values = values; - // reset names of tensor corresponding to variable value - // that is because variable can have multiple values during inference - m_keys.set_names({}); - m_values.set_names({}); m_is_initialized = true; ++m_init_counter; } diff --git a/src/frontends/tensorflow/src/op/lookup_table_size.cpp b/src/frontends/tensorflow/src/op/lookup_table_size.cpp new file mode 100644 index 00000000000000..42a52d8319d426 --- /dev/null +++ b/src/frontends/tensorflow/src/op/lookup_table_size.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/frontend/tensorflow/hash_table.hpp" +#include "openvino/frontend/tensorflow/node_context.hpp" +#include "openvino/frontend/tensorflow/variable.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; +using namespace 
ov::frontend::tensorflow; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_lookup_table_size_op(const NodeContext& node) { + default_op_checks(node, 1, {"LookupTableSize", "LookupTableSizeV2"}); + auto table_handle = as_type_ptr(node.get_input_by_reference(0).get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + table_handle, + "[TensorFlow Frontend] internal error: LookupTableSize operation expects table_handle by the first input"); + + auto all_keys = table_handle->get_keys(); + + // reshape all keys to 1D tensor to work it further + auto target_shape = make_shared(element::i32, Shape{1}, -1); + all_keys = make_shared(all_keys, target_shape, false); + + // compute size of records in HashTable + // table size must be a scalar + ov::Output table_size = make_shared(all_keys, element::i64); + auto squeeze_axis = make_shared(element::i32, Shape{1}, 0); + table_size = make_shared(table_size, squeeze_axis); + set_node_name(node.get_name(), table_size.get_node_shared_ptr()); + + return {table_size}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index f62d55a05fc520..ea0e4bd2643d39 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -97,6 +97,7 @@ TF_OP_CONVERTER(translate_iterator_get_next_op); TF_OP_CONVERTER(translate_iterator_op); TF_OP_CONVERTER(translate_lookup_table_import_op); TF_OP_CONVERTER(translate_lookup_table_find_op); +TF_OP_CONVERTER(translate_lookup_table_size_op); TF_OP_CONVERTER(translate_loop_cond_op); TF_OP_CONVERTER(translate_merge_op); TF_OP_CONVERTER(translate_mergev2checkpoint_op); @@ -301,6 +302,8 @@ const std::map get_supported_ops() { {"LookupTableImportV2", CreatorFunction(translate_lookup_table_import_op)}, {"LookupTableInsert", CreatorFunction(translate_no_op)}, 
{"LookupTableInsertV2", CreatorFunction(translate_no_op)}, + {"LookupTableSize", CreatorFunction(translate_lookup_table_size_op)}, + {"LookupTableSizeV2", CreatorFunction(translate_lookup_table_size_op)}, {"LRN", CreatorFunction(translate_lrn_op)}, {"MatMul", CreatorFunction(translate_mat_mul_op)}, {"MatrixBandPart", CreatorFunction(translate_matrix_band_part_op)}, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp index 925a6d28697d41..1387992792e0a0 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp @@ -14,7 +14,7 @@ namespace intel_cpu { CPURuntimeConfigurator::CPURuntimeConfigurator() : ov::snippets::RuntimeConfigurator(std::make_shared()) { } -void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& linear_ir) { +void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRCPtr& linear_ir) { if (linear_ir->is_dynamic()) { update_loop_info(linear_ir); update_loop_args(linear_ir); @@ -30,11 +30,11 @@ void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& li update_latest_shapes(); } -void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { +void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const { m_config->tensor_rank = std::max(linear_ir->get_master_shape().size(), rank6D); } -void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { +void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const { const auto& cpu_config = ov::as_type_ptr(m_config); OPENVINO_ASSERT(cpu_config, "CPURuntimeConfigurator expects CPURuntimeConfig"); diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp index f1a21e5982aa1c..93cbb6b598146c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp @@ -29,17 +29,17 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator { * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - void update(const ov::snippets::lowered::LinearIRPtr& linear_ir) override; + void update(const ov::snippets::lowered::LinearIRCPtr& linear_ir) override; /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - void init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const override; + void init_tensor_rank(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const override; /** * @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig * @param linear_ir LinearIR */ - void update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const; + void update_loop_args(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const; const size_t rank6D = 6; }; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index 920f95f0c8bc37..aa917c89dcb016 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -184,7 +184,7 @@ float BrgemmKernelExecutor::get_beta(const ov::snippets::lowered::LoopManagerPtr return 0; } void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr, - const ov::snippets::lowered::LinearIRPtr& linear_ir, + const ov::snippets::lowered::LinearIRCPtr& linear_ir, BrgemmKernelConfig& config) const { 
const auto& input_pds = expr->get_input_port_descriptors(); const auto& output_pds = expr->get_output_port_descriptors(); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp index b673c61d6d0aef..2549580c1a176c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp @@ -100,7 +100,7 @@ class BrgemmKernelExecutor : public CPUKernelExecutor compile_kernel(const BrgemmKernelConfig& c) const override; void update_config(const ov::snippets::lowered::ExpressionPtr& expr, - const ov::snippets::lowered::LinearIRPtr& linear_ir, + const ov::snippets::lowered::LinearIRCPtr& linear_ir, BrgemmKernelConfig& config) const override; static float get_beta(const ov::snippets::lowered::LoopManagerPtr& loop_manager, int loop_id, diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp index 8bfad5e86cf022..829502cefdcd7d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp @@ -5,12 +5,44 @@ #include "shl_fullyconnected.hpp" #include "csinn/csi_nn.h" +#include "rvv/rvv.h" #include "nodes/executors/executor.hpp" #include "nodes/executors/memory_arguments.hpp" +#include "nodes/common/cpu_memcpy.h" #include "utils/debug_capabilities.h" namespace ov { namespace intel_cpu { +namespace { +static MemoryPtr prepareWeightMemory(const MemoryPtr weightsMemory, const ExecutorContext::CPtr context) { + DEBUG_LOG("ShlFCExecutor: prepack weights"); + + auto create = [&]() { + const auto& weiDesc = weightsMemory->getDescPtr(); + MemoryPtr _ptr = std::make_shared(context->getEngine(), + intel_cpu::CpuBlockedMemoryDesc(ov::element::f32, weightsMemory->getShape())); + 
cpu_parallel_memcpy(_ptr->getData(), weightsMemory->getData(), weightsMemory->getSize()); + DEBUG_LOG("ShlFCExecutor: cache miss, perform packing"); + const auto repack_wei = ShlTensor(ShlSession(), precisionToShlDataType(weiDesc->getPrecision()), getShlDataLayoutByMemoryDesc(weiDesc, true), + weiDesc->getShape().getStaticDims(), _ptr->getData()); + shl_rvv_fc_gemm_reorder_weight_fp32(repack_wei.get()); + return _ptr; + }; + + auto weightCache = context->getWeightsCache(); + if (weightCache != nullptr) { + const auto& wgtDims = weightsMemory->getStaticDims(); + std::string format = "gemm_shl_" + std::to_string(wgtDims[0]) + "_" + std::to_string(wgtDims[1]); + const std::string string_hash = format + "_" + std::to_string(weightsMemory->getSize()) + "_" + + std::to_string(reinterpret_cast(weightsMemory->getData())); + DEBUG_LOG("ShlFCExecutor: findOrCreate, string_hash: ", string_hash); + return *weightCache->findOrCreate(string_hash, create); + } + + DEBUG_LOG("ShlFCExecutor: Weights cache is not available"); + return create(); +} +} // namespace bool ShlFCExecutor::supports(const FCConfig& config) { if (config.attrs.weightsNonTransposed) { @@ -53,7 +85,8 @@ bool ShlFCExecutor::supports(const FCConfig& config) { ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const PostOps& postOps, const MemoryArgs& memory, - const ExecutorContext::CPtr context) { + const ExecutorContext::CPtr context) + : packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context)) { const auto& srcDesc = memory.at(ARG_SRC)->getDescPtr(); const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); const auto& dstDesc = memory.at(ARG_DST)->getDescPtr(); @@ -93,7 +126,7 @@ bool ShlFCExecutor::update(const MemoryArgs& memory) { void ShlFCExecutor::execute(const MemoryArgs& memory) { src.setData(memory.at(ARG_SRC)->getData()); - wei.setData(memory.at(ARG_WEI)->getData()); + wei.setData(packedWeights->getData()); dst.setData(memory.at(ARG_DST)->getData()); if (with_bias) { 
bias.setData(memory.at(ARG_BIAS)->getData()); diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.hpp index 76d742080abcfa..129b2e35867809 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.hpp @@ -36,6 +36,8 @@ class ShlFCExecutor : public Executor { ShlSession sess = {}; ShlFCParams params = {}; + const MemoryCPtr packedWeights; + bool with_bias = false; }; using ShlFCExecutorPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index d6d127eb6981e4..86896ad3b4ca5f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -746,15 +746,34 @@ void Subgraph::prepareParams() { const auto cache = context->getParamsCache(); auto builder = [this, cache](const SubgraphKey& key) -> std::shared_ptr { - const auto& snippet_config = ov::as_type_ptr(subgraph_attrs->snippet->update_runtime_config()); - // Firstly, find the schedule in the cache - const auto code_gen_result = cache->getOrCreate(SubgraphCodeGeneratorKey(subgraph_attrs, getBroadcastingMask(in_shapes)), - [&snippet_config](const SubgraphCodeGeneratorKey& key) -> std::shared_ptr { - return std::make_shared(key.attrs, snippet_config); - }); + const auto& snippet = subgraph_attrs->snippet; if (is_dynamic) { - return std::make_shared(key.attrs, code_gen_result.first, start_offset_in, start_offset_out, snippet_config); + // Dynamic case: + // 1. Generate JIT code if needed + // 2. Update runtime config with dynamic values + // If JIT code has been taken from cache, need to set cached kernel executor table for the configuration + // 3. 
Create SubgraphDynamicSpecializedExecutor + const auto code_gen_result = cache->getOrCreate(SubgraphCodeGeneratorKey(subgraph_attrs, getBroadcastingMask(in_shapes)), + [](const SubgraphCodeGeneratorKey& key) -> std::shared_ptr { + return std::make_shared(key.attrs, std::make_shared()); + }); + const auto& code_gen = code_gen_result.first; + // [148644] : Update Kernel table from SubgraphCodeGenerator when JIT code was already generated with specific Kernel table + if (code_gen_result.second == CacheEntryBase::LookUpStatus::Hit) { + snippet->get_runtime_configurator()->set_kernel_executor_table(code_gen->get()->lowering_result.kernel_executor_table); + } + const auto& snippet_config = ov::as_type_ptr(snippet->update_runtime_config()); + return std::make_shared(key.attrs, code_gen, start_offset_in, start_offset_out, snippet_config); } else { + // Static case: + // 1. Update runtime config to get static scheduling data (io data offsets, parallel domain) which will be compiled in JIT code + // 2. Generate JIT code with this static data if needed + // 3. 
Create SubgraphStaticExecutor + const auto& snippet_config = ov::as_type_ptr(snippet->update_runtime_config()); + const auto code_gen_result = cache->getOrCreate(SubgraphCodeGeneratorKey(subgraph_attrs, getBroadcastingMask(in_shapes)), + [&snippet_config](const SubgraphCodeGeneratorKey& key) -> std::shared_ptr { + return std::make_shared(key.attrs, snippet_config); + }); return std::make_shared(key.attrs, code_gen_result.first, start_offset_in, start_offset_out, snippet_config); } }; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/subgraph_caching.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/subgraph_caching.cpp new file mode 100644 index 00000000000000..f9f17154dcca68 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/subgraph_caching.cpp @@ -0,0 +1,125 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Motivation: +// In a dynamic scenario, depending on the input shapes for the current node, +// - we can either generate a new jit kernel or get an existing one from the cache +// - we can either make shape inference or get existing output shapes from the cache +// But the current single layer tests do not allow checking the case when the same kernel can be used for different nodes. 
+// We check 2 Subgraphs with MatMuls inside to validate Kernel Executor table also + +// ----------- ----------- ----------- ----------- +// |input 0.0| |input 0.1| |input 1.0| |input 1.1| +// ----------- ----------- ----------- ----------- +// | | | | +// ------------------------------------ ------------------------------------ +// | MatMul 0 | | Matmul 1 | +// ------------------------------------ ------------------------------------ +// | | +// ------------------------------------ ------------------------------------ +// | Add 0 | | Add 1 | +// ------------------------------------ ------------------------------------ +// | | +// ---------------------------------------------------------------------------- +// | concat | +// ---------------------------------------------------------------------------- +// | +// -------- +// |output| +// -------- + +#include "snippets/op/subgraph.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/node_builders/constant.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "utils/cpu_test_utils.hpp" +#include "internal_properties.hpp" + +namespace ov { +namespace test { +using namespace ov::test::utils; + +typedef std::tuple< + std::vector, // Input Shapes + ElementType // Input precisions +> SubgraphCacheTestParams; + +class SubgraphCacheTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + std::vector inputShapes; + ElementType inputPrecision; + std::tie(inputShapes, inputPrecision) = obj.param; + + std::ostringstream results; + + for (size_t i = 0; i < inputShapes.size(); i++) { + results << "IS[" << i << "]=" << inputShapes[i]; + } + + results << "InPRC" << "=" << inputPrecision << "_"; + + return results.str(); + } + +protected: + void SetUp() override { + targetDevice = 
ov::test::utils::DEVICE_CPU; + + std::vector inputShapes; + ElementType inputPrecision; + std::tie(inputShapes, inputPrecision) = this->GetParam(); + + init_input_shapes(inputShapes); + + // Enable Snippets + configuration.insert(ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK)); + + ov::ParameterVector paramVec; + for (size_t i = 0; i < inputDynamicShapes.size(); i++) { + paramVec.push_back(std::make_shared(inputPrecision, inputDynamicShapes[i])); + } + + auto matmul0 = std::make_shared(paramVec[0], paramVec[1]); + auto matmul1 = std::make_shared(paramVec[2], paramVec[3]); + + auto const0 = utils::make_constant(matmul0->get_output_element_type(0), ov::Shape{1}); + auto const1 = utils::make_constant(matmul1->get_output_element_type(0), ov::Shape{1}); + + auto add0 = std::make_shared(matmul0, const0); + auto add1 = std::make_shared(matmul1, const1); + + auto concat = std::make_shared(ov::NodeVector{add0, add1}, -1); + function = std::make_shared(concat, paramVec, "Subgraph"); + } +}; + +TEST_P(SubgraphCacheTest, CompareWithRefs) { + run(); + + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "MatMul", 0); + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "Subgraph", 2); +} + +namespace { + +std::vector inputShapes { + {{1, 2, -1, -1}, {{1, 2, 10, 3}, {1, 2, 10, 3}, {1, 2, 10, 8}, {1, 2, 10, 3}}}, + {{1, 2, -1, -1}, {{1, 2, 3, 12}, {1, 2, 3, 12}, {1, 2, 8, 9}, {1, 2, 3, 12}}}, + {{1, 2, -1, -1}, {{1, 2, 10, 8}, {1, 2, 10, 3}, {1, 2, 10, 3}, {1, 2, 10, 8}}}, + {{1, 2, -1, -1}, {{1, 2, 8, 9}, {1, 2, 3, 12}, {1, 2, 3, 12}, {1, 2, 8, 9}}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_SubgraphCache, SubgraphCacheTest, + ::testing::Combine( + ::testing::Values(inputShapes), + ::testing::Values(ElementType::f32)), + SubgraphCacheTest::getTestCaseName); + +} // namespace +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/thirdparty/shl b/src/plugins/intel_cpu/thirdparty/shl index 
3a7d230ab1ab39..9c7294c066edee 160000 --- a/src/plugins/intel_cpu/thirdparty/shl +++ b/src/plugins/intel_cpu/thirdparty/shl @@ -1 +1 @@ -Subproject commit 3a7d230ab1ab39b29222ec78cbc3f4e4c3bf7a56 +Subproject commit 9c7294c066edee808a47f2a714f84203cd643f9f diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp index eb51b1dfb37307..4e5c53d6b37e3e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp @@ -9,7 +9,6 @@ #include "openvino/core/except.hpp" #include "intel_gpu/primitives/primitive.hpp" -#include "intel_gpu/runtime/tensor.hpp" namespace cldnn { diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp index 2a0b81b2aba20d..b2497c6d711d7b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp @@ -160,6 +160,8 @@ struct non_max_suppression : public primitive_base { struct non_max_suppression_gather : primitive_base { CLDNN_DECLARE_PRIMITIVE(non_max_suppression_gather) + non_max_suppression_gather() : primitive_base("", {}) {} + /// @brief Constructs non_max_suppression_gather primitive. /// @param id This primitive id. /// @param inputs Input primitives ids. 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp index 5dc7e61bc21734..5563bf8acf54d5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/unique.hpp @@ -13,6 +13,8 @@ namespace cldnn { struct unique_count : primitive_base { CLDNN_DECLARE_PRIMITIVE(unique_count) + unique_count() : primitive_base("", {}) {} + /// @brief Constructs unique_count primitive. /// @param id This primitive id. /// @param input Input primitive id. @@ -45,6 +47,8 @@ struct unique_count : primitive_base { struct unique_gather : primitive_base { CLDNN_DECLARE_PRIMITIVE(unique_gather) + unique_gather() : primitive_base("", {}) {} + /// @brief Constructs unique_gather primitive. /// @param id This primitive id. /// @param inputs Input primitives ids. diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp index d0105b0e83a028..63f0311f675123 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp @@ -18,7 +18,7 @@ const uint32_t INTEL_VENDOR_ID = 0x8086; struct device { public: using ptr = std::shared_ptr; - virtual device_info get_info() const = 0; + virtual const device_info& get_info() const = 0; virtual memory_capabilities get_mem_caps() const = 0; virtual bool is_same(const device::ptr other) = 0; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 7e77ceb6785cb5..79e37d1890b78d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -94,7 +94,7 @@ class engine { bool supports_allocation(allocation_type type) const; /// Returns device structure which represents stores device capabilities - device_info 
get_device_info() const; + const device_info& get_device_info() const; /// Returns device object associated with the engine const device::ptr get_device() const; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index aec9e8b5f497e6..3599e68301da29 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -7,6 +7,7 @@ #include "reshape_inst.h" #include "eltwise_inst.h" #include "select_inst.h" +#include "strided_slice_inst.h" #include "gather_inst.h" #include "pass_manager.h" @@ -78,6 +79,13 @@ bool mark_shape_of_subgraphs::can_mark_node(const program_node& node) { return false; } + // Exclude stride_slice primitive if it's input is big const ternsor, else CPU reference implementation + // will lead to huge performance drop. + if (node.is_type() && node.get_dependency(0).is_constant() && + node.get_dependency(0).get_output_layout().count() > 1024 * 1024) { + return false; + } + auto available_impls = node.type()->get_available_impls(node); auto cpu_impl_found = available_impls.find(impl_types::cpu) != available_impls.end(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index f7b4db99afa092..5441d4a7930a51 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -4,7 +4,7 @@ #include "pass_manager.h" #include "program_helpers.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "convolution_inst.h" #include "deconvolution_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp 
b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 64895c4b6f2814..f63f1bf4efbe21 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/runtime/error_handler.hpp" #include "pass_manager.h" #include "program_helpers.h" #include "strided_slice_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index cdc56673f8c6a9..65cf9a692c91b8 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -937,14 +937,35 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) } }; + const auto reorder_input_concat = [&p, &rf](typed_program_node& concat_node) { + auto output_layout = concat_node.get_output_layout(); + // Iterate over all dependencies of the concat node + for (size_t i = 0; i < concat_node.get_dependencies().size(); ++i) { + auto dep = concat_node.get_dependency_with_port(i); + const auto& input = dep.first; + auto input_layout = input->get_output_layout(); + // Change input data type of concat node from input format to output format + if (input_layout.format != output_layout.format) { + auto new_layout = input_layout; + new_layout.format = output_layout.format; + auto new_input = rf.get_reorder(input->id(), dep.second, input_layout, new_layout); + if (new_input.first) { + p.add_intermediate(new_input.first, concat_node, i); + concat_node.get_dependency_with_port(i).first->recalc_output_layout(); + } + } + } + }; + for (auto& prim : p.get_processing_order()) { - program_helpers::do_for_types( + program_helpers::do_for_types( *prim, reorder_input_detection_output, 
reorder_input_and_weights_deconvolution, reorder_convolution, reorder_input_fully_connected, - reorder_input_pooling); + reorder_input_pooling, + reorder_input_concat); } for (auto n : p.get_processing_order()) { diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index 49b2ab5aa38c0b..300d93bc96f708 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -4,7 +4,7 @@ #include "condition_inst.h" #include "data_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index b11fb675f76196..f8aac08a07af04 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // #include "loop_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "mutable_data_inst.h" #include "input_layout_inst.h" -#include "intel_gpu/runtime/error_handler.hpp" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp index d8cce52a0aa167..35b433933d1295 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/wait_for_events.cpp @@ -6,7 +6,7 @@ #include "data_inst.h" #include "prior_box_inst.h" #include "input_layout_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include @@ -54,7 +54,7 @@ class wait_for_events_impl : public primitive_impl { return 
make_unique(prior_box); } - void update_dispatch_data(const kernel_impl_params& impl_param) override { } + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override { } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp index 16084f47efea1f..e750303b955d77 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp @@ -5,9 +5,7 @@ #include "openvino/core/type/element_type_traits.hpp" #include "register.hpp" #include "activation_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" @@ -290,7 +288,7 @@ struct activation_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const activation_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp index 7d11374f178c23..d03c49fb28efbe 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp @@ -3,9 +3,8 @@ // #include "assign_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" -#include "intel_gpu/runtime/error_handler.hpp" namespace cldnn { namespace cpu { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp index 515615f700a847..79a6b77f442cba 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "broadcast_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/broadcast.hpp" @@ -124,7 +122,7 @@ struct broadcast_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const broadcast_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp index 85fd52fa3a24b6..6b7a483bae7d8c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "concatenation_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/concat.hpp" @@ -111,7 +109,7 @@ struct concatenation_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const concatenation_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp index 7b58dcdb20010c..6633bca02da8d2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp @@ -6,9 +6,7 @@ #include "register.hpp" #include "crop_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/slice.hpp" @@ -113,7 +111,7 @@ struct crop_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const crop_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 364ac62d1d1510..c2a01b56c63740 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -3,7 +3,7 @@ // #include "detection_output_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include "cpu_impl_helpers.hpp" diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp index 67fd065412fe12..eb10f340d2656b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "eltwise_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/add.hpp" #include "openvino/op/multiply.hpp" @@ -205,7 +203,7 @@ struct eltwise_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} 
+ void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const eltwise_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp index 5a6de3fd749e4f..242273a23dd000 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "gather_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/gather.hpp" @@ -114,7 +112,7 @@ struct gather_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const gather_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index f38efcd5c0d30c..4783159d501404 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -6,7 +6,7 @@ #include "primitive_inst.h" #include "register.hpp" #include "cpu_impl_helpers.hpp" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index 6e89d4d25d8106..e49cb3a832f8ae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -4,8 +4,7 @@ #include "proposal_inst.h" 
#include "intel_gpu/runtime/engine.hpp" -#include "implementation_map.hpp" -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" #include @@ -457,9 +456,7 @@ struct proposal_impl : typed_primitive_impl { // - image_info[3] = { img_height, img_width, img_depth } // - image_info[4] = { img_height, img_width, scale_min_bbox_y, scale_min_bbox_x } // - image_info[6] = { img_height, img_width, img_depth, scale_min_bbox_y, scale_min_bbox_x, scale_depth_index } - if (count != 3 && count != 4 && count != 6) { - CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items"); - } + OPENVINO_ASSERT(one_of(count, {3, 4, 6}), arg.id(), "image_info must have either 3, 4 or 6 items"); } return make_unique(arg); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp index 26515c0a35cd92..83142812f29e8b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "range_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/range.hpp" @@ -91,7 +89,7 @@ struct range_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const range_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 20a8a4afa0e8e3..6c16618ac816d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -3,9 +3,8 @@ // #include "read_value_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "register.hpp" -#include "intel_gpu/runtime/error_handler.hpp" namespace cldnn { namespace cpu { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp index 80bd72f74528b2..5a3867f9d1582a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp @@ -4,7 +4,7 @@ #include "register.hpp" #include "reduce_inst.h" -#include "implementation_map.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/reduce_max.hpp" #include "openvino/op/reduce_sum.hpp" @@ -149,7 +149,7 @@ struct reduce_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const reduce_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp index 98c5d618aebcfa..1b6f145c4ceb2d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "reorder_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/convert.hpp" @@ -84,7 +82,7 @@ struct reorder_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void 
update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const reorder_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp index 13b97cdf818726..1a329ea495ef82 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "scatter_update_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/scatter_update.hpp" @@ -106,7 +104,7 @@ struct scatter_update_impl : public typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} - void update_dispatch_data(const kernel_impl_params& impl_param) override {} + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} public: static std::unique_ptr create(const scatter_update_node& arg, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp index 47728050f9731c..9c9ab75f64ad59 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp @@ -4,9 +4,7 @@ #include "register.hpp" #include "select_inst.h" -#include "implementation_map.hpp" - -#include "intel_gpu/runtime/error_handler.hpp" +#include "impls/registry/implementation_map.hpp" #include "openvino/op/select.hpp" @@ -101,7 +99,7 @@ struct select_impl : public typed_primitive_impl