diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 752e7be8de1616..ab6b96f62ffc2f 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -21,7 +21,7 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. .. csv-table:: :class: modeldata stripe - :name: supportedModelsTable + :name: supportedModelsTableOv :header-rows: 1 :file: ../../_static/download/llm_models.csv @@ -29,7 +29,7 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. .. csv-table:: :class: modeldata stripe - :name: supportedModelsTable + :name: supportedModelsTableOvms :header-rows: 1 :file: ../../_static/download/llm_models_ovms.csv diff --git a/docs/articles_en/assets/snippets/main.py b/docs/articles_en/assets/snippets/main.py index 4d5429cd4b7925..ab60e66db158de 100644 --- a/docs/articles_en/assets/snippets/main.py +++ b/docs/articles_en/assets/snippets/main.py @@ -9,7 +9,7 @@ from contextlib import redirect_stdout, redirect_stderr -skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py", "ov_stateful_model_intro.py"] +skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py", "ov_stateful_models_intro.py"] def import_python_modules(directory, subdirectory=""): for item in os.listdir(directory): diff --git a/docs/articles_en/assets/snippets/ov_stateful_model_intro.py b/docs/articles_en/assets/snippets/ov_stateful_models_intro.py similarity index 100% rename from docs/articles_en/assets/snippets/ov_stateful_model_intro.py rename to docs/articles_en/assets/snippets/ov_stateful_models_intro.py diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst index a7db3317203045..b9e5c35fe1c065 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst @@ -51,43 +51,57 @@ Parameter/Result tensor names. If there are no tensor names, .. tab-set:: - .. tab-item:: C++ + .. tab-item:: Python + :sync: py .. tab-set:: .. tab-item:: Using tensor names + :sync: using-tensor-names - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp - :language: cpp + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py :fragment: [ov:make_stateful_tensor_names] - + + .. tab-item:: Using Parameter/Result operations + :sync: using-ops + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py :language: py + :fragment: [ov:make_stateful_ov_nodes] + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: Using tensor names + :sync: using-tensor-names + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp + :language: cpp :fragment: [ov:make_stateful_tensor_names] .. tab-item:: Using Parameter/Result operations + :sync: using-ops .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:make_stateful_ov_nodes] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:make_stateful_ov_nodes] .. tab-item:: command line + :sync: command-line .. tab-set:: .. tab-item:: Using tensor names + :sync: using-tensor-names .. code-block:: sh --input_model --transform "MakeStateful[param_res_names={'tensor_name_1':'tensor_name_4','tensor_name_3':'tensor_name_6'}]" - - .. _ov_ug_low_latency: LowLatency2 Transformation @@ -117,15 +131,20 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:get_ov_model] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:get_ov_model] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:get_ov_model] + 2. Change the number of iterations inside TensorIterator/Loop nodes in the model using the :doc:`Reshape <../changing-input-shape>` feature. @@ -136,15 +155,20 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:reshape_ov_model] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:reshape_ov_model] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:reshape_ov_model] + **Unrolling**: If the LowLatency2 transformation is applied to a model containing TensorIterator/Loop nodes with exactly one iteration inside, these nodes are unrolled. @@ -154,15 +178,19 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:apply_low_latency_2] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:apply_low_latency_2] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:apply_low_latency_2] (Optional) Use Const Initializer argument: @@ -174,15 +202,19 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2_use_parameters] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2_use_parameters] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:low_latency_2_use_parameters] .. image:: ../../../assets/images/llt2_use_const_initializer.svg @@ -197,15 +229,19 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:low_latency_2] 4. Use state API. See sections :doc:`OpenVINO State API <../stateful-models>`, @@ -231,15 +267,20 @@ To apply LowLatency2 Transformation, follow the instruction below: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:replace_const] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:replace_const] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:replace_const] + Stateful Model from Scratch ################################## @@ -256,15 +297,20 @@ a sink from `ov::Model` after deleting the node from the graph with the `delete_ .. tab-set:: + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:stateful_model] + .. tab-item:: C++ + :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:stateful_model] - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py - :language: py - :fragment: [ov:stateful_model] + .. note::