
Commit

Merge branch 'master' into river/loaded_from_cache
riverlijunjie committed Jan 15, 2024
2 parents 477a1d3 + 938600f commit 6851740
Showing 47 changed files with 579 additions and 442 deletions.
4 changes: 2 additions & 2 deletions cmake/developer_package/plugins/create_plugins_hpp.cmake
@@ -42,10 +42,10 @@ foreach(dev_map IN LISTS OV_DEVICE_MAPPING)

# declarations
set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS}
IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_PLUGIN_FUNC});")
OV_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_PLUGIN_FUNC});")
if(${actual_dev_name}_AS_EXTENSION)
set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS}
IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_EXTENSION_FUNC});")
OV_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_EXTENSION_FUNC});")
else()
set(_OV_CREATE_EXTENSION_FUNC "nullptr")
endif()
2 changes: 1 addition & 1 deletion cmake/developer_package/plugins/plugins.cmake
@@ -80,7 +80,7 @@ function(ov_add_plugin)
if(OV_PLUGIN_AS_EXTENSION)
# to distinguish functions creating extensions objects
target_compile_definitions(${OV_PLUGIN_NAME} PRIVATE
IE_CREATE_EXTENSION=CreateExtensionShared${OV_PLUGIN_DEVICE_NAME})
OV_CREATE_EXTENSION=CreateExtensionShared${OV_PLUGIN_DEVICE_NAME})
endif()
endif()

16 changes: 13 additions & 3 deletions cmake/developer_package/plugins/plugins.hpp.in
@@ -9,13 +9,23 @@

#ifdef OPENVINO_STATIC_LIBRARY

#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
// The macro used to create extensions for a static library
#define OV_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(_OV_CREATE_EXTENSION_FUNC) \
OPENVINO_EXTENSION_C_API void \
_OV_CREATE_EXTENSION_FUNC(std::vector<::ov::Extension::Ptr>& ext)

// The macro used to create a plugin for a static library
#define OV_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(_OV_CREATE_PLUGIN_FUNC) \
OPENVINO_PLUGIN_API void \
_OV_CREATE_PLUGIN_FUNC(::std::shared_ptr<::ov::IPlugin> &plugin) noexcept(false)

@OV_PLUGINS_DECLARATIONS@

using CreateExtensionFunc = void(std::vector<::ov::Extension::Ptr>&);
using CreatePluginEngineFunc = void(std::shared_ptr<::ov::IPlugin>&);
struct Value {
InferenceEngine::CreatePluginEngineFunc * m_create_plugin_func;
InferenceEngine::CreateExtensionFunc * m_create_extension_func;
CreatePluginEngineFunc * m_create_plugin_func;
CreateExtensionFunc * m_create_extension_func;
std::map<std::string, std::string> m_default_config;
};

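To make the generated header easier to picture, here is a rough, illustrative sketch of what the substituted declarations and registry entry might look like for a hypothetical statically linked device named ``TEMPLATE`` (the names are assumptions based on the patterns above, not the exact generated output):

.. code-block:: cpp

   // Illustrative expansion of @OV_PLUGINS_DECLARATIONS@ for a device "TEMPLATE"
   // (function names follow the CreateExtensionShared${DEVICE} pattern shown above):
   OV_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(CreatePluginEngineTEMPLATE);
   OV_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(CreateExtensionSharedTEMPLATE);

   // The static registry then ties the device name to its entry points:
   // {"TEMPLATE", Value{CreatePluginEngineTEMPLATE, CreateExtensionSharedTEMPLATE, {}}}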
1 change: 0 additions & 1 deletion docs/articles_en/openvino_workflow/model_preparation.rst
@@ -15,7 +15,6 @@ Model Preparation
Convert to OpenVINO Model <openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_IR>
Conversion Parameters <openvino_docs_OV_Converter_UG_Conversion_Options>
Setting Input Shapes <openvino_docs_OV_Converter_UG_prepare_model_convert_model_Converting_Model>
PyVision preprocessing <pytorch_vision>


You can obtain a model in one of supported formats, **PyTorch, TensorFlow, TensorFlow Lite, ONNX, and PaddlePaddle**,

This file was deleted.

@@ -13,10 +13,11 @@ Optimize Inference
openvino_docs_OV_UG_Precision_Control
openvino_docs_deployment_optimization_guide_latency
openvino_docs_deployment_optimization_guide_tput
openvino_docs_deployment_optimization_guide_tput_advanced
Advanced Throughput Options <openvino_docs_deployment_optimization_guide_tput_advanced>
openvino_docs_OV_UG_Preprocessing_Overview
openvino_docs_deployment_optimization_guide_internals
openvino_docs_memory_optimization_guide
Optimizing memory usage <openvino_docs_memory_optimization_guide>


.. meta::
:description: Improving inference performance involves model and runtime
@@ -10,8 +10,15 @@ Optimizing for Throughput
simultaneously which improves the device utilization.


As described in the section on the :doc:`latency-specific considerations <openvino_docs_deployment_optimization_guide_latency>`, one of the possible use cases is *delivering every single request at the minimal delay*.
Throughput, on the other hand, is about inference scenarios in which potentially **large number of inference requests are served simultaneously to improve the device utilization**.
.. toctree::
:maxdepth: 1
:hidden:

Advanced Throughput Options <openvino_docs_deployment_optimization_guide_tput_advanced>


As described in the section on the :doc:`latency-specific optimizations <openvino_docs_deployment_optimization_guide_latency>`, one of the possible use cases is delivering every single request with minimal delay.
Throughput, on the other hand, is about inference scenarios in which potentially **large numbers of inference requests are served simultaneously to improve resource use**.

The associated increase in latency is not linearly dependent on the number of requests executed in parallel.
A trade-off between overall throughput and serial performance of individual requests can be achieved with the right performance configuration of OpenVINO.
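
As a quick illustration of such a configuration (an editorial sketch, not part of this change), the throughput hint can be requested when compiling a model, after which the device reports how many parallel requests it can use efficiently:

.. code-block:: cpp

   #include <iostream>
   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;
       // "model.xml" is a placeholder path to an OpenVINO IR.
       auto model = core.read_model("model.xml");
       // Ask the device to optimize for throughput rather than latency.
       auto compiled = core.compile_model(
           model, "CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
       // Query how many parallel infer requests saturate the device.
       uint32_t n_requests = compiled.get_property(ov::optimal_number_of_infer_requests);
       std::cout << "Optimal number of infer requests: " << n_requests << "\n";
       return 0;
   }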
@@ -1,14 +1,14 @@
.. {#openvino_docs_deployment_optimization_guide_tput_advanced}
Using Advanced Throughput Options: Streams and Batching
Advanced Throughput Options: Streams and Batching
=======================================================


.. meta::
:description: With OpenVINO streams a device may handle processing multiple
inference requests and the batching helps to saturate the
device and leads to higher throughput.


OpenVINO Streams
####################
@@ -9,9 +9,8 @@ The choice of data types is essential to the inference runtime, which can have a
1. Model storage precision (IR precision),
2. Model inference precision.

Previously, these 2 precisions were interrelated, and model storage precision could affect the inference precision in some devices (e.g. GPU did ``f16`` inference only for ``f16`` IRs).

With the ``2023.0`` release this behavior has been changed and the inference precision no longer depends on the precision of IR. Now users have several knobs to find the balance between model performance and accuracy.
Inference precision no longer depends on the precision of IR, which means that users
have several options to find the balance between model performance and accuracy.

Essentially, the IR precision becomes a way of compressing the model by reducing the precision of the weights, and it does not affect how the devices execute the model. This change clears up a lot of confusion where, for example, you couldn't execute a high-performance model on the GPU by default, and the behavior between devices was different.
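
To make those options concrete (an illustrative sketch, not part of this diff), inference precision is controlled separately from IR precision through a runtime hint, for example:

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;
       // "model.xml" is a placeholder; the IR weights may be stored in f16 or f32.
       auto model = core.read_model("model.xml");
       // Request f32 execution to prioritize accuracy over speed,
       // regardless of the precision the IR weights were saved in.
       auto compiled = core.compile_model(
           model, "CPU", ov::hint::inference_precision(ov::element::f32));
       return 0;
   }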

@@ -11,6 +11,7 @@ Optimize Preprocessing
openvino_docs_OV_UG_Preprocessing_Details
openvino_docs_OV_UG_Layout_Overview
openvino_docs_OV_UG_Preprocess_Usecase_save
Torchvision preprocessing converter <torchvision_preprocessing_converter>

.. meta::
:description: The preprocessing entails additional operations to transform
@@ -0,0 +1,71 @@
.. {#torchvision_preprocessing_converter}
Torchvision preprocessing converter
=======================================


.. meta::
:description: See how OpenVINO™ enables torchvision preprocessing
to optimize model inference.


The Torchvision-to-OpenVINO converter enables automatic translation of operators from the torchvision
preprocessing pipeline to the OpenVINO format and embeds them in your model. It is often used to adjust
images serving as input for AI models so that they have the proper dimensions or data types.

As the converter is fully based on the **openvino.preprocess** module, you can implement the **torchvision.transforms**
feature easily and without the use of external libraries, reducing the overall application complexity
and enabling additional performance optimizations.


.. note::

   Not all torchvision transforms are supported yet. The following operations are available:

   .. code-block::

      transforms.Compose
      transforms.Normalize
      transforms.ConvertImageDtype
      transforms.Grayscale
      transforms.Pad
      transforms.ToTensor
      transforms.CenterCrop
      transforms.Resize


Example
###################

.. code-block:: py

   preprocess_pipeline = torchvision.transforms.Compose(
       [
           torchvision.transforms.Resize(256, interpolation=transforms.InterpolationMode.NEAREST),
           torchvision.transforms.CenterCrop((216, 218)),
           torchvision.transforms.Pad((2, 3, 4, 5), fill=3),
           torchvision.transforms.ToTensor(),
           torchvision.transforms.ConvertImageDtype(torch.float32),
           torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
       ]
   )
   torch_model = SimpleConvnet(input_channels=3)
   torch.onnx.export(torch_model, torch.randn(1, 3, 224, 224), "test_convnet.onnx", verbose=False, input_names=["input"], output_names=["output"])
   core = Core()
   ov_model = core.read_model(model="test_convnet.onnx")
   test_input = np.random.randint(255, size=(260, 260, 3), dtype=np.uint16)
   ov_model = PreprocessConverter.from_torchvision(
       model=ov_model, transform=preprocess_pipeline, input_example=Image.fromarray(test_input.astype("uint8"), "RGB")
   )
   ov_model = core.compile_model(ov_model, "CPU")
   ov_input = np.expand_dims(test_input, axis=0)
   output = ov_model.output(0)
   ov_result = ov_model(ov_input)[output]

2 changes: 1 addition & 1 deletion docs/dev/get_started.md
@@ -18,4 +18,4 @@ Explore other resources to learn more about OpenVINO:
* [OpenVINO Developer Documentation](./index.md)
* [OpenVINO Samples](../../samples)
* [OpenVINO Building Documentation](./building_documentation.md)
* [CMake Options for Custom Compilation](./cmake_options_for_custom_comiplation.md)
* [CMake Options for Custom Compilation](./cmake_options_for_custom_compilation.md)
7 changes: 4 additions & 3 deletions src/common/snippets/src/lowered/pass/allocate_buffers.cpp
@@ -67,18 +67,19 @@ void AllocateBuffers::set_buffer_offset(const ExpressionPtr& buffer_expr, const
bool AllocateBuffers::run(lowered::LinearIR& linear_ir) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::AllocateBuffers");
m_buffer_scratchpad_size = 0;
PassPipeline pipeline;

if (m_is_optimized_mode) {
BufferClusters buffer_clusters;
PassPipeline pipeline;
pipeline.register_pass<EnumerateExpressions>();
pipeline.register_pass<IdentifyBuffers>();
pipeline.register_pass<DefineBufferClusters>(buffer_clusters);
pipeline.register_pass<SolveBufferMemory>(m_buffer_scratchpad_size, buffer_clusters);
pipeline.register_pass<NormalizeBufferIDs>();
pipeline.run(linear_ir);
} else {
pipeline.register_pass<InitBuffersDefault>(m_buffer_scratchpad_size);
InitBuffersDefault(m_buffer_scratchpad_size).run(linear_ir);
}
pipeline.run(linear_ir);

return m_buffer_scratchpad_size > 0;
}
16 changes: 10 additions & 6 deletions src/core/include/openvino/core/extension.hpp
@@ -28,24 +28,28 @@ class OPENVINO_API Extension {

virtual ~Extension();
};
} // namespace ov

#ifndef OV_CREATE_EXTENSION
/**
* @brief The entry point for library with OpenVINO extensions
*
* @param vector of extensions
*/
OPENVINO_EXTENSION_C_API
void create_extensions(std::vector<Extension::Ptr>&);
void create_extensions(std::vector<ov::Extension::Ptr>&);

} // namespace ov
# define OV_CREATE_EXTENSION create_extensions

#endif

/**
* @brief Macro generates the entry point for the library
*
* @param vector of extensions
*/
#define OPENVINO_CREATE_EXTENSIONS(extensions) \
OPENVINO_EXTENSION_C_API \
void ::ov::create_extensions(std::vector<::ov::Extension::Ptr>& ext) { \
ext = extensions; \
#define OPENVINO_CREATE_EXTENSIONS(extensions) \
OPENVINO_EXTENSION_C_API void OV_CREATE_EXTENSION(std::vector<ov::Extension::Ptr>& ext); \
OPENVINO_EXTENSION_C_API void OV_CREATE_EXTENSION(std::vector<ov::Extension::Ptr>& ext) { \
ext = extensions; \
}
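
For context, a minimal sketch of how an extension library typically uses this entry-point macro (the ``Identity`` operation here is a hypothetical example, not part of this change):

.. code-block:: cpp

   #include <openvino/core/extension.hpp>
   #include <openvino/core/op_extension.hpp>
   #include <openvino/op/op.hpp>

   // A trivial custom operation used only to illustrate registration.
   class Identity : public ov::op::Op {
   public:
       OPENVINO_OP("Identity", "custom_opset");
       Identity() = default;
       explicit Identity(const ov::Output<ov::Node>& arg) : Op({arg}) {
           constructor_validate_and_infer_types();
       }
       void validate_and_infer_types() override {
           set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
       }
       std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
           return std::make_shared<Identity>(new_args.at(0));
       }
   };

   // Expands to the create_extensions (OV_CREATE_EXTENSION) entry point that
   // fills the vector consumed by ov::Core when the library is loaded.
   OPENVINO_CREATE_EXTENSIONS(
       std::vector<ov::Extension::Ptr>({
           std::make_shared<ov::OpExtension<Identity>>(),
       }));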
@@ -19,6 +19,7 @@
#include "ie_iextension.h"
#include "ie_input_info.hpp"
#include "ie_parameter.hpp"
#include "openvino/core/extension.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/util/pp.hpp"
#include "so_ptr.hpp"
@@ -377,16 +378,6 @@ class INFERENCE_ENGINE_1_0_DEPRECATED INFERENCE_ENGINE_API_CLASS(IInferencePlugi
bool _isNewAPI; //!< A flag which shows used API
};

/**
* @private
*/
using CreatePluginEngineFunc = void(std::shared_ptr<::ov::IPlugin>&);

/**
* @private
*/
using CreateExtensionFunc = void(std::shared_ptr<IExtension>&);

/**
* @def IE_CREATE_PLUGIN
* @brief Defines a name of a function creating plugin instance
@@ -428,17 +419,3 @@ convert_plugin(const std::shared_ptr<InferenceEngine::IInferencePlugin>& from);
ie_plugin->SetVersion(version); \
plugin = convert_plugin(ie_plugin); \
}

/**
* @private
*/
#define IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(_IE_CREATE_PLUGIN_FUNC) \
INFERENCE_PLUGIN_API(void) \
_IE_CREATE_PLUGIN_FUNC(::std::shared_ptr<::ov::IPlugin>& plugin) noexcept(false)

/**
* @private
*/
#define IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(_IE_CREATE_EXTENSION_FUNC) \
INFERENCE_EXTENSION_API(void) \
_IE_CREATE_EXTENSION_FUNC(::InferenceEngine::IExtensionPtr& ext)
4 changes: 2 additions & 2 deletions src/inference/src/dev/core_impl.cpp
@@ -703,9 +703,9 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const {

if (desc.extensionCreateFunc) { // static OpenVINO case
try {
InferenceEngine::IExtensionPtr ext;
std::vector<ov::Extension::Ptr> ext;
desc.extensionCreateFunc(ext);
AddExtensionUnsafe(ext);
add_extensions_unsafe(ext);
} catch (const InferenceEngine::GeneralError&) {
// the same extension can be registered multiple times - ignore it!
}
11 changes: 7 additions & 4 deletions src/inference/src/dev/core_impl.hpp
@@ -26,6 +26,9 @@

namespace ov {

using CreateExtensionFunc = void(std::vector<::ov::Extension::Ptr>&);
using CreatePluginEngineFunc = void(std::shared_ptr<::ov::IPlugin>&);

const std::string DEFAULT_DEVICE_NAME = "DEFAULT_DEVICE";

struct Parsed {
@@ -123,8 +126,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
ov::util::FilePath libraryLocation;
ov::AnyMap defaultConfig;
std::vector<ov::util::FilePath> listOfExtentions;
InferenceEngine::CreatePluginEngineFunc* pluginCreateFunc = nullptr;
InferenceEngine::CreateExtensionFunc* extensionCreateFunc = nullptr;
CreatePluginEngineFunc* pluginCreateFunc = nullptr;
CreateExtensionFunc* extensionCreateFunc = nullptr;

PluginDescriptor() = default;

@@ -136,9 +139,9 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
this->listOfExtentions = listOfExtentions;
}

PluginDescriptor(InferenceEngine::CreatePluginEngineFunc* pluginCreateFunc,
PluginDescriptor(CreatePluginEngineFunc* pluginCreateFunc,
const ov::AnyMap& defaultConfig = {},
InferenceEngine::CreateExtensionFunc* extensionCreateFunc = nullptr) {
CreateExtensionFunc* extensionCreateFunc = nullptr) {
this->pluginCreateFunc = pluginCreateFunc;
this->defaultConfig = defaultConfig;
this->extensionCreateFunc = extensionCreateFunc;
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -40,13 +40,11 @@ struct ImmediateSerialExecutor : public ov::threading::ITaskExecutor {
CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
const Config& cfg,
const ExtensionManager::Ptr& extMgr,
const bool loaded_from_cache)
: ov::ICompiledModel::ICompiledModel(model, plugin),
m_model(model),
m_plugin(plugin),
m_cfg{cfg},
extensionManager(extMgr),
m_name{model->get_name()},
m_loaded_from_cache(loaded_from_cache) {
bool isFloatModel = !ov::op::util::has_op_with_type<ov::op::v0::FakeQuantize>(m_model);
@@ -125,7 +123,7 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
(m_cfg.lpTransformsMode == Config::On) &&
ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);

ctx = std::make_shared<GraphContext>(m_cfg, extensionManager, weightsCache, isQuantizedFlag);
ctx = std::make_shared<GraphContext>(m_cfg, weightsCache, isQuantizedFlag);
}
const std::shared_ptr<const ov::Model> model = m_model;
graphLock._graph.CreateGraph(model, ctx);
@@ -306,7 +304,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
}

void CompiledModel::export_model(std::ostream& modelStream) const {
ModelSerializer serializer(modelStream, extensionManager);
ModelSerializer serializer(modelStream);
serializer << m_model;
}
