
Merge commit 'b3c2c386dc527f5b877680fbe1215b6a3e05968f' into scripts/build-samples-for-pwsh
akashchi committed Jan 16, 2024
2 parents b2f091e + b3c2c38 commit 8ee0ebd
Showing 34 changed files with 1,524 additions and 153 deletions.
4 changes: 2 additions & 2 deletions cmake/developer_package/ncc_naming_style/openvino.style
@@ -1,6 +1,6 @@
# custom OpenVINO values
CppMethod: '^(operator\W+|[a-z_\d]+|signaling_NaN|quiet_NaN|OPENVINO_OP)$'
ClassName: '^([A-Z][\w]+|b?float16|numeric_limits|ngraph_error|stopwatch|unsupported_op)$'
ClassName: '^([A-Z][\w]+|b?float16|float8_e4m3|float8_e5m2|numeric_limits|ngraph_error|stopwatch|unsupported_op)$'
StructName: '^([A-Z][\w]+|element_type_traits|hash|oi_pair|stat)$'
FunctionName: '^(operator\W+|[a-z_\d]+)|PrintTo$'
Namespace: '^([a-z\d_]*|InferenceEngine)$'
@@ -18,7 +18,7 @@ VariableReference: '^\w+$'

EnumName: '^[A-Z][\w]+$'
# excepts element_type
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|nf4|string|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|nf4|f8e4m3|f8e5m2|string|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
# TODO: align
UsingDeclaration: '^.*$'
TypedefName: '^.*$'
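
A quick sanity sketch of what the updated ``ClassName`` rule now accepts, using plain Python ``re`` (illustrative only, mirroring the pattern in the diff above):

.. code-block:: python

   import re

   class_name = re.compile(
       r'^([A-Z][\w]+|b?float16|float8_e4m3|float8_e5m2|numeric_limits|'
       r'ngraph_error|stopwatch|unsupported_op)$')

   assert class_name.match('float8_e4m3')     # newly allowed
   assert class_name.match('float8_e5m2')     # newly allowed
   assert not class_name.match('snake_case')  # still rejected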
98 changes: 69 additions & 29 deletions docs/articles_en/openvino_workflow/torch_compile.rst
@@ -5,7 +5,7 @@ PyTorch Deployment via "torch.compile"



The ``torch.compile`` feature enables you to use OpenVINO for PyTorch-native applications.
It speeds up PyTorch code by JIT-compiling it into optimized kernels.
By default, Torch code runs in eager-mode, but with the use of ``torch.compile`` it goes through the following steps:

@@ -20,7 +20,7 @@ By default, Torch code runs in eager-mode, but with the use of ``torch.compile``


How to Use
####################

To use ``torch.compile``, you need to add an import statement and define one of the two available backends:

@@ -38,7 +38,7 @@ To use ``torch.compile``, you need to add an import statement and define one of

.. code-block:: python
import openvino.torch
...
model = torch.compile(model, backend='openvino')
@@ -68,20 +68,60 @@ To use ``torch.compile``, you need to add an import statement and define one of
:align: center


Environment Variables
+++++++++++++++++++++++++++

* **OPENVINO_TORCH_BACKEND_DEVICE**: enables selecting a specific hardware device to run the application.
By default, the OpenVINO backend for ``torch.compile`` runs PyTorch applications using the CPU. Setting
this variable to GPU.0, for example, will make the application use the integrated graphics processor instead.
* **OPENVINO_TORCH_MODEL_CACHING**: enables saving the optimized model files to a hard drive, after the first application run.
This makes them available for the following application executions, reducing the first-inference latency.
By default, this variable is set to ``False``. Setting it to ``True`` enables caching.
* **OPENVINO_TORCH_CACHE_DIR**: enables defining a custom directory for the model files (if model caching set to ``True``).
By default, the OpenVINO IR is saved in the ``cache`` sub-directory, created in the application's root directory.
Options
++++++++++++++++++++

You can pass additional arguments to ``torch.compile`` to set the backend device,
enable model caching, set the cache directory, and more, using a dictionary of the available options:

* ``device`` - enables selecting a specific hardware device to run the application.
By default, the OpenVINO backend for ``torch.compile`` runs PyTorch applications
on CPU. If you set this option to ``GPU.0``, for example, the application will
use the integrated graphics processor instead.
* ``model_caching`` - enables saving the optimized model files to a hard drive
after the first application run. This makes them available for the following
application executions, reducing the first-inference latency. By default, this
option is set to ``False``. Set it to ``True`` to enable caching.
* ``cache_dir`` - enables defining a custom directory for the model files (if
``model_caching`` is set to ``True``). By default, the OpenVINO IR is saved
in the ``cache`` sub-directory, created in the application's root directory.
* ``config`` - enables passing any OpenVINO configuration option as a dictionary
under this key. For details on the various options, refer to the
:ref:`OpenVINO Advanced Features <openvino-advanced-features>`.

See the example below for details:

.. code-block:: python
model = torch.compile(model, backend="openvino", options = {"device" : "CPU", "model_caching" : True, "cache_dir": "./model_cache"})
You can also set OpenVINO-specific configuration options by adding them as a dictionary under the ``config`` key in ``options``:

.. code-block:: python
opts = {"device" : "CPU", "config" : {"PERFORMANCE_HINT" : "LATENCY"}}
model = torch.compile(model, backend="openvino", options=opts)
.. important::

The environment variables used in the previous release are still available but their use is
not recommended. They will be fully removed in future releases.

.. dropdown:: Click to view the deprecated options.

* ``OPENVINO_TORCH_BACKEND_DEVICE`` - enables selecting a specific hardware device to run the application.
By default, the OpenVINO backend for ``torch.compile`` runs PyTorch applications using the CPU. Setting
this variable to ``GPU.0``, for example, will make the application use the integrated graphics processor instead.
* ``OPENVINO_TORCH_MODEL_CACHING`` - enables saving the optimized model files to a hard drive after the first application run.
This makes them available for the following application executions, reducing the first-inference latency.
By default, this variable is set to ``False``. Setting it to ``True`` enables caching.
* ``OPENVINO_TORCH_CACHE_DIR`` - enables defining a custom directory for the model files (if ``model_caching`` is set to ``True``).
By default, the OpenVINO IR is saved in the ``cache`` sub-directory, created in the application's root directory.
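
For completeness, a minimal sketch of the deprecated variable-based flow (assuming the variables are read when the ``openvino`` backend is first invoked):

.. code-block:: python

   import os
   import torch

   # Deprecated flow: set the variables before the first torch.compile call.
   os.environ["OPENVINO_TORCH_BACKEND_DEVICE"] = "GPU.0"
   os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "True"
   os.environ["OPENVINO_TORCH_CACHE_DIR"] = "./model_cache"

   import openvino.torch  # registers the "openvino" backend

   model = torch.compile(torch.nn.Linear(8, 8), backend="openvino")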

Windows support
+++++++++++++++++++++

Currently, PyTorch does not officially support the ``torch.compile`` feature on Windows. However, it can be
used by following the instructions below:
@@ -112,10 +152,10 @@ the below instructions:
Support for Automatic1111 Stable Diffusion WebUI
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Automatic1111 Stable Diffusion WebUI is an open-source repository that hosts a browser-based interface for Stable Diffusion-based
image generation. It allows users to create realistic and creative images from text prompts.
Stable Diffusion WebUI is supported on Intel CPUs, Intel integrated GPUs, and Intel discrete GPUs by leveraging the OpenVINO
``torch.compile`` capability. Detailed instructions are available in the
`Stable Diffusion WebUI repository <https://github.com/openvinotoolkit/stable-diffusion-webui/wiki/Installation-on-Intel-Silicon>`__.


@@ -125,10 +165,10 @@ Architecture
The ``torch.compile`` feature is part of PyTorch 2.0, and is based on:

* **TorchDynamo** - a Python-level JIT that hooks into the frame evaluation API in CPython,
(PEP 523) to dynamically modify Python bytecode right before it is executed (PyTorch operators
that cannot be extracted to an FX graph are executed in the native Python environment).
It maintains the eager-mode capabilities using
`Guards <https://pytorch.org/docs/stable/dynamo/guards-overview.html>`__ to ensure the
generated graphs are valid.

* **AOTAutograd** - generates the backward graph corresponding to the forward graph captured by TorchDynamo.
@@ -138,15 +178,15 @@ The ``torch.compile`` feature is part of PyTorch 2.0, and is based on:



When the PyTorch module is wrapped with ``torch.compile``, TorchDynamo traces the module and
rewrites Python bytecode to extract sequences of PyTorch operations into an FX Graph,
which can be optimized by the OpenVINO backend. The Torch FX graphs are first converted to
inlined FX graphs, and the graph partitioning module traverses the inlined FX graph to identify
operators supported by OpenVINO.

All the supported operators are clustered into OpenVINO submodules, converted to the OpenVINO
graph using OpenVINO's PyTorch decoder, and executed in an optimized manner using OpenVINO runtime.
All unsupported operators fall back to the native PyTorch runtime on CPU. If the subgraph
fails during OpenVINO conversion, the subgraph falls back to PyTorch's default inductor backend.


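For intuition about the flow described above, here is a minimal sketch of a custom ``torch.compile`` backend, a toy stand-in (not the OpenVINO backend) that prints the FX graph TorchDynamo hands over:

.. code-block:: python

   import torch

   def inspect_backend(gm: torch.fx.GraphModule, example_inputs):
       # TorchDynamo delivers the captured FX graph here; a real backend,
       # such as OpenVINO's, would partition and compile it instead.
       gm.graph.print_tabular()
       return gm.forward  # run the graph eagerly, with no optimization

   model = torch.compile(torch.nn.Linear(4, 4), backend=inspect_backend)
   model(torch.randn(2, 4))
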
2 changes: 1 addition & 1 deletion samples/python/benchmark/bert_benchmark/bert_benchmark.py
@@ -57,7 +57,7 @@ def main():
sst2_sentences = sst2['validation']['sentence']
# Warm up
encoded_warm_up = dict(tokenizer('Warm up sentence is here.', return_tensors='np'))
for _ in ireqs:
for _ in range(len(ireqs)):
ireqs.start_async(encoded_warm_up)
ireqs.wait_all()
# Benchmark
@@ -52,7 +52,7 @@ def main():
for model_input in compiled_model.inputs:
fill_tensor_random(ireq.get_tensor(model_input))
# Warm up
for _ in ireqs:
for _ in range(len(ireqs)):
ireqs.start_async()
ireqs.wait_all()
# Benchmark for seconds_to_run seconds and at least niter iterations
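Both benchmark fixes above replace direct iteration over the queue with index-based iteration; a minimal warm-up sketch using ``AsyncInferQueue`` (hypothetical model path; assumes the ``openvino.runtime`` API of this release):

.. code-block:: python

   from openvino.runtime import AsyncInferQueue, Core

   core = Core()
   compiled = core.compile_model("model.xml", "CPU")  # hypothetical path
   ireqs = AsyncInferQueue(compiled)

   # One warm-up inference per request slot in the queue.
   for _ in range(len(ireqs)):
       ireqs.start_async()
   ireqs.wait_all()
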
4 changes: 3 additions & 1 deletion src/bindings/c/include/openvino/c/ov_common.h
@@ -187,6 +187,8 @@ typedef enum {
U32, //!< u32 element type
U64, //!< u64 element type
NF4, //!< nf4 element type
F8E4M3, //!< f8e4m3 element type
F8E5M2,  //!< f8e5m2 element type
} ov_element_type_e;

/**
@@ -210,4 +212,4 @@ ov_free(const char* content);
* @ingroup ov_base_c_api
*/
OPENVINO_C_API(const char*)
ov_get_last_err_msg();
4 changes: 3 additions & 1 deletion src/bindings/c/src/ov_tensor.cpp
@@ -24,7 +24,9 @@ const std::map<ov_element_type_e, ov::element::Type> element_type_map = {
{ov_element_type_e::U16, ov::element::u16},
{ov_element_type_e::U32, ov::element::u32},
{ov_element_type_e::U64, ov::element::u64},
{ov_element_type_e::NF4, ov::element::nf4}};
{ov_element_type_e::NF4, ov::element::nf4},
{ov_element_type_e::F8E4M3, ov::element::f8e4m3},
{ov_element_type_e::F8E5M2, ov::element::f8e5m2}};

inline ov_element_type_e find_ov_element_type_e(ov::element::Type type) {
for (auto iter = element_type_map.begin(); iter != element_type_map.end(); iter++) {
28 changes: 10 additions & 18 deletions src/bindings/python/src/pyopenvino/core/common.cpp
@@ -19,24 +19,16 @@ namespace type_helpers {

const std::map<ov::element::Type, py::dtype>& ov_type_to_dtype() {
static const std::map<ov::element::Type, py::dtype> ov_type_to_dtype_mapping = {
{ov::element::f16, py::dtype("float16")},
{ov::element::bf16, py::dtype("float16")},
{ov::element::f32, py::dtype("float32")},
{ov::element::f64, py::dtype("float64")},
{ov::element::i8, py::dtype("int8")},
{ov::element::i16, py::dtype("int16")},
{ov::element::i32, py::dtype("int32")},
{ov::element::i64, py::dtype("int64")},
{ov::element::u8, py::dtype("uint8")},
{ov::element::u16, py::dtype("uint16")},
{ov::element::u32, py::dtype("uint32")},
{ov::element::u64, py::dtype("uint64")},
{ov::element::boolean, py::dtype("bool")},
{ov::element::u1, py::dtype("uint8")},
{ov::element::u4, py::dtype("uint8")},
{ov::element::nf4, py::dtype("uint8")},
{ov::element::i4, py::dtype("int8")},
{ov::element::string, py::dtype("bytes_")},
{ov::element::f16, py::dtype("float16")}, {ov::element::bf16, py::dtype("float16")},
{ov::element::f32, py::dtype("float32")}, {ov::element::f64, py::dtype("float64")},
{ov::element::i8, py::dtype("int8")}, {ov::element::i16, py::dtype("int16")},
{ov::element::i32, py::dtype("int32")}, {ov::element::i64, py::dtype("int64")},
{ov::element::u8, py::dtype("uint8")}, {ov::element::u16, py::dtype("uint16")},
{ov::element::u32, py::dtype("uint32")}, {ov::element::u64, py::dtype("uint64")},
{ov::element::boolean, py::dtype("bool")}, {ov::element::u1, py::dtype("uint8")},
{ov::element::u4, py::dtype("uint8")}, {ov::element::nf4, py::dtype("uint8")},
{ov::element::i4, py::dtype("int8")}, {ov::element::f8e4m3, py::dtype("uint8")},
{ov::element::f8e5m2, py::dtype("uint8")}, {ov::element::string, py::dtype("bytes_")},
};
return ov_type_to_dtype_mapping;
}
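numpy has no native 8-bit float dtypes, so the mapping above surfaces ``f8e4m3`` and ``f8e5m2`` buffers as ``uint8`` on the Python side. A small sketch of the expected behavior (an assumption based on this mapping, not verified output):

.. code-block:: python

   import numpy as np
   import openvino as ov
   from openvino.runtime import opset12 as opset

   c = opset.constant(np.array([1.0, 0.5], dtype=np.float32),
                      dtype=ov.Type.f8e5m2)
   print(c.data.dtype)  # expected: uint8 (raw f8e5m2 bit patterns)
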
@@ -50,6 +50,8 @@ void regclass_graph_Type(py::module m) {
type.attr("u64") = ov::element::u64;
type.attr("bf16") = ov::element::bf16;
type.attr("nf4") = ov::element::nf4;
type.attr("f8e4m3") = ov::element::f8e4m3;
type.attr("f8e5m2") = ov::element::f8e5m2;
type.attr("string") = ov::element::string;

type.def("__hash__", &ov::element::Type::hash);
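With these attributes registered, the new types are reachable from Python as ``ov.Type.f8e4m3`` and ``ov.Type.f8e5m2``; a minimal check (assuming a build that includes this commit):

.. code-block:: python

   import openvino as ov

   # The two 8-bit float types added above, alongside the existing ones.
   print(ov.Type.f8e4m3, ov.Type.f8e5m2)
   assert ov.Type.f8e4m3 != ov.Type.f8e5m2
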
122 changes: 122 additions & 0 deletions src/bindings/python/tests/test_graph/test_constant.py
@@ -411,3 +411,125 @@ def test_memory_sharing(shared_flag):
else:
assert not np.array_equal(ov_const.data, arr)
assert not np.shares_memory(arr, ov_const.data)


@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [
(Type.f32, np.float32),
(Type.f16, np.float16),
])
def test_float_to_f8e5m2_constant(ov_type, numpy_dtype):
from openvino.runtime import opset12 as opset
import openvino as ov
data = np.array([4.75, 4.5, -5.25, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1, -0.0, -0.1, -0.2, -0.3,
-0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0, 0.0000152587890625, 448, 500, 512, 57344], dtype=numpy_dtype)

compressed_const = opset.constant(data, dtype=ov.Type.f8e5m2, name="f8e5m2_constant")
convert = opset.convert(compressed_const, data.dtype)
parameter = opset.parameter(ov.PartialShape([-1]), ov_type)
add_op = opset.add(parameter, convert)
model = ov.Model([add_op], [parameter])

compiled = ov.compile_model(model)
tensor = np.zeros(data.shape, dtype=numpy_dtype)
result = compiled(tensor)[0]

target = [5.0, 4.0, -5.0, 0.0, 0.09375, 0.1875, 0.3125, 0.375, 0.5, 0.625, 0.75,
0.75, 0.875, 1.0, -0.0, -0.09375, -0.1875, -0.3125, -0.375,
-0.5, -0.625, -0.75, -0.75, -0.875, -1.0, 0.0000152587890625,
448, 512, 512, 57344]
target = np.array(target, dtype=numpy_dtype)

assert np.allclose(result, target)


@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [
(Type.f32, np.float32),
(Type.f16, np.float16),
])
def test_float_to_f8e4m3_constant(ov_type, numpy_dtype):
from openvino.runtime import opset12 as opset
import openvino as ov
data = np.array([4.75, 4.5, -5.25, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1, -0.0, -0.1, -0.2, -0.3,
-0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1, 448, 512], dtype=numpy_dtype)

compressed_const = opset.constant(data, dtype=ov.Type.f8e4m3, name="f8e4m3_constant")
convert = opset.convert(compressed_const, data.dtype)
parameter = opset.parameter(ov.PartialShape([-1]), ov_type)
add_op = opset.add(parameter, convert)
model = ov.Model([add_op], [parameter])

compiled = ov.compile_model(model)
tensor = np.zeros(data.shape, dtype=numpy_dtype)
result = compiled(tensor)[0]

target = [5.0, 4.5, -5.0, 0.0, 0.1015625, 0.203125, 0.3125,
0.40625, 0.5, 0.625, 0.6875, 0.8125, 0.875, 1,
-0, -0.1015625, -0.203125, -0.3125, -0.40625, -0.5, -0.625,
-0.6875, -0.8125, -0.875, -1, 448, np.nan]
target = np.array(target, dtype=numpy_dtype)

assert np.allclose(result, target, equal_nan=True)


@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [
(Type.f32, np.float32),
(Type.f16, np.float16),
])
def test_float_to_f8e5m2_convert(ov_type, numpy_dtype):
from openvino.runtime import opset12 as opset
import openvino as ov
data = np.array([4.75, 4.5, -5.25, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1, -0.0, -0.1, -0.2, -0.3,
-0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0, 0.0000152587890625, 448, 500, 512, 57344], dtype=numpy_dtype)

compressed_const = opset.constant(data, dtype=ov_type, name="fx_constant")
convert_to_fp8 = opset.convert(compressed_const, Type.f8e5m2)
convert_back = opset.convert(convert_to_fp8, ov_type)
parameter = opset.parameter(ov.PartialShape([-1]), ov_type)
add_op = opset.add(parameter, convert_back)
model = ov.Model([add_op], [parameter])

compiled = ov.compile_model(model)
tensor = np.zeros(data.shape, dtype=numpy_dtype)
result = compiled(tensor)[0]

target = [5.0, 4.0, -5.0, 0.0, 0.09375, 0.1875, 0.3125, 0.375, 0.5, 0.625, 0.75,
0.75, 0.875, 1.0, -0.0, -0.09375, -0.1875, -0.3125, -0.375,
-0.5, -0.625, -0.75, -0.75, -0.875, -1.0, 0.0000152587890625,
448, 512, 512, 57344]
target = np.array(target, dtype=numpy_dtype)

assert np.allclose(result, target)


@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [
(Type.f32, np.float32),
(Type.f16, np.float16),
])
def test_float_to_f8e4m3_convert(ov_type, numpy_dtype):
from openvino.runtime import opset12 as opset
import openvino as ov
data = np.array([4.75, 4.5, -5.25, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1, -0.0, -0.1, -0.2, -0.3,
-0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1, 448, 512], dtype=numpy_dtype)

compressed_const = opset.constant(data, dtype=ov_type, name="fx_constant")
convert_to_fp8 = opset.convert(compressed_const, Type.f8e4m3)
convert_back = opset.convert(convert_to_fp8, ov_type)
parameter = opset.parameter(ov.PartialShape([-1]), ov_type)
add_op = opset.add(parameter, convert_back)
model = ov.Model([add_op], [parameter])

compiled = ov.compile_model(model)
tensor = np.zeros(data.shape, dtype=numpy_dtype)
result = compiled(tensor)[0]

target = [5.0, 4.5, -5.0, 0.0, 0.1015625, 0.203125, 0.3125,
0.40625, 0.5, 0.625, 0.6875, 0.8125, 0.875, 1,
-0, -0.1015625, -0.203125, -0.3125, -0.40625, -0.5, -0.625,
-0.6875, -0.8125, -0.875, -1, 448, np.nan]
target = np.array(target, dtype=numpy_dtype)

assert np.allclose(result, target, equal_nan=True)
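
The f8e5m2 targets in the tests above follow from rounding the fp16 bit pattern to its top byte with round-half-to-even, since f8e5m2 reuses fp16's sign and exponent layout. A standalone sketch (finite values only; NaN, infinity, and saturation handling are omitted):

.. code-block:: python

   import numpy as np

   def quantize_f8e5m2(x):
       """Round x to the nearest f8e5m2 value (round-half-to-even)."""
       bits = int(np.float16(x).view(np.uint16))
       # Round at bit 8: add 0x7F plus the parity of the bit being kept,
       # then clear the low byte that f8e5m2 discards.
       rounded = (bits + 0x7F + ((bits >> 8) & 1)) & 0xFF00
       return float(np.uint16(rounded).view(np.float16))

   assert quantize_f8e5m2(0.1) == 0.09375   # matches the test target
   assert quantize_f8e5m2(4.5) == 4.0       # tie rounds to even mantissa
   assert quantize_f8e5m2(500.0) == 512.0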
2 changes: 2 additions & 0 deletions src/core/include/ngraph/type/element_type.hpp
@@ -35,6 +35,8 @@ using ov::element::dynamic;
using ov::element::f16;
using ov::element::f32;
using ov::element::f64;
using ov::element::f8e4m3;
using ov::element::f8e5m2;
using ov::element::i16;
using ov::element::i32;
using ov::element::i4;
