Merge pull request #56 from nosovmik/rebase8
Rebase8
nosovmik authored May 13, 2021
2 parents e9b420a + 7c13281 commit 137aecf
Showing 214 changed files with 4,195 additions and 8,558 deletions.
9 changes: 1 addition & 8 deletions .ci/azure/linux.yml
@@ -127,6 +127,7 @@ jobs:
displayName: 'nGraph UT'
continueOnError: false

# python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(BIN_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false
@@ -163,14 +164,6 @@ jobs:
displayName: 'CPU FuncTests'
continueOnError: false

- script: |
export DATA_PATH=$(MODELS_PATH)
export MODELS_PATH=$(MODELS_PATH)
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
workingDirectory: $(WORK_DIR)
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
export DATA_PATH=$(MODELS_PATH)
export MODELS_PATH=$(MODELS_PATH)
8 changes: 0 additions & 8 deletions .ci/azure/mac.yml
@@ -137,14 +137,6 @@ jobs:
displayName: 'CPU FuncTests'
continueOnError: false

- script: |
export DATA_PATH=$(MODELS_PATH)
export MODELS_PATH=$(MODELS_PATH)
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
workingDirectory: $(WORK_DIR)
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
export DATA_PATH=$(MODELS_PATH)
export MODELS_PATH=$(MODELS_PATH)
11 changes: 0 additions & 11 deletions .ci/azure/windows.yml
@@ -167,17 +167,6 @@ jobs:
displayName: 'CPU FuncTests - IB'
continueOnError: false
# Add for gtest-parallel, it hangs now (CVS-33386)
#python $(WORK_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
- script: |
set PATH=$(TEST_ENV_PATH)
set DATA_PATH=$(MODELS_PATH)
set MODELS_PATH=$(MODELS_PATH)
rem "$(IB_TESTCONSOLE)" $(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests-IB.xml
$(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests.xml
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
set PATH=$(TEST_ENV_PATH)
set DATA_PATH=$(MODELS_PATH)
2 changes: 1 addition & 1 deletion docs/IE_DG/supported_plugins/CL_DNN.md
@@ -116,7 +116,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory |
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies the number of GPU "execution" streams for the throughput mode (upper bound for the number of inference requests that can be executed simultaneously).<br>This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage the full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep the throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates the bare minimum of streams needed to improve performance; this is the most portable option if you are not sure how many resources your target machine has (and what the optimal number of streams would be). <br> A positive integer value creates the requested number of streams. |
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|

| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used by the clDNN engine, e.g., for JIT compilation of clDNN kernels or clDNN CPU kernel processing. The default value is the maximum number of threads available in the host environment, to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum number of available threads or is less than zero, it is set to the maximum number of available threads. A value smaller than the number of available HW threads can be specified according to the usage scenario, e.g., when the user wants to reserve CPU threads for other tasks while the clDNN plugin is running. Note that setting a lower value affects not only the network loading time but also the CPU layers of clDNN networks that are optimized with multi-threading. |
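
For illustration, a minimal sketch of setting these options through the Inference Engine Python API (a hedged example: the IR file names are hypothetical, and the raw-string key names are assumed to be the `KEY_`-prefix-omitted forms of the keys above):

```python
from openvino.inference_engine import IECore  # 2021.x Python API

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # hypothetical IR files
# Two GPU throughput streams and up to four CPU threads for the clDNN engine
exec_net = ie.load_network(network=net, device_name="GPU",
                           config={"GPU_THROUGHPUT_STREAMS": "2",
                                   "CLDNN_MAX_NUM_THREADS": "4"})
```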
## Note on Debug Capabilities of the GPU Plugin

The Inference Engine GPU plugin can dump user custom OpenCL&trade; kernels to a file, allowing you to properly debug compilation issues in your custom kernels.
@@ -0,0 +1,107 @@
# Convert PyTorch\* RNN-T Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}

These instructions cover conversion of the RNN-T model from the [MLCommons](https://github.com/mlcommons) repository. Follow
the steps below to export the PyTorch* model to ONNX* before converting it to IR:

**Step 1**. Clone the RNN-T PyTorch implementation from the MLCommons repository (revision r1.0). Make a shallow clone to pull
only the RNN-T model without the full repository. If you already have a full clone, skip this step and go to **Step 2**:
```bash
git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1
cd rnnt_for_openvino
git checkout HEAD speech_recognition/rnnt
```

**Step 2**. If you already have a full clone of the MLCommons inference repository, create a folder for
the pretrained PyTorch model, where conversion into IR will take place. You will also need to specify the path to
your full clone in **Step 5**. Skip this step if you made a shallow clone in **Step 1**.

```bash
mkdir rnnt_for_openvino
cd rnnt_for_openvino
```

**Step 3**. Download the pretrained weights for the PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ.
On UNIX*-like systems you can use `wget`:
```bash
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
```
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt.

**Step 4**. Install the required Python* packages:
```bash
pip3 install torch toml
```

**Step 5**. Export the RNN-T model to ONNX with the script below. Copy the code into a file named
`export_rnnt_to_onnx.py` and run it in the current directory, `rnnt_for_openvino`:

> **NOTE**: If you already have a full clone of the MLCommons inference repository, you need to
> set the `mlcommons_inference_path` variable accordingly.
```python
import toml
import torch
import sys


def load_and_migrate_checkpoint(ckpt_path):
    # Rename checkpoint keys to match the model definition and drop preprocessing buffers
    checkpoint = torch.load(ckpt_path, map_location="cpu")
    migrated_state_dict = {}
    for key, value in checkpoint['state_dict'].items():
        key = key.replace("joint_net", "joint.net")
        migrated_state_dict[key] = value
    del migrated_state_dict["audio_preprocessor.featurizer.fb"]
    del migrated_state_dict["audio_preprocessor.featurizer.window"]
    return migrated_state_dict


mlcommons_inference_path = './'  # specify the relative path to the MLCommons inference repository
checkpoint_path = 'DistributedDataParallel_1576581068.9962234-epoch-100.pt'
config_toml = 'speech_recognition/rnnt/pytorch/configs/rnnt.toml'
config = toml.load(config_toml)
rnnt_vocab = config['labels']['labels']
sys.path.insert(0, mlcommons_inference_path + 'speech_recognition/rnnt/pytorch')

from model_separable_rnnt import RNNT

model = RNNT(config['rnnt'], len(rnnt_vocab) + 1, feature_config=config['input_eval'])
model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path))

seq_length, batch_size, feature_length = 157, 1, 240
inp = torch.randn([seq_length, batch_size, feature_length])
feature_length = torch.LongTensor([seq_length])
x_padded, x_lens = model.encoder(inp, feature_length)
# Export each submodule (encoder, prediction, joint) to its own ONNX file
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
                  input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})

symbol = torch.LongTensor([[20]])
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
g, hidden = model.prediction.forward(symbol, hidden)
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
                  input_names=['input.1', '1', '2'],
                  dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})

f = torch.randn([batch_size, 1, 1024])
model.joint.forward(f, g)
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
                  input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
```

```bash
python3 export_rnnt_to_onnx.py
```

After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx`, and `rnnt_joint.onnx` will be saved in
the current directory.
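Each file corresponds to one of the three RNN-T submodules (encoder, prediction network, and joint network); they are exported separately because the submodules run separately during decoding, which is also why Step 6 converts each one to IR individually.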

**Step 6**. Run the conversion commands:

```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Note that the hardcoded sequence length value of 157 was taken from the MLCommons code, but conversion to IR preserves
network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can manually change the input shapes to any value either during conversion or
at inference time.
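
To illustrate the reshapeability note above, here is a minimal sketch using the Inference Engine Python API (an assumption-laden example: the input name and whether the frozen sequence-length input of the encoder tolerates an arbitrary reshape should be verified on the converted model):

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="rnnt_encoder.xml", weights="rnnt_encoder.bin")
input_name = next(iter(net.input_info))   # assumed to be 'input.1' for this model
net.reshape({input_name: [200, 1, 240]})  # e.g., sequence length 200 instead of 157
exec_net = ie.load_network(network=net, device_name="CPU")
```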
1 change: 1 addition & 0 deletions docs/doxygen/ie_docs.xml
@@ -56,6 +56,7 @@ limitations under the License.
<tab type="user" title="Convert DLRM ONNX* Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
<tab type="usergroup" title="Converting Your PyTorch* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch">
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
<tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
</tab>
2 changes: 1 addition & 1 deletion docs/ops/movement/VariadicSplit_1.md
@@ -26,7 +26,7 @@ Where D is the rank of input tensor `data`. The sum of elements in `split_length
* **2**: `axis`. Axis along `data` to split. A scalar of type `T2` with value from range `-rank(data) .. rank(data)-1`. Negative values address dimensions from the end.
**Required.**

* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lenghts` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lengths` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
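For example, for `data` of shape `[6, 12, 10]`, `axis` = 1, and `split_lengths` = `[2, -1, 4]`, the operation produces three outputs of shapes `[6, 2, 10]`, `[6, 6, 10]`, and `[6, 4, 10]`; the `-1` entry absorbs the remaining 12 - 2 - 4 = 6 items along the axis.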

**Outputs**

@@ -6,6 +6,7 @@ from ..inference_engine.ie_api cimport IENetwork

from libcpp cimport bool
from libcpp.string cimport string
from libc.stdint cimport int64_t


def ApplyMOCTransformations(IENetwork network, bool cf):
@@ -16,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
C.ApplyPOTTransformations(network.impl, device)


def ApplyLowLatencyTransformation(IENetwork network):
C.ApplyLowLatencyTransformation(network.impl)
def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
C.ApplyLowLatencyTransformation(network.impl, num_iterations)


def ApplyPruningTransformation(IENetwork network):
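
For context, a hedged sketch of how the extended binding might be called from Python (the `openvino.offline_transformations` import path is assumed, and note that the C++ implementation in the next hunk still ignores `num_iterations`, per its TODO):

```python
from openvino.inference_engine import IECore
from openvino.offline_transformations import ApplyLowLatencyTransformation  # assumed module path

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # hypothetical IR with a TensorIterator
ApplyLowLatencyTransformation(net, num_iterations=1)  # new optional argument, defaults to 1
exec_net = ie.load_network(network=net, device_name="CPU")
```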
@@ -26,8 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction());
}

void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network) {
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
ngraph::pass::Manager manager;
// TODO: pass num_iterations to LowLatency
manager.register_pass<ngraph::pass::LowLatency>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>();

@@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);

void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);

void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);

void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

@@ -3,6 +3,7 @@

from libcpp cimport bool
from libcpp.string cimport string
from libc.stdint cimport int64_t

from ..inference_engine.ie_api_impl_defs cimport IENetwork

@@ -11,7 +12,7 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi

cdef void ApplyPOTTransformations(IENetwork network, string device)

cdef void ApplyLowLatencyTransformation(IENetwork network)
cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)

cdef void ApplyPruningTransformation(IENetwork network)

4 changes: 2 additions & 2 deletions inference-engine/include/cpp/ie_executable_network.hpp
@@ -35,8 +35,8 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
std::shared_ptr<IExecutableNetworkInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so;

explicit ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so);
ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so);

friend class InferencePlugin;

4 changes: 2 additions & 2 deletions inference-engine/include/cpp/ie_infer_request.hpp
@@ -36,8 +36,8 @@ class INFERENCE_ENGINE_API_CLASS(InferRequest) {
std::shared_ptr<IInferRequestInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so;

explicit InferRequest(const std::shared_ptr<IInferRequestInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so);
InferRequest(const std::shared_ptr<IInferRequestInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so);

friend class ExecutableNetwork;

9 changes: 5 additions & 4 deletions inference-engine/include/cpp/ie_memory_state.hpp
@@ -11,9 +11,10 @@
#pragma once

#include <string>
#include <memory>

#include "ie_api.h"
#include "ie_blob.h"
#include "details/ie_so_loader.h"

namespace InferenceEngine {

@@ -28,15 +29,15 @@ class IVariableStateInternal;
*/
class INFERENCE_ENGINE_API_CLASS(VariableState) {
std::shared_ptr<IVariableStateInternal> _impl = nullptr;
details::SharedObjectLoader::Ptr _so = nullptr;
std::shared_ptr<details::SharedObjectLoader> _so = nullptr;

/**
* @brief Constructs VariableState from the initialized std::shared_ptr
* @param impl Initialized shared pointer
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
*/
explicit VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const details::SharedObjectLoader::Ptr& so = {});
VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so);

friend class InferRequest;
friend class ExecutableNetwork;
2 changes: 0 additions & 2 deletions inference-engine/src/cldnn_engine/CMakeLists.txt
@@ -40,8 +40,6 @@ target_include_directories(${TARGET_NAME} PRIVATE

set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})


set_ie_threading_interface_for(clDNN_lib)
# Failed because of OpenCL
# ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})

3 changes: 1 addition & 2 deletions inference-engine/src/cldnn_engine/ops/matmul.cpp
@@ -60,8 +60,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
auto shape_a = op->get_input_shape(0);
auto shape_b = op->get_input_shape(1);

bool is_fc = ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) ||
ngraph::is_type<ngraph::op::v0::FakeQuantize>(op->get_input_node_shared_ptr(1));
bool is_fc = IsNodeOnConstPath(op->get_input_node_shared_ptr(1));
is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2;

if (is_fc) {
2 changes: 2 additions & 0 deletions inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -110,6 +110,8 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetwork(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config_map,
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
InferenceEngine::ExecutableNetwork LoadNetwork(const std::string &modelPath,
const std::map<std::string, std::string> &config_map) override { THROW_GNA_EXCEPTION << "Not implemented"; }
bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {}
InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}