From 64e3604bc48f538433d5cd0482f1fec31e562b8d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 23 Jul 2020 14:17:15 +0300 Subject: [PATCH] Updated documentation for 2020.4 (#1434) * Updated documentation for 2020.4 * Updated Core::ReadNetwork documentation (#1178) * Fixed docs Co-authored-by: Ilya Churaev --- docs/IE_DG/API_Changes.md | 71 ++++++++++++++++++- docs/IE_DG/OnnxImporterTutorial.md | 2 +- docs/IE_DG/inference_engine_intro.md | 17 +++-- docs/IE_DG/supported_plugins/GNA.md | 2 +- docs/IE_DG/supported_plugins/MULTI.md | 11 --- .../dldt_optimization_guide.md | 17 ++--- inference-engine/include/ie_core.hpp | 28 +++++--- 7 files changed, 107 insertions(+), 41 deletions(-) diff --git a/docs/IE_DG/API_Changes.md b/docs/IE_DG/API_Changes.md index f3a7c45417dde4..5a82cfd19ba7d7 100644 --- a/docs/IE_DG/API_Changes.md +++ b/docs/IE_DG/API_Changes.md @@ -19,7 +19,6 @@ Starting with the OpenVINO™ toolkit 2020.2 release, all of the features previo Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting June 1, 2020 and will be completely removed on December 1, 2020. Users are recommended to migrate to the ONNX RT Execution Provider for OpenVINO™ toolkit as the unified solution for all AI inferencing on Intel® hardware. - ## 2020.4 ### New API @@ -33,6 +32,75 @@ Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting Jun * METRIC_KEY(OPTIMIZATION_CAPABILITIES) * METRIC_VALUE(BF16) +### Deprecated API + + **Myriad Plugin API:** + + * VPU_CONFIG_KEY(IGNORE_IR_STATISTIC) + +### Removed API + + **Inference Engine NN Builder API:** + + * InferenceEngine::Builder::EltwiseLayer + * InferenceEngine::Builder::MemoryLayer + * InferenceEngine::Builder::ROIPoolingLayer + * InferenceEngine::Builder::DeconvolutionLayer + * InferenceEngine::Builder::ReLULayer + * InferenceEngine::Builder::TanHLayer + * InferenceEngine::Builder::InputLayer + * InferenceEngine::Builder::PoolingLayer + * InferenceEngine::Builder::CropLayer + * InferenceEngine::Builder::GRUSequenceLayer + * InferenceEngine::Builder::NormLayer + * InferenceEngine::Builder::LSTMSequenceLayer + * InferenceEngine::Builder::ClampLayer + * InferenceEngine::Builder::PSROIPoolingLayer + * InferenceEngine::Builder::Layer + * InferenceEngine::Builder::RNNSequenceLayer + * InferenceEngine::Builder::ReorgYoloLayer + * InferenceEngine::Builder::NormalizeLayer + * InferenceEngine::Builder::PriorBoxClusteredLayer + * InferenceEngine::Builder::MVNLayer + * InferenceEngine::Builder::PermuteLayer + * InferenceEngine::Builder::SimplerNMSLayer + * InferenceEngine::Builder::ConstLayer + * InferenceEngine::Builder::DeformableConvolutionLayer + * InferenceEngine::Builder::FullyConnectedLayer + * InferenceEngine::Builder::PriorBoxLayer + * InferenceEngine::Builder::SoftMaxLayer + * InferenceEngine::Builder::OutputLayer + * InferenceEngine::Builder::TileLayer + * InferenceEngine::Builder::SplitLayer + * InferenceEngine::Builder::PReLULayer + * InferenceEngine::Builder::RegionYoloLayer + * InferenceEngine::Builder::ReshapeLayer + * InferenceEngine::Builder::ConvolutionLayer + * InferenceEngine::Builder::DetectionOutputLayer + * InferenceEngine::Builder::ConcatLayer + * InferenceEngine::Builder::ELULayer + * InferenceEngine::Builder::GRNLayer + * InferenceEngine::Builder::LRNLayer + * InferenceEngine::Builder::ArgMaxLayer + * InferenceEngine::Builder::ReLU6Layer + * InferenceEngine::Builder::ScaleShiftLayer + * InferenceEngine::Builder::ProposalLayer + * InferenceEngine::Builder::SigmoidLayer + * 
InferenceEngine::Builder::ResampleLayer + * InferenceEngine::Builder::CTCGreedyDecoderLayer + * InferenceEngine::Builder::BatchNormalizationLayer + * InferenceEngine::Builder::LayerDecorator + * InferenceEngine::Builder::PowerLayer + * InferenceEngine::Builder::Network + * InferenceEngine::Builder::PortInfo + * InferenceEngine::Builder::Connection + * InferenceEngine::Builder::PortData + * InferenceEngine::Builder::Port + * InferenceEngine::Builder::ILayer + * InferenceEngine::Builder::INetworkIterator + * InferenceEngine::Builder::INetwork + * InferenceEngine::Builder::ILayer + ## 2020.2 ### New API @@ -273,7 +341,6 @@ Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting Jun * InferenceEngine::Builder::INetwork * InferenceEngine::Builder::ILayer - **Plugin API:** * InferenceEngine::InferencePlugin C++ plugin wrapper class diff --git a/docs/IE_DG/OnnxImporterTutorial.md b/docs/IE_DG/OnnxImporterTutorial.md index 7b336f97a633fc..a63b0f9f44c4df 100644 --- a/docs/IE_DG/OnnxImporterTutorial.md +++ b/docs/IE_DG/OnnxImporterTutorial.md @@ -2,7 +2,7 @@ > **NOTE**: This tutorial is deprecated. Since OpenVINO™ 2020.4 version, Inference Engine enables reading ONNX models via the Inference Engine Core API > and there is no need to use directly the low-level ONNX* Importer API anymore. -> To read ONNX\* models, it's recommended to use the `Core::ReadNetwork()` method that provide a uniform way to read models from IR or ONNX format. +> To read ONNX\* models, it's recommended to use the InferenceEngine::Core::ReadNetwork method that provide a uniform way to read models from IR or ONNX format. This tutorial demonstrates how to use the ONNX\* Importer API. This API makes it possible to create an nGraph `Function` object from an imported ONNX model. diff --git a/docs/IE_DG/inference_engine_intro.md b/docs/IE_DG/inference_engine_intro.md index 0e54e11c5787fc..cb3b43fcab72dc 100644 --- a/docs/IE_DG/inference_engine_intro.md +++ b/docs/IE_DG/inference_engine_intro.md @@ -21,21 +21,30 @@ Modules in the Inference Engine component ### Core Inference Engine Libraries ### Your application must link to the core Inference Engine libraries: -* Linux* OS: +* Linux* OS: - `libinference_engine.so`, which depends on `libinference_engine_transformations.so` and `libngraph.so` - `libinference_engine_legacy.so`, which depends on `libtbb.so` -* Windows* OS: +* Windows* OS: - `inference_engine.dll`, which depends on `inference_engine_transformations.dll` and `ngraph.dll` - `inference_engine_legacy.dll`, which depends on `tbb.dll` The required C++ header files are located in the `include` directory. This library contains the classes to: -* Read the network (InferenceEngine::CNNNetReader) +* Create Inference Engine Core object to work with devices and read network (InferenceEngine::Core) * Manipulate network information (InferenceEngine::CNNNetwork) -* Create Inference Engine Core object to work with devices (InferenceEngine::Core) * Execute and pass inputs and outputs (InferenceEngine::ExecutableNetwork and InferenceEngine::InferRequest) +### Plugin Libraries to read a network object ### + +Starting from 2020.4 release, Inference Engine introduced a concept of `CNNNetwork` reader plugins. 
Such plugins can be automatically dynamically loaded by Inference Engine in runtime depending on file format: +* Linux* OS: + - `libinference_engine_ir_reader.so` to read a network from IR + - `libinference_engine_onnx_reader.so` to read a network from ONNX model format +* Windows* OS: + - `inference_engine_ir_reader.dll` to read a network from IR + - `inference_engine_onnx_reader.dll` to read a network from ONNX model format + ### Device-specific Plugin Libraries ### For each supported target device, Inference Engine provides a plugin — a DLL/shared library that contains complete implementation for inference on this particular device. The following plugins are available: diff --git a/docs/IE_DG/supported_plugins/GNA.md b/docs/IE_DG/supported_plugins/GNA.md index a51cd47ffdce03..3ddda708a47575 100644 --- a/docs/IE_DG/supported_plugins/GNA.md +++ b/docs/IE_DG/supported_plugins/GNA.md @@ -116,7 +116,7 @@ When specifying key values as raw strings (that is, when using Python API), omit ## How to Interpret Performance Counters -As a result of collecting performance counters using `InferenceEngine::IInferencePlugin::GetPerformanceCounts`, you can find various performance data about execution on GNA. +As a result of collecting performance counters using `InferenceEngine::InferRequest::GetPerformanceCounts`, you can find various performance data about execution on GNA. Returned map stores a counter description as a key, counter value is stored in the `realTime_uSec` field of the `InferenceEngineProfileInfo` structure. Current GNA implementation calculates counters for the whole utterance scoring and does not provide per-layer information. API allows to retrieve counter units in cycles, but they can be converted to seconds as follows: ``` diff --git a/docs/IE_DG/supported_plugins/MULTI.md b/docs/IE_DG/supported_plugins/MULTI.md index 2d30a5e4322ea8..4d382ecfa64b5b 100644 --- a/docs/IE_DG/supported_plugins/MULTI.md +++ b/docs/IE_DG/supported_plugins/MULTI.md @@ -41,17 +41,6 @@ Basically, there are three ways to specify the devices to be use by the "MULTI": ExecutableNetwork exec1 = ie.LoadNetwork(network, "MULTI", {{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}}); //NEW IE-CENTRIC API, same as previous, but configuration of the "MULTI" is part of the name (so config is empty), also network-specific: ExecutableNetwork exec2 = ie.LoadNetwork(network, "MULTI:HDDL,GPU", {}); - - //Similarly for the deprecated (plugin-centric) API - //for example globally pre-configuring the plugin with the explicit option: - //auto plugin0 = PluginDispatcher().getPluginByDevice("MULTI"); - //plugin0.SetConfig({{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}}); - //ExecutableNetwork exec3 = plugin.LoadNetwork(network, {}); - // part of the config for the LoadNetwork or device name - //ExecutableNetwork exec4 = plugin0.LoadNetwork(network, {{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}}); - // part of the device name - //auto plugin1 = PluginDispatcher().getPluginByDevice("MULTI:HDDL,GPU"); - //ExecutableNetwork exec5 = plugin1.LoadNetwork(network, {}); ``` Notice that the priorities of the devices can be changed in real-time for the executable network: ```cpp diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index 38d9b224d6b5cf..616feba615f63c 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -272,15 +272,13 @@ using namespace InferenceEngine::PluginConfigParams; using namespace 
InferenceEngine::HeteroConfigParams; ... -enginePtr = dispatcher.getPluginByDevice("HETERO:FPGA,CPU"); -InferencePlugin plugin(enginePtr); -plugin.SetConfig({ {KEY_HETERO_DUMP_GRAPH_DOT, YES} }); +auto execNetwork = ie.LoadNetwork(network, "HETERO:FPGA,CPU", { {KEY_HETERO_DUMP_GRAPH_DOT, YES} }); ``` After enabling the configuration key, the heterogeneous plugin generates two files: - `hetero_affinity.dot` - per-layer affinities. This file is generated only if default fallback policy was executed (as otherwise you have set the affinities by yourself, so you know them). -- `hetero_subgraphs.dot` - affinities per sub-graph. This file is written to the disk during execution of `ICNNNetwork::LoadNetwork` for the heterogeneous plugin. +- `hetero_subgraphs.dot` - affinities per sub-graph. This file is written to the disk during execution of `Core::LoadNetwork` for the heterogeneous flow. You can use GraphViz\* utility or `.dot` converters (for example, to `.png` or `.pdf`), like xdot\*, available on Linux\* OS with `sudo apt-get install xdot`. Below is an example of the output trimmed to the two last layers (one executed on the FPGA and another on the CPU): @@ -439,16 +437,11 @@ Infer Request based API offers two types of request: Sync and Async. The Sync is More importantly, an infer request encapsulates the reference to the “executable” network and actual inputs/outputs. Now, when you load the network to the plugin, you get a reference to the executable network (you may consider that as a queue). Actual infer requests are created by the executable network: ```cpp -CNNNetReader network_reader; -network_reader.ReadNetwork("Model.xml"); -network_reader.ReadWeights("Model.bin"); -auto network = network_reader.getNetwork(); +Core ie; +auto network = ie.ReadNetwork("Model.xml", "Model.bin"); InferenceEngine::InputsDataMap input_info(network.getInputsInfo()); -InferenceEnginePluginPtr engine_ptr = PluginDispatcher(pluginDirs).getSuitablePlugin(TargetDevice::eGPU); -InferencePlugin plugin(engine_ptr); - -auto executable_network = plugin.LoadNetwork(network, {/*opt config*/}); +auto executable_network = ie.LoadNetwork(network, "GPU"); auto infer_request = executable_network.CreateInferRequest(); for (auto & item : inputInfo) { diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie_core.hpp index 6a13a0f9aa1223..d40bb39c6aee54 100644 --- a/inference-engine/include/ie_core.hpp +++ b/inference-engine/include/ie_core.hpp @@ -87,10 +87,13 @@ class INFERENCE_ENGINE_API_CLASS(Core) { #ifdef ENABLE_UNICODE_PATH_SUPPORT /** - * @brief Reads IR xml and bin files - * @param modelPath path to IR file - * @param binPath path to bin file, if path is empty, will try to read bin file with the same name as xml and - * if bin file with the same name was not found, will load IR without weights. + * @brief Reads models from IR and ONNX formats + * @param modelPath path to model + * @param binPath path to data file + * For IR format (*.bin): + * * if path is empty, will try to read bin file with the same name as xml and + * * if bin file with the same name was not found, will load IR without weights. 
+     * ONNX models with data files are not supported
      * @return CNNNetwork
      */
     CNNNetwork ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath = {}) const {
@@ -99,17 +102,22 @@ class INFERENCE_ENGINE_API_CLASS(Core) {
 #endif
     /**
-     * @brief Reads IR xml and bin files
-     * @param modelPath path to IR file
-     * @param binPath path to bin file, if path is empty, will try to read bin file with the same name as xml and
-     * if bin file with the same name was not found, will load IR without weights.
+     * @brief Reads models from IR and ONNX formats
+     * @param modelPath path to model
+     * @param binPath path to data file
+     * For IR format (*.bin):
+     *  * if \p binPath is empty, tries to read *.bin file with the same name as xml and
+     *  * if *.bin file with the same name was not found, tries to load IR without weights.
+     * ONNX models with data files are not supported
      * @return CNNNetwork
      */
     CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = {}) const;

     /**
-     * @brief Reads IR xml and bin (with the same name) files
-     * @param model string with IR
+     * @brief Reads models from IR and ONNX formats
+     * @param model string with model in IR or ONNX format
      * @param weights shared pointer to constant blob with weights
+     * ONNX models do not support loading weights from a separate data blob.
+     * For the ONNX case, the second parameter should contain an empty blob.
      * @return CNNNetwork
      */
     CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const;
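Taken together, the changes above document the 2020.4 Core-centric workflow: `InferenceEngine::Core::ReadNetwork` reads both IR and ONNX models, and `Core::LoadNetwork` replaces the plugin-centric loading path. Below is a minimal sketch of how these calls fit together; it is not part of the patch, and the model file names and the `"CPU"` device choice are placeholders:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;

    // Read an IR model; the *.bin path may be omitted if it has the same name as the *.xml file.
    InferenceEngine::CNNNetwork irNetwork = ie.ReadNetwork("model.xml", "model.bin");

    // Read an ONNX model directly; no separate weights file is passed,
    // since ONNX models with data files are not supported.
    InferenceEngine::CNNNetwork onnxNetwork = ie.ReadNetwork("model.onnx");

    // Load a network to a device and create an infer request from the executable network.
    InferenceEngine::ExecutableNetwork execNetwork = ie.LoadNetwork(irNetwork, "CPU");
    InferenceEngine::InferRequest request = execNetwork.CreateInferRequest();
    return 0;
}
```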
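Similarly, a brief sketch of collecting the performance counters referenced in the GNA documentation change, now retrieved through `InferenceEngine::InferRequest::GetPerformanceCounts`; the model path is a placeholder and the `"GNA"` device is assumed to be available:

```cpp
#include <inference_engine.hpp>
#include <iostream>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");

    // Enable performance counting for the loaded network.
    InferenceEngine::ExecutableNetwork execNetwork = ie.LoadNetwork(
        network, "GNA",
        {{CONFIG_KEY(PERF_COUNT), InferenceEngine::PluginConfigParams::YES}});

    InferenceEngine::InferRequest request = execNetwork.CreateInferRequest();
    request.Infer();

    // The returned map stores a counter description as a key; the counter value
    // is in the realTime_uSec field of InferenceEngineProfileInfo.
    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> counters =
        request.GetPerformanceCounts();
    for (const auto& counter : counters) {
        std::cout << counter.first << ": " << counter.second.realTime_uSec << std::endl;
    }
    return 0;
}
```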