[PP] Added ability to preprocess inputs into plugin desired format (openvinotoolkit#857)

changed InferRequestInternal:
 - added _deviceInputs member to store plugin-desired preprocessing
   targets
 - added a default argument to preProcessingRequired to describe the
   plugin-specific desired preprocessing target
 - changed SetBlob and GetBlob to deal with plugin-desired preprocessing
   targets (_deviceInputs)
 - added addInputPreProcessingFor helper method to avoid code
   duplication

changed TEMPLATE plugin to use the new functionality:
 - removed explicit precision conversion (to use the built-in one of
   InferRequestInternal)
 - replaced _networkInputBlobs with InferRequestInternal::_deviceInputs
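
The sketch below (not from the commit; the helper name and the FP32/NCHW
choice are assumptions) illustrates the intended plugin-side usage: the
plugin pre-populates _deviceInputs with blobs in the format its device
actually consumes, and InferRequestInternal then converts user blobs into
those targets during execDataPreprocessing.

// Hypothetical helper against the 2020.x Inference Engine API.
#include <ie_blob.h>
#include <ie_input_info.hpp>

using namespace InferenceEngine;

void allocateDeviceInputs(const InputsDataMap& networkInputs, BlobMap& deviceInputs) {
    for (const auto& in : networkInputs) {
        const auto& userDesc = in.second->getTensorDesc();
        // Assumption: this device consumes FP32/NCHW regardless of what the user supplies.
        TensorDesc deviceDesc(Precision::FP32, userDesc.getDims(), Layout::NCHW);
        Blob::Ptr deviceBlob = make_shared_blob<float>(deviceDesc);
        deviceBlob->allocate();
        // SetBlob/GetBlob compare user blobs against these targets and
        // register pre-processing when they differ.
        deviceInputs[in.first] = deviceBlob;
    }
}
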
anton-potapov authored and mryzhov committed Dec 15, 2020
1 parent b366f93 commit fec0a54
Showing 9 changed files with 112 additions and 81 deletions.
17 changes: 5 additions & 12 deletions docs/template_plugin/src/template_infer_request.cpp
@@ -112,7 +112,7 @@ static void AllocateImpl(const BlobDataMap& blobDataMap,

void TemplateInferRequest::allocateBlobs() {
auto&& parameters = _executableNetwork->_function->get_parameters();
AllocateImpl(_networkInputs, _inputs, _networkInputBlobs, [&] (const std::string& blobName) {
AllocateImpl(_networkInputs, _inputs, _deviceInputs, [&] (const std::string& blobName) {
return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
});
auto&& results = _executableNetwork->_function->get_results();
@@ -176,21 +176,14 @@ void TemplateInferRequest::inferPreprocess() {
auto start = Time::now();
// NOTE: After the InferRequestInternal::execDataPreprocessing call,
// an input can point to a memory region other than the one allocated in the constructor.
InferRequestInternal::execDataPreprocessing(_inputs);
for (auto&& input : _inputs) {
auto inputBlob = input.second;
auto networkInput = _networkInputBlobs[input.first];
if (inputBlob->getTensorDesc().getPrecision() == networkInput->getTensorDesc().getPrecision()) {
networkInput = inputBlob;
} else {
blobCopy(inputBlob, networkInput);
}
auto index = _executableNetwork->_inputIndex[input.first];
InferRequestInternal::execDataPreprocessing(_deviceInputs);
for (auto&& networkInput : _deviceInputs) {
auto index = _executableNetwork->_inputIndex[networkInput.first];
const auto& parameter = _parameters[index];
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(parameterType, parameterShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput)->rmap().as<void*>());
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
}
for (auto&& output : _outputs) {
auto outputBlob = output.second;
1 change: 0 additions & 1 deletion docs/template_plugin/src/template_infer_request.hpp
@@ -63,7 +63,6 @@ class TemplateInferRequest : public InferenceEngine::InferRequestInternal {
// for performance counters
std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;

InferenceEngine::BlobMap _networkInputBlobs;
InferenceEngine::BlobMap _networkOutputBlobs;
ngraph::ParameterVector _parameters;
ngraph::ResultVector _results;
@@ -22,7 +22,7 @@ const std::vector<std::map<std::string, std::string>> configs = {
INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, PreprocessingPrecisionConvertTest,
::testing::Combine(
::testing::ValuesIn(inputPrecisions),
::testing::Values(1, 2, 3, 4, 5), // Number of input tensor channels
::testing::Values(4), // Number of input tensor channels
::testing::Values(true), // Use SetInput
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::ValuesIn(configs)),
@@ -31,7 +31,7 @@ INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, PreprocessingPrecisionConvertTest,
INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaGetBlob, PreprocessingPrecisionConvertTest,
::testing::Combine(
::testing::ValuesIn(inputPrecisions),
::testing::Values(4, 5), // Number of input tensor channels (blob_copy only supports 4d and 5d tensors)
::testing::Values(4), // Number of input tensor channels (blob_copy only supports 4d and 5d tensors)
::testing::Values(false), // use GetBlob
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::ValuesIn(configs)),
@@ -69,50 +69,46 @@ class InferRequestInternal : virtual public IInferRequestInternal {
* @param data - a reference to input or output blob. The type of Blob must correspond to the network input
* precision and size.
*/
void SetBlob(const char* name, const Blob::Ptr& data) override {
void SetBlob(const char* name, const Blob::Ptr& userBlob) override {
OV_ITT_SCOPED_TASK(itt::domains::Plugin, "SetBlob");
if (name == nullptr) {
THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name";
}
if (!data) THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
const bool compoundBlobPassed = data->is<CompoundBlob>();
const bool remoteBlobPassed = data->is<RemoteBlob>();
if (!compoundBlobPassed && !remoteBlobPassed && data->buffer() == nullptr)
if (!userBlob) THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
const bool compoundBlobPassed = userBlob->is<CompoundBlob>();
const bool remoteBlobPassed = userBlob->is<RemoteBlob>();
if (!compoundBlobPassed && !remoteBlobPassed && userBlob->buffer() == nullptr)
THROW_IE_EXCEPTION << "Input data was not allocated. Input name: \'" << name << "\'";
if (data->size() == 0) {
if (userBlob->size() == 0) {
THROW_IE_EXCEPTION << "Input data is empty. Input name: \'" << name << "\'";
}

InputInfo::Ptr foundInput;
DataPtr foundOutput;
size_t dataSize = data->size();
size_t dataSize = userBlob->size();
if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
if (foundInput->getPrecision() != data->getTensorDesc().getPrecision()) {
if (foundInput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str
<< "Failed to set Blob with precision not corresponding to user input precision";
}

const bool preProcRequired = preProcessingRequired(foundInput, data);
auto& devBlob = _deviceInputs[name];
const bool preProcRequired = preProcessingRequired(foundInput, userBlob, devBlob);
if (compoundBlobPassed && !preProcRequired) {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
<< "cannot set compound blob: supported only for input pre-processing";
}

if (preProcRequired) {
if (_preProcData.find(name) == _preProcData.end()) {
_preProcData.emplace(name, CreatePreprocDataHelper());
}
_preProcData[name]->isApplicable(data, _inputs[name]);
// Stores the given blob as ROI blob. It will be used to fill in network input
// during pre-processing
_preProcData[name]->setRoiBlob(data);
addInputPreProcessingFor(name, userBlob, devBlob ? devBlob : _inputs[name]);
} else {
size_t inputSize = details::product(foundInput->getTensorDesc().getDims());
if (dataSize != inputSize) {
THROW_IE_EXCEPTION << "Input blob size is not equal network input size (" << dataSize
<< "!=" << inputSize << ").";
}
_inputs[name] = data;
_inputs[name] = userBlob;
devBlob = userBlob;
}
} else {
if (compoundBlobPassed) {
@@ -124,11 +120,11 @@ class InferRequestInternal : virtual public IInferRequestInternal {
THROW_IE_EXCEPTION << "Output blob size is not equal network output size (" << dataSize
<< "!=" << outputSize << ").";
}
if (foundOutput->getPrecision() != data->getTensorDesc().getPrecision()) {
if (foundOutput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str
<< "Failed to set Blob with precision not corresponding to user output precision";
}
_outputs[name] = data;
_outputs[name] = userBlob;
}
}

@@ -155,6 +151,12 @@ class InferRequestInternal : virtual public IInferRequestInternal {
foundInput->getTensorDesc().getLayout() != SCALAR
? foundInput->getTensorDesc().getDims()
: oneVector);

if (auto devBlob = _deviceInputs[name]) {
if (preProcessingRequired(foundInput, data, devBlob)) {
addInputPreProcessingFor(name, data, devBlob);
}
}
}
} else {
data = _outputs[name];
@@ -233,24 +235,24 @@ class InferRequestInternal : virtual public IInferRequestInternal {
protected:
InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data
InferenceEngine::BlobMap _inputs; //!< A map of network input blobs
InferenceEngine::BlobMap _outputs; //!< A map of network output blobs
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
InferenceEngine::BlobMap _inputs; //!< A map of user passed blobs for network inputs
InferenceEngine::BlobMap _deviceInputs; //!< A map of actual network inputs, in plugin specific format
InferenceEngine::BlobMap _outputs; //!< A map of user passed blobs for network outputs
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
int m_curBatch; //!< Current batch value used in dynamic batching

/**
* @brief A shared pointer to ExecutableNetworkInternal interface
* @note Needed to correctly handle ownership between objects.
*/
std::shared_ptr<ExecutableNetworkInternal> _exeNetwork;

/**
* @brief Checks and executes input data pre-processing if needed.
* @param inputs Inputs blobs to perform preprocessing on
* @param serial Whether to use multiple threads to execute the step
*/
void execDataPreprocessing(InferenceEngine::BlobMap& inputs, bool serial = false) {
for (auto& input : inputs) {
void execDataPreprocessing(InferenceEngine::BlobMap& preprocessedBlobs, bool serial = false) {
for (auto& input : preprocessedBlobs) {
// If there is a pre-process entry for an input then it must be pre-processed
// using preconfigured resize algorithm.
auto it = _preProcData.find(input.first);
@@ -260,7 +262,6 @@ class InferRequestInternal : virtual public IInferRequestInternal {
}
}
}

/**
* @brief Helper function to find input or output blob by name
* @param name A name of input or output blob.
@@ -356,25 +357,52 @@ class InferRequestInternal : virtual public IInferRequestInternal {
/**
* @brief Checks whether pre-processing step is required for a given input
* @param info InputInfo corresponding to input blob
* @param blob Input Blob object corresponding to input info
* @param userBlob Input Blob object corresponding to input info
* @param deviceBlob Blob object in plugin's desired format
* @return `True` if pre-processing is required, `false` otherwise
*/
bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& blob) {
bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& userBlob, const Blob::Ptr& deviceBlob = nullptr) {
// pre-processing is required if:
// 1. resize algorithm is specified (resize required)
// 2. color format specified:
// 2.a. color format is not equal to network's expected (color conversion required)
// 2.b. network's layout != blob's layout (reorder required)
// 3. precision conversion is required

const auto& preProcessInfo = info->getPreProcess();
const auto inputColorFormat = preProcessInfo.getColorFormat();
// FIXME: support other network's input formats once the API is ready. Assuming input is in
// the BGR format by default
const auto networkColorFormat = ColorFormat::BGR;

const bool colorFormatSpecified = inputColorFormat != ColorFormat::RAW;

auto blob_layout = [](const Blob::Ptr& b) { return b->getTensorDesc().getLayout(); };
auto blob_prec = [](const Blob::Ptr& b) { return b->getTensorDesc().getPrecision();};

auto dst_layout = deviceBlob ? blob_layout(deviceBlob) : info->getLayout();
auto dst_prec = deviceBlob ? blob_prec(deviceBlob) : info->getPrecision();

//FIXME: remove the first part to allow any needed conversion?
const bool need_layout_conv = (colorFormatSpecified || deviceBlob) &&
(blob_layout(userBlob) != dst_layout);

return preProcessInfo.getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE ||
(colorFormatSpecified && inputColorFormat != networkColorFormat) ||
(colorFormatSpecified && info->getLayout() != blob->getTensorDesc().getLayout());
need_layout_conv ||
(blob_prec(userBlob) != dst_prec);
}

void addInputPreProcessingFor(const std::string& name, Blob::Ptr const& from, const Blob::Ptr& to) {
auto ppDataIt = _preProcData.find(name);
if (ppDataIt == _preProcData.end()) {
ppDataIt = (_preProcData.emplace(name, CreatePreprocDataHelper())).first;
}

auto& preproc_ptr = ppDataIt->second;
preproc_ptr->isApplicable(from, to);
// Stores the given blob as ROI blob. It will be used to fill in network input
// during pre-processing
preproc_ptr->setRoiBlob(from);
}
};

57 changes: 29 additions & 28 deletions inference-engine/src/preprocessing/ie_preprocess_data.cpp
@@ -758,7 +758,7 @@ class PreProcessData : public IPreProcessData {
/**
* @brief ROI blob.
*/
Blob::Ptr _roiBlob = nullptr;
Blob::Ptr _userBlob = nullptr;
Blob::Ptr _tmp1 = nullptr;
Blob::Ptr _tmp2 = nullptr;

@@ -773,7 +773,7 @@

Blob::Ptr getRoiBlob() const override;

void execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize = -1) override;
void execute(Blob::Ptr &preprocessedBlob, const PreProcessInfo &info, bool serial, int batchSize = -1) override;

void Release() noexcept override;

@@ -790,38 +790,39 @@ void PreProcessData::Release() noexcept {
}

void PreProcessData::setRoiBlob(const Blob::Ptr &blob) {
_roiBlob = blob;
_userBlob = blob;
}

Blob::Ptr PreProcessData::getRoiBlob() const {
return _roiBlob;
return _userBlob;
}

void PreProcessData::execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial,
void PreProcessData::execute(Blob::Ptr &preprocessedBlob, const PreProcessInfo &info, bool serial,
int batchSize) {
OV_ITT_SCOPED_TASK(itt::domains::IEPreproc, "Preprocessing");

auto algorithm = info.getResizeAlgorithm();
auto fmt = info.getColorFormat();

if (algorithm == NO_RESIZE && fmt == ColorFormat::RAW) {
THROW_IE_EXCEPTION << "Input pre-processing is called without the pre-processing info set: "
"there's nothing to be done";
if (_userBlob == nullptr || preprocessedBlob == nullptr) {
THROW_IE_EXCEPTION << "Input pre-processing is called with null " << (_userBlob == nullptr ? "_userBlob" : "preprocessedBlob");
}

if (_roiBlob == nullptr) {
THROW_IE_EXCEPTION << "Input pre-processing is called without ROI blob set";
}

batchSize = PreprocEngine::getCorrectBatchSize(batchSize, _roiBlob);
batchSize = PreprocEngine::getCorrectBatchSize(batchSize, _userBlob);

if (!_preproc) {
_preproc.reset(new PreprocEngine);
}
if (_preproc->preprocessWithGAPI(_roiBlob, outBlob, algorithm, fmt, serial, batchSize)) {

if (_preproc->preprocessWithGAPI(_userBlob, preprocessedBlob, algorithm, fmt, serial, batchSize)) {
return;
}

if (algorithm == NO_RESIZE) {
THROW_IE_EXCEPTION << "Input pre-processing is called without the pre-processing info set: "
"there's nothing to be done";
}

if (batchSize > 1) {
THROW_IE_EXCEPTION << "Batch pre-processing is unsupported in this mode. "
"Use default pre-processing instead to process batches.";
@@ -834,37 +835,37 @@ void PreProcessData::execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial,
}

Blob::Ptr res_in, res_out;
if (_roiBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp1 || _tmp1->size() != _roiBlob->size()) {
if (_roiBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp1 = make_shared_blob<float>({Precision::FP32, _roiBlob->getTensorDesc().getDims(), Layout::NCHW});
if (_userBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp1 || _tmp1->size() != _userBlob->size()) {
if (_userBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp1 = make_shared_blob<float>({Precision::FP32, _userBlob->getTensorDesc().getDims(), Layout::NCHW});
} else {
_tmp1 = make_shared_blob<uint8_t>({Precision::U8, _roiBlob->getTensorDesc().getDims(), Layout::NCHW});
_tmp1 = make_shared_blob<uint8_t>({Precision::U8, _userBlob->getTensorDesc().getDims(), Layout::NCHW});
}
_tmp1->allocate();
}

{
OV_ITT_SCOPED_TASK(itt::domains::IEPreproc, "Reorder before");
blob_copy(_roiBlob, _tmp1);
blob_copy(_userBlob, _tmp1);
}
res_in = _tmp1;
} else {
res_in = _roiBlob;
res_in = _userBlob;
}

if (outBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp2 || _tmp2->size() != outBlob->size()) {
if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp2 = make_shared_blob<float>({Precision::FP32, outBlob->getTensorDesc().getDims(), Layout::NCHW});
if (preprocessedBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp2 || _tmp2->size() != preprocessedBlob->size()) {
if (preprocessedBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp2 = make_shared_blob<float>({Precision::FP32, preprocessedBlob->getTensorDesc().getDims(), Layout::NCHW});
} else {
_tmp2 = make_shared_blob<uint8_t>({Precision::U8, outBlob->getTensorDesc().getDims(), Layout::NCHW});
_tmp2 = make_shared_blob<uint8_t>({Precision::U8, preprocessedBlob->getTensorDesc().getDims(), Layout::NCHW});
}
_tmp2->allocate();
}
res_out = _tmp2;
} else {
res_out = outBlob;
res_out = preprocessedBlob;
}

{
@@ -874,7 +875,7 @@ void PreProcessData::execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial,

if (res_out == _tmp2) {
OV_ITT_SCOPED_TASK(itt::domains::IEPreproc, "Reorder after");
blob_copy(_tmp2, outBlob);
blob_copy(_tmp2, preprocessedBlob);
}
}
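
For illustration, a minimal user-side sketch of the new behavior (the
model file, the input name "data", the dimensions, and the TEMPLATE
device are assumptions for the example): the conversion from the user's
input precision to the plugin's device precision now happens in the
common pre-processing step, so plugins no longer need their own
blobCopy-based conversion.

// Hypothetical usage against the 2020.x Inference Engine API.
#include <ie_core.hpp>

using namespace InferenceEngine;

int main() {
    Core ie;
    CNNNetwork network = ie.ReadNetwork("model.xml");
    // The user chooses U8 input data, whatever precision the device prefers.
    network.getInputsInfo().at("data")->setPrecision(Precision::U8);

    ExecutableNetwork execNet = ie.LoadNetwork(network, "TEMPLATE");
    InferRequest request = execNet.CreateInferRequest();

    TensorDesc userDesc(Precision::U8, {1, 3, 224, 224}, Layout::NCHW);
    Blob::Ptr userBlob = make_shared_blob<uint8_t>(userDesc);
    userBlob->allocate();

    // SetBlob sees that the user blob differs from the plugin's desired
    // device blob (_deviceInputs) and registers pre-processing; the actual
    // conversion runs inside execDataPreprocessing.
    request.SetBlob("data", userBlob);
    request.Infer();
    return 0;
}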
