Skip to content

Commit

Permalink
- [PreProcessing] Added ability to preprocess inputs into plugin
Browse files Browse the repository at this point in the history
desired format
  • Loading branch information
anton-potapov committed Nov 12, 2020
1 parent be3df18 commit 72dc849
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 53 deletions.
4 changes: 2 additions & 2 deletions docs/template_plugin/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

# ATTENTION: uncomment to register a plugin in the plugins.xml file
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# POSSIBLE_PLUGINS ${TARGET_NAME})
ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]

# ATTENTION: uncomment to install component
Expand Down
17 changes: 5 additions & 12 deletions docs/template_plugin/src/template_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static void AllocateImpl(const BlobDataMap& blobDataMap,

void TemplateInferRequest::allocateBlobs() {
auto&& parameters = _executableNetwork->_function->get_parameters();
AllocateImpl(_networkInputs, _inputs, _networkInputBlobs, [&] (const std::string& blobName) {
AllocateImpl(_networkInputs, _inputs, _deviceInputs, [&] (const std::string& blobName) {
return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
});
auto&& results = _executableNetwork->_function->get_results();
Expand Down Expand Up @@ -176,21 +176,14 @@ void TemplateInferRequest::inferPreprocess() {
auto start = Time::now();
// NOTE: After InferRequestInternal::execDataPreprocessing call
// input can points to other memory region than it was allocated in constructor.
InferRequestInternal::execDataPreprocessing(_inputs);
for (auto&& input : _inputs) {
auto inputBlob = input.second;
auto networkInput = _networkInputBlobs[input.first];
if (inputBlob->getTensorDesc().getPrecision() == networkInput->getTensorDesc().getPrecision()) {
networkInput = inputBlob;
} else {
blobCopy(inputBlob, networkInput);
}
auto index = _executableNetwork->_inputIndex[input.first];
InferRequestInternal::execDataPreprocessing(_deviceInputs);
for (auto&& networkInput : _deviceInputs) {
auto index = _executableNetwork->_inputIndex[networkInput.first];
const auto& parameter = _parameters[index];
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(parameterType, parameterShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput)->rmap().as<void*>());
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
}
for (auto&& output : _outputs) {
auto outputBlob = output.second;
Expand Down
2 changes: 1 addition & 1 deletion docs/template_plugin/src/template_infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class TemplateInferRequest : public InferenceEngine::InferRequestInternal {
// for performance counters
std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;

InferenceEngine::BlobMap _networkInputBlobs;
// InferenceEngine::BlobMap _networkInputBlobs;
InferenceEngine::BlobMap _networkOutputBlobs;
ngraph::ParameterVector _parameters;
ngraph::ResultVector _results;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const std::vector<std::map<std::string, std::string>> configs = {
INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, PreprocessingPrecisionConvertTest,
::testing::Combine(
::testing::ValuesIn(inputPrecisions),
::testing::Values(1, 2, 3, 4, 5), // Number of input tensor channels
::testing::Values(4), // Number of input tensor channels
::testing::Values(true), // Use SetInput
::testing::Values("TEMPLATE"),
::testing::ValuesIn(configs)),
Expand All @@ -31,7 +31,7 @@ INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, Preproces
INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaGetBlob, PreprocessingPrecisionConvertTest,
::testing::Combine(
::testing::ValuesIn(inputPrecisions),
::testing::Values(4, 5), // Number of input tensor channels (blob_copy only supports 4d and 5d tensors)
::testing::Values(4), // Number of input tensor channels (blob_copy only supports 4d and 5d tensors)
::testing::Values(false), // use GetBlob
::testing::Values("TEMPLATE"),
::testing::ValuesIn(configs)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,27 +91,23 @@ class InferRequestInternal : virtual public IInferRequestInternal {
<< "Failed to set Blob with precision not corresponding to user input precision";
}

const bool preProcRequired = preProcessingRequired(foundInput, data);
auto& devBlob = _deviceInputs[name];
const bool preProcRequired = preProcessingRequired(foundInput, data, devBlob);
if (compoundBlobPassed && !preProcRequired) {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
<< "cannot set compound blob: supported only for input pre-processing";
}

if (preProcRequired) {
if (_preProcData.find(name) == _preProcData.end()) {
_preProcData.emplace(name, CreatePreprocDataHelper());
}
_preProcData[name]->isApplicable(data, _inputs[name]);
// Stores the given blob as ROI blob. It will be used to fill in network input
// during pre-processing
_preProcData[name]->setRoiBlob(data);
addInputPreProcessingFor(name, foundInput, data, devBlob);
} else {
size_t inputSize = details::product(foundInput->getTensorDesc().getDims());
if (dataSize != inputSize) {
THROW_IE_EXCEPTION << "Input blob size is not equal network input size (" << dataSize
<< "!=" << inputSize << ").";
}
_inputs[name] = data;
devBlob = data;
}
} else {
if (compoundBlobPassed) {
Expand Down Expand Up @@ -154,6 +150,8 @@ class InferRequestInternal : virtual public IInferRequestInternal {
foundInput->getTensorDesc().getLayout() != SCALAR
? foundInput->getTensorDesc().getDims()
: oneVector);

addInputPreProcessingFor(name, foundInput, data, _deviceInputs[name]);
}
} else {
data = _outputs[name];
Expand Down Expand Up @@ -233,16 +231,16 @@ class InferRequestInternal : virtual public IInferRequestInternal {
InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data
InferenceEngine::BlobMap _inputs; //!< A map of network input blobs
InferenceEngine::BlobMap _deviceInputs;
InferenceEngine::BlobMap _outputs; //!< A map of network output blobs
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
int m_curBatch; //!< Current batch value used in dynamic batching

/**
* @brief A shared pointer to ExecutableNetworkInternal interface
* @note Needed to correctly handle ownership between objects.
*/
std::shared_ptr<ExecutableNetworkInternal> _exeNetwork;

/**
* @brief Checks and executes input data pre-processing if needed.
* @param inputs Inputs blobs to perform preprocessing on
Expand All @@ -259,7 +257,6 @@ class InferRequestInternal : virtual public IInferRequestInternal {
}
}
}

/**
* @brief Helper function to find input or output blob by name
* @param name A name of input or output blob.
Expand Down Expand Up @@ -355,14 +352,17 @@ class InferRequestInternal : virtual public IInferRequestInternal {
* @brief Checks whether pre-processing step is required for a given input
* @param info InputInfo corresponding to input blob
* @param blob Input Blob object corresponding to input info
* @param outBlob Blob object in plugin's desired format
* @return `True` if pre-processing is required, `false` otherwise
*/
bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& blob) {
/**
 * @brief Checks whether a pre-processing step is required for a given input.
 * @param info InputInfo corresponding to input blob
 * @param blob Input Blob object corresponding to input info
 * @param outBlob Blob object in plugin's desired format; when non-null it is
 *        compared against @p blob to detect layout/precision mismatches
 * @return `true` if pre-processing is required, `false` otherwise
 */
bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& blob, const Blob::Ptr& outBlob = nullptr) {
    // pre-processing is required if:
    // 1. resize algorithm is specified (resize required)
    // 2. color format specified:
    // 2.a. color format is not equal to network's expected (color conversion required)
    // 2.b. network's layout != blob's layout (reorder required)
    // 3. precision conversion is required

    const auto& preProcessInfo = info->getPreProcess();
    const auto inputColorFormat = preProcessInfo.getColorFormat();
    // FIXME: support other network's input formats once the API is ready. Assuming input is in
    // the BGR format by default
    const auto networkColorFormat = ColorFormat::BGR;
    const bool colorFormatSpecified = inputColorFormat != ColorFormat::RAW;
    return preProcessInfo.getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE ||
           (colorFormatSpecified && inputColorFormat != networkColorFormat) ||
           (colorFormatSpecified && info->getLayout() != blob->getTensorDesc().getLayout()) ||
           // device blob layout differs from the user blob layout (reorder required)
           (colorFormatSpecified && outBlob && blob->getTensorDesc().getLayout() != outBlob->getTensorDesc().getLayout()) ||
           // device blob precision differs from the user blob precision (conversion required)
           (outBlob && blob->getTensorDesc().getPrecision() != outBlob->getTensorDesc().getPrecision());
}

// Registers (or refreshes) the pre-processing helper for input `name` when the
// user-supplied blob cannot be fed to the device blob directly — i.e. when
// preProcessingRequired() detects a resize/color/layout/precision mismatch.
void addInputPreProcessingFor(const std::string& name, InputInfo::Ptr info, Blob::Ptr const& inBlob, const Blob::Ptr& devBlob) {
    if (!preProcessingRequired(info, inBlob, devBlob)) {
        return;
    }

    // Lazily create the per-input pre-processing helper on first use.
    auto it = _preProcData.find(name);
    if (it == _preProcData.end()) {
        it = _preProcData.emplace(name, CreatePreprocDataHelper()).first;
    }

    it->second->isApplicable(inBlob, devBlob);
    // Stores the given blob as ROI blob. It will be used to fill in network input
    // during pre-processing
    it->second->setRoiBlob(inBlob);
}

};

} // namespace InferenceEngine
38 changes: 23 additions & 15 deletions inference-engine/src/preprocessing/ie_preprocess_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ class PreProcessData : public IPreProcessData {
Blob::Ptr getRoiBlob() const override;

void execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize = -1) override;
void execute(const Blob::Ptr& inBlob, Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize = -1) override;

void Release() noexcept override;

Expand All @@ -799,26 +800,33 @@ Blob::Ptr PreProcessData::getRoiBlob() const {

// Legacy entry point: runs pre-processing using the ROI blob previously
// registered via setRoiBlob(). Throws if no ROI blob has been set.
void PreProcessData::execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial,
                             int batchSize) {
    if (!_roiBlob) {
        THROW_IE_EXCEPTION << "Input pre-processing is called without ROI blob set";
    }
    // Delegate to the generic overload that takes an explicit input blob.
    execute(_roiBlob, outBlob, info, serial, batchSize);
}
void PreProcessData::execute(const Blob::Ptr& inBlob, Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize) {

OV_ITT_SCOPED_TASK(itt::domains::IEPreproc, "Preprocessing");

auto algorithm = info.getResizeAlgorithm();
auto fmt = info.getColorFormat();

if (algorithm == NO_RESIZE && fmt == ColorFormat::RAW) {
THROW_IE_EXCEPTION << "Input pre-processing is called without the pre-processing info set: "
"there's nothing to be done";
}
// if (algorithm == NO_RESIZE && fmt == ColorFormat::RAW) {
// THROW_IE_EXCEPTION << "Input pre-processing is called without the pre-processing info set: "
// "there's nothing to be done";
// }

if (_roiBlob == nullptr) {
THROW_IE_EXCEPTION << "Input pre-processing is called without ROI blob set";
if (inBlob == nullptr) {
THROW_IE_EXCEPTION << "Input pre-processing is called with null inBlob";
}

batchSize = PreprocEngine::getCorrectBatchSize(batchSize, _roiBlob);
batchSize = PreprocEngine::getCorrectBatchSize(batchSize, inBlob);

if (!_preproc) {
_preproc.reset(new PreprocEngine);
}
if (_preproc->preprocessWithGAPI(_roiBlob, outBlob, algorithm, fmt, serial, batchSize)) {
if (_preproc->preprocessWithGAPI(inBlob, outBlob, algorithm, fmt, serial, batchSize)) {
return;
}

Expand All @@ -834,23 +842,23 @@ void PreProcessData::execute(Blob::Ptr &outBlob, const PreProcessInfo& info, boo
}

Blob::Ptr res_in, res_out;
if (_roiBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp1 || _tmp1->size() != _roiBlob->size()) {
if (_roiBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp1 = make_shared_blob<float>({Precision::FP32, _roiBlob->getTensorDesc().getDims(), Layout::NCHW});
if (inBlob->getTensorDesc().getLayout() == NHWC) {
if (!_tmp1 || _tmp1->size() != inBlob->size()) {
if (inBlob->getTensorDesc().getPrecision() == Precision::FP32) {
_tmp1 = make_shared_blob<float>({Precision::FP32, inBlob->getTensorDesc().getDims(), Layout::NCHW});
} else {
_tmp1 = make_shared_blob<uint8_t>({Precision::U8, _roiBlob->getTensorDesc().getDims(), Layout::NCHW});
_tmp1 = make_shared_blob<uint8_t>({Precision::U8, inBlob->getTensorDesc().getDims(), Layout::NCHW});
}
_tmp1->allocate();
}

{
OV_ITT_SCOPED_TASK(itt::domains::IEPreproc, "Reorder before");
blob_copy(_roiBlob, _tmp1);
blob_copy(inBlob, _tmp1);
}
res_in = _tmp1;
} else {
res_in = _roiBlob;
res_in = inBlob;
}

if (outBlob->getTensorDesc().getLayout() == NHWC) {
Expand Down
2 changes: 2 additions & 0 deletions inference-engine/src/preprocessing/ie_preprocess_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class IPreProcessData : public details::IRelease {
virtual void execute(Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize = -1) = 0;

virtual void isApplicable(const Blob::Ptr &src, const Blob::Ptr &dst) = 0;

virtual void execute(const Blob::Ptr& inBlob, Blob::Ptr &outBlob, const PreProcessInfo& info, bool serial, int batchSize = -1) = 0;
};

INFERENCE_PRERPOC_PLUGIN_API(StatusCode) CreatePreProcessData(IPreProcessData *& data, ResponseDesc *resp) noexcept;
Expand Down
30 changes: 26 additions & 4 deletions inference-engine/src/preprocessing/ie_preprocess_gapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include "ie_preprocess_itt.hpp"
#include "debug.h"

#include "blob_transform.hpp"

#include "ie_parallel.hpp"

#include <opencv2/gapi/fluid/gfluidkernel.hpp> // GFluidOutputRois
Expand Down Expand Up @@ -276,8 +278,8 @@ void validateColorFormats(const G::Desc &in_desc,
};

// verify inputs/outputs and throw on error

if (output_color_format == ColorFormat::RAW) {
const bool color_conv_required = !((output_color_format == input_color_format) || (input_color_format == ColorFormat::RAW));
if (color_conv_required && (output_color_format == ColorFormat::RAW)) {
THROW_IE_EXCEPTION << "Network's expected color format is unspecified";
}

Expand All @@ -288,7 +290,7 @@ void validateColorFormats(const G::Desc &in_desc,
verify_layout(in_layout, "Input blob");
verify_layout(out_layout, "Network's blob");

if (input_color_format == ColorFormat::RAW) {
if (!color_conv_required) {
// verify input and output have the same number of channels
if (in_desc.d.C != out_desc.d.C) {
THROW_IE_EXCEPTION << "Input and network expected blobs have different number of "
Expand Down Expand Up @@ -330,6 +332,18 @@ void validateTensorDesc(const TensorDesc& desc) {
}
}

// Share the source memory with the destination: after the call `dst` points at
// the same underlying blob as `src` (no data is copied).
void assignBlobs(MemoryBlob::Ptr& dst, const MemoryBlob::Ptr& src ) {
    dst = src;
}

void assignBlobs(MemoryBlob::Ptr& dst, const NV12Blob::Ptr& src ) {
IE_ASSERT(false && "logic error");
}

void assignBlobs(MemoryBlob::Ptr& dst, const I420Blob::Ptr& src ) {
IE_ASSERT(false && "logic error");
}

void validateBlob(const MemoryBlob::Ptr &) {}

void validateBlob(const NV12Blob::Ptr &inBlob) {
Expand Down Expand Up @@ -949,6 +963,14 @@ bool PreprocEngine::preprocessBlob(const BlobTypePtr &inBlob, MemoryBlob::Ptr &o
out_desc_ie.getDims(),
out_fmt },
algorithm };

if (std::get<0>(thisCall) == std::get<1>(thisCall)) {
//if requested output parameters match input blob no need to do anything
// assignBlobs(outBlob, inBlob);
blob_copy(inBlob, outBlob);
return true;
}

const Update update = needUpdate(thisCall);

Opt<cv::GComputation> _lastComputation;
Expand Down Expand Up @@ -986,7 +1008,7 @@ bool PreprocEngine::preprocessWithGAPI(const Blob::Ptr &inBlob, Blob::Ptr &outBl
return false;
}

const auto out_fmt = ColorFormat::BGR; // FIXME: get expected color format from network
const auto out_fmt = (in_fmt == ColorFormat::RAW) ? ColorFormat::RAW : ColorFormat::BGR; // FIXME: get expected color format from network

// output is always a memory blob
auto outMemoryBlob = as<MemoryBlob>(outBlob);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ TEST_P(PreprocTest, Performance)
{
case Precision::U8: Blob2Img<Precision::U8> (out_blob, out_mat, out_layout); break;
case Precision::FP32: Blob2Img<Precision::FP32>(out_blob, out_mat, out_layout); break;
case Precision::U16: Blob2Img<Precision::FP32>(out_blob, out_mat, out_layout); break;
case Precision::U16: Blob2Img<Precision::U16>(out_blob, out_mat, out_layout); break;
default: FAIL() << "Unsupported configuration";
}

Expand All @@ -1198,7 +1198,7 @@ TEST_P(PreprocTest, Performance)
ocv_out_mat = ocv_converted;
}

EXPECT_LE(cv::norm(ocv_out_mat, out_mat, cv::NORM_INF), tolerance);
EXPECT_LE(cv::norm(ocv_out_mat, out_mat, cv::NORM_INF), tolerance)<<in_mat1 << std::endl << ocv_out_mat << std::endl << out_mat << std::endl;

#if PERF_TEST
// iterate testing, and print performance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ INSTANTIATE_TEST_CASE_P(ColorFormat_NV12, PreprocTest,
Values(TEST_SIZES_PREPROC)));


INSTANTIATE_TEST_CASE_P(DISABLED_PlainPrecisionConversions, PreprocTest,
INSTANTIATE_TEST_CASE_P(PlainPrecisionConversions, PreprocTest,
Combine(Values(std::make_pair(IE::Precision::U16,IE::Precision::FP32),
std::make_pair(IE::Precision::FP32,IE::Precision::U16)
),
Expand All @@ -415,5 +415,5 @@ INSTANTIATE_TEST_CASE_P(PrecisionConversionsPipelines, PreprocTest,
Values(IE::ColorFormat::RAW),
Values(IE::Layout::NHWC, IE::Layout::NCHW),
Values(IE::Layout::NHWC, IE::Layout::NCHW),
Values(std::make_pair(1, 1)/*, std::make_pair(3, 3)*/), //U16 Split and Merge are not there
Values(std::make_pair(1, 1), std::make_pair(3, 3)), //U16 Split and Merge are not there
Values(TEST_SIZES_PREPROC)));

0 comments on commit 72dc849

Please sign in to comment.