Enable DynamicBatch related logics
Signed-off-by: Andrew Park <[email protected]>
andrew-k-park committed May 26, 2022
1 parent f559f17 commit f2f83f8
Showing 5 changed files with 247 additions and 245 deletions.
37 changes: 16 additions & 21 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp
@@ -16,11 +16,10 @@ namespace ov {
namespace runtime {
namespace intel_gpu {

// TODO(Andrew): Enable below to support dynamic batch
// struct buf_info {
// size_t buf_offset;
// size_t buf_size;
// };
struct buf_info {
size_t buf_offset;
size_t buf_size;
};

class CompiledModel;
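buf_info, re-enabled here, pairs a byte offset with a byte size so that one preallocated blob can be addressed as per-batch slices; the batchInputs/batchOutputs maps further down keep one such vector per blob. A minimal standalone sketch of that idea (not code from the plugin — make_batch_slices and the concrete sizes are illustrative assumptions):

#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors the two fields of buf_info above.
struct buf_info {
    std::size_t buf_offset;
    std::size_t buf_size;
};

// Hypothetical helper: one {offset, size} slice per batch element of a buffer
// holding `max_batch` items of `single_size` bytes each.
std::vector<buf_info> make_batch_slices(std::size_t max_batch, std::size_t single_size) {
    std::vector<buf_info> slices;
    slices.reserve(max_batch);
    for (std::size_t b = 0; b < max_batch; ++b) {
        slices.push_back({b * single_size, single_size});
    }
    return slices;
}

int main() {
    const std::size_t single = 3 * 224 * 224 * sizeof(float);  // e.g. one NCHW image
    for (const auto& s : make_batch_slices(4, single)) {
        std::printf("offset=%zu size=%zu\n", s.buf_offset, s.buf_size);
    }
    return 0;
}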

@@ -48,8 +47,7 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr> &data) override;

// TODO(Andrew): Enable below to support dynamic batch
// void SetBatch(int batch = -1) override;
void SetBatch(int batch = -1) override;
void SetGraph(std::shared_ptr<Graph> graph);
void EnableProfiling() { m_useProfiling = true; }
void EnableStreams() { m_useStreams = true; }
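SetBatch, re-enabled here, is the entry point the classic InferenceEngine API exposes for dynamic batching. A hedged usage sketch from the application side, assuming the classic API with the DYN_BATCH_ENABLED config key; the model path and batch value are placeholders:

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    // "model.xml" is a placeholder; the model's batch dimension gives the upper bound.
    auto network = core.ReadNetwork("model.xml");
    auto exec = core.LoadNetwork(network, "GPU",
        {{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}});
    auto request = exec.CreateInferRequest();
    request.SetBatch(2);  // process only the first 2 items of the batched blobs
    request.Infer();
    return 0;
}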
@@ -63,10 +61,9 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
void enqueue();
void wait();

// TODO(Andrew): Enable below to support dynamic batch
// void preprocess_dynamic();
// void enqueue_dynamic();
// void wait_dynamic();
void preprocess_dynamic();
void enqueue_dynamic();
void wait_dynamic();

bool use_external_queue() const { return m_useExternalQueue; }
void enable_external_queue() { m_useExternalQueue = true; }
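preprocess_dynamic, enqueue_dynamic and wait_dynamic mirror the regular preprocess/enqueue/wait stages. One plausible shape for how a request could pick between the two pipelines — a standalone stub, not the plugin's actual control flow; the flag and method bodies are placeholders:

#include <cstdio>

class PipelineSketch {
public:
    explicit PipelineSketch(bool dynamic_batch) : m_dynamic(dynamic_batch) {}

    void infer() {
        if (m_dynamic) {
            preprocess_dynamic();
            enqueue_dynamic();
            wait_dynamic();
        } else {
            preprocess();
            enqueue();
            wait();
        }
    }

private:
    bool m_dynamic;
    void preprocess()         { std::puts("preprocess"); }
    void enqueue()            { std::puts("enqueue"); }
    void wait()               { std::puts("wait"); }
    void preprocess_dynamic() { std::puts("preprocess_dynamic"); }
    void enqueue_dynamic()    { std::puts("enqueue_dynamic"); }
    void wait_dynamic()       { std::puts("wait_dynamic"); }
};

int main() {
    PipelineSketch(true).infer();
    return 0;
}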
@@ -83,10 +80,9 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
bool m_useExternalQueue = false;
std::shared_ptr<Graph> m_graph;

// TODO(Andrew): Enable below to support dynamic batch
// dynamic batch stuff
// std::map<std::string, std::vector<buf_info>> batchInputs;
// std::map<std::string, std::vector<buf_info>> batchOutputs;
std::map<std::string, std::vector<buf_info>> batchInputs;
std::map<std::string, std::vector<buf_info>> batchOutputs;
InferenceEngine::IStreamsExecutor* streamExecutor = nullptr;

void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob,
@@ -97,22 +93,21 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<InferenceEngine::IAllocator> alloc = nullptr);
InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc);

void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr);
void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copy_input_data(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob);
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr);

InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem);
void allocate_inputs();
void allocate_outputs();
// TODO(Andrew): Enable below to support dynamic batch
// void allocate_inputs_dynamic();
// void allocate_outputs_dynamic();
void allocate_inputs_dynamic();
void allocate_outputs_dynamic();

InferenceEngine::Blob::Ptr reinterpret_device_blob(InferenceEngine::Blob::Ptr data, const InferenceEngine::TensorDesc& new_desc);

std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
// TODO(Andrew): Enable below to support dynamic batch
// std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
};

} // namespace intel_gpu
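The buf_info* parameter that copy_output_data and copy_input_data now take (defaulting to nullptr) lets one copy helper serve both paths: no slice means the whole blob, a slice means only that batch's region. A byte-level standalone sketch of that pattern — illustrative only, since the real helpers operate on cldnn memory objects and IE blobs:

#include <cstddef>
#include <cstring>
#include <vector>

struct buf_info { std::size_t buf_offset; std::size_t buf_size; };

// Hypothetical helper: copy the whole buffer when bi == nullptr, otherwise
// only the slice described by bi, at the matching offset in the destination.
void copy_with_optional_slice(const void* src, void* dst, std::size_t full_size,
                              const buf_info* bi = nullptr) {
    const std::size_t offset = bi ? bi->buf_offset : 0;
    const std::size_t size   = bi ? bi->buf_size   : full_size;
    std::memcpy(static_cast<char*>(dst) + offset,
                static_cast<const char*>(src) + offset, size);
}

int main() {
    std::vector<char> src(16, 'a'), dst(16, '.');
    buf_info second_half{8, 8};
    copy_with_optional_slice(src.data(), dst.data(), src.size(), &second_half);
    return 0;
}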
src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp
@@ -16,7 +16,7 @@ namespace ov {
namespace runtime {
namespace intel_gpu {

struct buf_info {
struct buf_info_legacy {
size_t buf_offset;
size_t buf_size;
};
@@ -81,8 +81,8 @@ class InferRequestLegacy : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<Graph> m_graph;

// dynamic batch stuff
std::map<std::string, std::vector<buf_info>> batchInputs;
std::map<std::string, std::vector<buf_info>> batchOutputs;
std::map<std::string, std::vector<buf_info_legacy>> batchInputs;
std::map<std::string, std::vector<buf_info_legacy>> batchOutputs;
InferenceEngine::IStreamsExecutor* streamExecutor = nullptr;

void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob,
@@ -93,10 +93,10 @@ class InferRequestLegacy : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<InferenceEngine::IAllocator> alloc = nullptr);
InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout);

void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info_legacy* bi = nullptr);
void copy_input_data(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr);
buf_info_legacy* bi = nullptr);

InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem);
void allocate_inputs();