Enable DynamicBatch related logics
Signed-off-by: Andrew Park <[email protected]>
andrew-k-park committed May 26, 2022
1 parent f559f17 commit f2f83f8
Showing 5 changed files with 247 additions and 245 deletions.
37 changes: 16 additions & 21 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp
@@ -16,11 +16,10 @@ namespace ov {
namespace runtime {
namespace intel_gpu {

// TODO(Andrew): Enable below to support dynamic batch
// struct buf_info {
// size_t buf_offset;
// size_t buf_size;
// };
struct buf_info {
size_t buf_offset;
size_t buf_size;
};

class CompiledModel;
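buf_info, re-enabled here, pairs a byte offset with a byte size so that one preallocated blob can be addressed as per-batch slices; the batchInputs/batchOutputs maps further down keep one such vector per blob. A minimal standalone sketch of that idea (not code from the plugin — make_batch_slices and the concrete sizes are illustrative assumptions):

#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors the two fields of buf_info above.
struct buf_info {
    std::size_t buf_offset;
    std::size_t buf_size;
};

// Hypothetical helper: one {offset, size} slice per batch element of a buffer
// holding `max_batch` items of `single_size` bytes each.
std::vector<buf_info> make_batch_slices(std::size_t max_batch, std::size_t single_size) {
    std::vector<buf_info> slices;
    slices.reserve(max_batch);
    for (std::size_t b = 0; b < max_batch; ++b) {
        slices.push_back({b * single_size, single_size});
    }
    return slices;
}

int main() {
    const std::size_t single = 3 * 224 * 224 * sizeof(float);  // e.g. one NCHW image
    for (const auto& s : make_batch_slices(4, single)) {
        std::printf("offset=%zu size=%zu\n", s.buf_offset, s.buf_size);
    }
    return 0;
}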

@@ -48,8 +47,7 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr> &data) override;

// TODO(Andrew): Enable below to support dynamic batch
// void SetBatch(int batch = -1) override;
void SetBatch(int batch = -1) override;
void SetGraph(std::shared_ptr<Graph> graph);
void EnableProfiling() { m_useProfiling = true; }
void EnableStreams() { m_useStreams = true; }
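SetBatch, re-enabled here, is the entry point the classic InferenceEngine API exposes for dynamic batching. A hedged usage sketch from the application side, assuming the classic API with the DYN_BATCH_ENABLED config key; the model path and batch value are placeholders:

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    // "model.xml" is a placeholder; the model's batch dimension gives the upper bound.
    auto network = core.ReadNetwork("model.xml");
    auto exec = core.LoadNetwork(network, "GPU",
        {{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}});
    auto request = exec.CreateInferRequest();
    request.SetBatch(2);  // process only the first 2 items of the batched blobs
    request.Infer();
    return 0;
}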
@@ -63,10 +61,9 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
void enqueue();
void wait();

// TODO(Andrew): Enable below to support dynamic batch
// void preprocess_dynamic();
// void enqueue_dynamic();
// void wait_dynamic();
void preprocess_dynamic();
void enqueue_dynamic();
void wait_dynamic();

bool use_external_queue() const { return m_useExternalQueue; }
void enable_external_queue() { m_useExternalQueue = true; }
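preprocess_dynamic, enqueue_dynamic and wait_dynamic mirror the regular preprocess/enqueue/wait stages. One plausible shape for how a request could pick between the two pipelines — a standalone stub, not the plugin's actual control flow; the flag and method bodies are placeholders:

#include <cstdio>

class PipelineSketch {
public:
    explicit PipelineSketch(bool dynamic_batch) : m_dynamic(dynamic_batch) {}

    void infer() {
        if (m_dynamic) {
            preprocess_dynamic();
            enqueue_dynamic();
            wait_dynamic();
        } else {
            preprocess();
            enqueue();
            wait();
        }
    }

private:
    bool m_dynamic;
    void preprocess()         { std::puts("preprocess"); }
    void enqueue()            { std::puts("enqueue"); }
    void wait()               { std::puts("wait"); }
    void preprocess_dynamic() { std::puts("preprocess_dynamic"); }
    void enqueue_dynamic()    { std::puts("enqueue_dynamic"); }
    void wait_dynamic()       { std::puts("wait_dynamic"); }
};

int main() {
    PipelineSketch(true).infer();
    return 0;
}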
@@ -83,10 +80,9 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
bool m_useExternalQueue = false;
std::shared_ptr<Graph> m_graph;

// TODO(Andrew): Enable below to support dynamic batch
// dynamic batch stuff
// std::map<std::string, std::vector<buf_info>> batchInputs;
// std::map<std::string, std::vector<buf_info>> batchOutputs;
std::map<std::string, std::vector<buf_info>> batchInputs;
std::map<std::string, std::vector<buf_info>> batchOutputs;
InferenceEngine::IStreamsExecutor* streamExecutor = nullptr;

void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob,
@@ -97,22 +93,21 @@ class InferRequest : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<InferenceEngine::IAllocator> alloc = nullptr);
InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc);

void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr);
void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copy_input_data(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob);
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr);

InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem);
void allocate_inputs();
void allocate_outputs();
// TODO(Andrew): Enable below to support dynamic batch
// void allocate_inputs_dynamic();
// void allocate_outputs_dynamic();
void allocate_inputs_dynamic();
void allocate_outputs_dynamic();

InferenceEngine::Blob::Ptr reinterpret_device_blob(InferenceEngine::Blob::Ptr data, const InferenceEngine::TensorDesc& new_desc);

std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
// TODO(Andrew): Enable below to support dynamic batch
// std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
};

} // namespace intel_gpu
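The buf_info* parameter that copy_output_data and copy_input_data now take (defaulting to nullptr) lets one copy helper serve both paths: no slice means the whole blob, a slice means only that batch's region. A byte-level standalone sketch of that pattern — illustrative only, since the real helpers operate on cldnn memory objects and IE blobs:

#include <cstddef>
#include <cstring>
#include <vector>

struct buf_info { std::size_t buf_offset; std::size_t buf_size; };

// Hypothetical helper: copy the whole buffer when bi == nullptr, otherwise
// only the slice described by bi, at the matching offset in the destination.
void copy_with_optional_slice(const void* src, void* dst, std::size_t full_size,
                              const buf_info* bi = nullptr) {
    const std::size_t offset = bi ? bi->buf_offset : 0;
    const std::size_t size   = bi ? bi->buf_size   : full_size;
    std::memcpy(static_cast<char*>(dst) + offset,
                static_cast<const char*>(src) + offset, size);
}

int main() {
    std::vector<char> src(16, 'a'), dst(16, '.');
    buf_info second_half{8, 8};
    copy_with_optional_slice(src.data(), dst.data(), src.size(), &second_half);
    return 0;
}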
src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp
@@ -16,7 +16,7 @@ namespace ov {
namespace runtime {
namespace intel_gpu {

struct buf_info {
struct buf_info_legacy {
size_t buf_offset;
size_t buf_size;
};
@@ -81,8 +81,8 @@ class InferRequestLegacy : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<Graph> m_graph;

// dynamic batch stuff
std::map<std::string, std::vector<buf_info>> batchInputs;
std::map<std::string, std::vector<buf_info>> batchOutputs;
std::map<std::string, std::vector<buf_info_legacy>> batchInputs;
std::map<std::string, std::vector<buf_info_legacy>> batchOutputs;
InferenceEngine::IStreamsExecutor* streamExecutor = nullptr;

void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob,
@@ -93,10 +93,10 @@ class InferRequestLegacy : public InferenceEngine::IInferRequestInternal {
std::shared_ptr<InferenceEngine::IAllocator> alloc = nullptr);
InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout);

void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info_legacy* bi = nullptr);
void copy_input_data(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr);
buf_info_legacy* bi = nullptr);

InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem);
void allocate_inputs();