Fix func test (openvinotoolkit#68)
yeonbok committed Aug 8, 2022
1 parent 1b02e65 commit d6088e9
Showing 2 changed files with 45 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/plugin/infer_request_legacy.hpp
@@ -103,6 +103,8 @@ class InferRequestLegacy : public InferenceEngine::IInferRequestInternal {
     void allocate_inputs_dynamic();
     void allocate_outputs_dynamic();
 
+    InferenceEngine::Blob::Ptr reinterpret_device_blob(InferenceEngine::Blob::Ptr data, const InferenceEngine::TensorDesc& new_desc);
+
     std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
     std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
 };
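Note: reinterpret_device_blob is only declared here; none of the .cpp additions below define it. Conceptually, reinterpreting a blob keeps the underlying bytes and attaches a new descriptor to them. A language-level sketch under that assumption, using invented names (MiniDesc, MiniBlob) rather than the plugin's types:

#include <cstddef>
#include <cstdint>
#include <vector>

// Concept sketch only: "reinterpreting" a blob keeps the same storage and
// swaps the metadata (element type/count); no data is copied.
struct MiniDesc { size_t element_size; size_t element_count; };

struct MiniBlob {
    std::vector<uint8_t> bytes;  // storage is untouched by reinterpretation
    MiniDesc desc;               // only this metadata changes
};

MiniBlob reinterpret_blob(MiniBlob blob, const MiniDesc& new_desc) {
    blob.desc = new_desc;        // new descriptor, same bytes
    return blob;
}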
61 changes: 43 additions & 18 deletions src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp
@@ -47,7 +47,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
         dst[i] = srcPtr[i];
 }
 
-template<typename T>
+template<typename src_dt, typename dst_dt>
 void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::runtime::intel_gpu::buf_info* bi, cldnn::stream& stream) {
     size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
     size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
@@ -56,12 +56,12 @@ void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::runtime::
     auto size = layout.get_tensor();
 
     auto locked_dst = dst->buffer();
-    auto dst_ptr = locked_dst.as<T*>();
+    auto dst_ptr = locked_dst.as<dst_dt*>();
     if (dst_ptr == nullptr) {
         IE_THROW() << "Invalid output blob";
     }
-    cldnn::mem_lock<T> src_lock{ src, stream };
-    T* src_ptr = src_lock.data();
+    cldnn::mem_lock<src_dt> src_lock{ src, stream };
+    src_dt* src_ptr = src_lock.data();
     dst_ptr += offset;
 
     if (layout.data_padding) {
@@ -898,12 +898,17 @@ void InferRequestLegacy::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst,
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::copy_output_data");
     auto& stream = m_graph->GetNetwork()->get_stream();
     switch (dst->getTensorDesc().getPrecision()) {
-    case Precision::FP32: copyResultToOutputBlob<float>(src, dst, bi, stream); break;
-    case Precision::FP16: copyResultToOutputBlob<uint16_t>(src, dst, bi, stream); break;
-    case Precision::I32: copyResultToOutputBlob<int32_t>(src, dst, bi, stream); break;
-    case Precision::I64: copyResultToOutputBlob<int64_t>(src, dst, bi, stream); break;
-    case Precision::U8: copyResultToOutputBlob<uint8_t>(src, dst, bi, stream); break;
-    case Precision::I8: copyResultToOutputBlob<int8_t>(src, dst, bi, stream); break;
+    case Precision::FP64: copyResultToOutputBlob<float, double>(src, dst, bi, stream); break;
+    case Precision::FP32: copyResultToOutputBlob<float, float>(src, dst, bi, stream); break;
+    case Precision::FP16: copyResultToOutputBlob<uint16_t, uint16_t>(src, dst, bi, stream); break;
+    case Precision::I64: copyResultToOutputBlob<int64_t, int64_t>(src, dst, bi, stream); break;
+    case Precision::I32: copyResultToOutputBlob<int32_t, int32_t>(src, dst, bi, stream); break;
+    case Precision::I16: copyResultToOutputBlob<float, int16_t>(src, dst, bi, stream); break;
+    case Precision::I8: copyResultToOutputBlob<int8_t, int8_t>(src, dst, bi, stream); break;
+    case Precision::U16: copyResultToOutputBlob<float, uint16_t>(src, dst, bi, stream); break;
+    case Precision::U32: copyResultToOutputBlob<int32_t, uint32_t>(src, dst, bi, stream); break;
+    case Precision::U64: copyResultToOutputBlob<int32_t, uint64_t>(src, dst, bi, stream); break;
+    case Precision::U8: copyResultToOutputBlob<uint8_t, uint8_t>(src, dst, bi, stream); break;
     default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision";
     }
 }
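The second template parameter added above separates the device-side element type from the user-visible one: precisions the GPU does not hold natively (FP64, I16, U16, U32, U64) are stored in a supported type and converted element by element during copy-out. A minimal standalone sketch of such a converting copy, with an invented name (convert_copy) rather than the plugin's helper:

#include <cstddef>
#include <cstdint>

// Sketch: copy n elements while converting from the device element type to
// the host element type, mirroring copyResultToOutputBlob's two parameters.
template <typename src_dt, typename dst_dt>
void convert_copy(const src_dt* src, dst_dt* dst, size_t n) {
    for (size_t i = 0; i < n; ++i)
        dst[i] = static_cast<dst_dt>(src[i]);
}

// e.g. a U64 network output that the device stores as int32_t:
//   convert_copy<int32_t, uint64_t>(device_ptr, user_ptr, count);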
@@ -1047,22 +1052,39 @@ void InferRequestLegacy::allocate_outputs() {
         // while ExecutableNetwork contains proper ones. Thus replace dims with once from exec network
         // Can be removed once 76176 is resolved.
         desc.setDims(m_graph->GetOutputSize(no.first));
 
         GPU_DEBUG_GET_INSTANCE(debug_config);
         GPU_DEBUG_IF(debug_config->verbose >= 2) {
             GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl;
         }
 
         outputsMap[no.first] = outputID;
-        if (m_graph->GetEngine()->use_unified_shared_memory()) {
-            // For USM case we create host blob using custom USM host allocator
-            // and then create shared device blob on top of this buffer
-            auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_graph->GetContext().get()));
-            _outputs[no.first] = host_blob;
-            _deviceOutputs[no.first] = create_shared_device_blob(desc, output_layout, host_blob->buffer().as<void*>());
+
+        if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16 ||
+            desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64 ||
+            desc.getPrecision() == Precision::FP64) {
+            TensorDesc device_blob_desc = desc;
+
+            if (desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64)
+                device_blob_desc.setPrecision(Precision::I32);
+            else
+                device_blob_desc.setPrecision(Precision::FP32);
+
+            auto host_blob = create_host_blob(desc);
+            _outputs[no.first] = host_blob;
+            auto device_blob = create_device_blob(device_blob_desc, output_layout);
+            _deviceOutputs[no.first] = device_blob;
         } else {
-            _outputs[no.first] = create_host_blob(desc);
-            _deviceOutputs[no.first] = create_device_blob(desc, output_layout);
+            if (m_graph->GetEngine()->use_unified_shared_memory()) {
+                // For USM case we create host blob using custom USM host allocator
+                // and then create shared device blob on top of this buffer
+                auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_graph->GetContext().get()));
+                _outputs[no.first] = host_blob;
+                _deviceOutputs[no.first] = create_shared_device_blob(desc, output_layout, host_blob->buffer().as<void*>());
+            } else {
+                _outputs[no.first] = create_host_blob(desc);
+                _deviceOutputs[no.first] = create_device_blob(desc, output_layout);
+            }
         }
     }
 }
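The branch added above gives the device blob its own descriptor whenever the requested output precision is not natively supported: U32 and U64 are held on the device as I32, while I16, U16, and FP64 are held as FP32; copy_output_data then converts back on read. The same mapping, factored into a standalone sketch with an invented helper name (device_precision_for):

#include <ie_precision.hpp>

// Sketch of the fallback applied in allocate_outputs: unsupported device
// precisions map to a supported one; copy-out converts back to the original.
static InferenceEngine::Precision device_precision_for(InferenceEngine::Precision p) {
    using InferenceEngine::Precision;
    switch (p) {
    case Precision::U32:
    case Precision::U64:
        return Precision::I32;   // unsigned 32/64-bit kept as I32 on device
    case Precision::I16:
    case Precision::U16:
    case Precision::FP64:
        return Precision::FP32;  // 16-bit ints and FP64 kept as FP32 on device
    default:
        return p;                // natively supported: used as-is
    }
}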
@@ -1115,6 +1137,7 @@ void InferRequestLegacy::prepare_input(const cldnn::primitive_id& inputName, Blo
     bool is_dev_input = remote_ptr != nullptr;
 
     switch (prec) {
+    case Precision::FP64:
     case Precision::FP32:
     case Precision::FP16:
     case Precision::I8:
@@ -1123,6 +1146,8 @@
     case Precision::I16:
     case Precision::U16:
     case Precision::I32:
+    case Precision::U32:
+    case Precision::U64:
     case Precision::I64: {
         auto impl = getBlobImpl(is_dev_input ?
             remote_ptr :
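Taken together with the copy_output_data changes, the extra case labels mean an FP64, U32, or U64 input or output precision no longer falls through to the NotImplemented default. An illustrative end-to-end use of the classic InferenceEngine API (not from this commit; the model path and single-output assumption are placeholders):

#include <inference_engine.hpp>

int main() {
    using namespace InferenceEngine;
    Core core;
    CNNNetwork network = core.ReadNetwork("model.xml");  // assumed model path
    // Request an output precision this commit adds GPU support for:
    network.getOutputsInfo().begin()->second->setPrecision(Precision::FP64);
    ExecutableNetwork exec = core.LoadNetwork(network, "GPU");
    InferRequest req = exec.CreateInferRequest();
    req.Infer();
    // The device computes in FP32; copy_output_data widens to double here.
    Blob::Ptr out = req.GetBlob(network.getOutputsInfo().begin()->first);
    double first_value = out->buffer().as<double*>()[0];
    (void)first_value;
    return 0;
}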
