Skip to content

Commit

Permalink
Add custom allocator for std::vector
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Sep 26, 2024
1 parent 507f31d commit 1405c53
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 70 deletions.
32 changes: 2 additions & 30 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,32 +151,6 @@ struct NetworkDescription final {
NetworkMetadata metadata;
};

/**
* @struct CompiledNetwork
* @brief Custom container for compiled network, used for export
* @var CompiledNetwork::data
* Pointer to the address of compiled network
* @var CompiledNetwork:size
* Size of the compiled network
* @var CompiledNetwork::ownedStorage
* Plugin owned compiled network storage that is required in case of a driver that
* doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network.
* @note It's unsafe to store either data or size outside of the compiled network object as its destructor
* would release the owning container
*/

struct CompiledNetwork {
const uint8_t* data;
size_t size;
CompiledNetwork(const uint8_t* data, size_t size, std::vector<uint8_t> storage)
: data(data),
size(size),
ownedStorage(std::move(storage)) {}

private:
std::vector<uint8_t> ownedStorage;
};

/**
* @interface ICompiler
* @brief An interface to be implemented by a concrete compiler to provide
Expand Down Expand Up @@ -229,10 +203,8 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
/**
 * @brief Hook a driver compiler can override to release the graphHandle when there is no executor.
 * The default implementation does nothing.
 */
virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription) {}

virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) {
return CompiledNetwork(networkDescription.compiledNetwork.data(),
networkDescription.compiledNetwork.size(),
networkDescription.compiledNetwork);
virtual std::vector<uint8_t> getCompiledNetwork(const NetworkDescription& networkDescription) {
return networkDescription.compiledNetwork;
}

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {

void release(std::shared_ptr<const NetworkDescription> networkDescription) override;

CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
std::vector<uint8_t> getCompiledNetwork(const NetworkDescription& networkDescription) override;

private:
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
Expand All @@ -131,16 +131,12 @@ class LevelZeroCompilerInDriver final : public ICompiler {
template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
size_t& blobSize) const;
std::vector<uint8_t>& blob) const;

template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t*& blobPtr,
size_t& blobSize) const;
std::vector<uint8_t>& blob) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,
Expand Down
121 changes: 94 additions & 27 deletions src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,61 @@ void LevelZeroCompilerInDriver<TableExtension>::release(std::shared_ptr<const Ne
_logger.debug("release completed");
}

/**
 * @brief Allocator that hands out consecutive slices of an externally owned, pre-allocated buffer.
 *
 * Memory is never returned to the buffer: deallocate() is a no-op and every allocate() call advances
 * an internal cursor. When the remaining space is too small, allocate() throws std::bad_alloc.
 *
 * The converting constructor from PreAllocatedAllocator<U> is required by containers that rebind
 * their allocator (for example MSVC debug builds rebind to std::_Container_proxy); without it the
 * class does not compile there.
 *
 * @note Each copy carries its own snapshot of the cursor, so two copies that both allocate will hand
 * out overlapping regions of the same buffer.
 * @note NOTE(review): a rebound copy carves its objects out of the same buffer, so any bookkeeping
 * object a container allocates through it would presumably overwrite the pre-allocated contents -
 * confirm this is acceptable before using the allocator to wrap a buffer that already holds data.
 *
 * @tparam T Element type served by this allocator.
 */
template <typename T>
class PreAllocatedAllocator {
public:
    using value_type = T;

    /**
     * @param pre_allocated Start of the externally managed buffer.
     * @param max_size Capacity of the buffer, counted in elements of type T.
     */
    PreAllocatedAllocator(T* pre_allocated, std::size_t max_size) noexcept
        : memory(pre_allocated),
          max_size(max_size),
          offset(0) {}

    PreAllocatedAllocator(const PreAllocatedAllocator& other) noexcept = default;
    PreAllocatedAllocator& operator=(const PreAllocatedAllocator& other) noexcept = default;

    /**
     * @brief Builds an allocator for T on top of the buffer used by an allocator for U.
     *
     * Capacity is rounded down and the already consumed part is rounded up to whole T elements, so
     * the new allocator never hands out bytes the source allocator had already handed out.
     * The buffer is reused as-is; it is the caller's responsibility that it is suitably aligned for T.
     */
    template <typename U>
    PreAllocatedAllocator(const PreAllocatedAllocator<U>& other) noexcept
        : memory(reinterpret_cast<T*>(other.memory)),
          max_size(other.max_size * sizeof(U) / sizeof(T)),
          offset(clampedElementCount(other.offset * sizeof(U), other.max_size * sizeof(U) / sizeof(T))) {}

    /**
     * @brief Reserves room for n elements and returns the address of the first one.
     * @throws std::bad_alloc if fewer than n elements remain in the buffer.
     */
    T* allocate(std::size_t n) {
        // Written as a subtraction so that a huge n cannot wrap around and slip past the check.
        if (n > max_size - offset) {
            throw std::bad_alloc();
        }
        T* ptr = memory + offset;
        offset += n;
        return ptr;
    }

    void deallocate(T*, std::size_t) noexcept {
        // Deallocate does nothing since memory is externally managed
    }

    template <typename U>
    struct rebind {
        using other = PreAllocatedAllocator<U>;
    };

private:
    // The converting constructor reads the private state of other instantiations.
    template <typename U>
    friend class PreAllocatedAllocator;

    // Number of T elements needed to cover usedBytes, never more than capacity
    // (keeps the "offset <= max_size" invariant that allocate() relies on).
    static std::size_t clampedElementCount(std::size_t usedBytes, std::size_t capacity) noexcept {
        const std::size_t elements = usedBytes / sizeof(T) + (usedBytes % sizeof(T) != 0 ? 1 : 0);
        return elements < capacity ? elements : capacity;
    }

    T* memory;             // start of the external buffer
    std::size_t max_size;  // capacity, in elements of T
    std::size_t offset;    // elements already handed out
};

template <typename TableExtension>
template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
size_t& blobSize) const {
std::vector<uint8_t>& blob) const {
// Get blob size first
size_t blobSize = -1;
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
blob.resize(blobSize);

Expand All @@ -395,51 +442,71 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
uint64_t(result),
". ",
getLatestBuildError());

blobPtr = blob.data();
}

// Comparison operators for PreAllocatedAllocator: any two instances, whatever their value types,
// are reported as equal.
// NOTE(review): the allocator stores a buffer pointer, so instances built over different buffers
// also compare equal here - confirm that this is intended.
template <typename Lhs, typename Rhs>
bool operator==(const PreAllocatedAllocator<Lhs>& /* lhs */, const PreAllocatedAllocator<Rhs>& /* rhs */) {
    return true;
}

template <typename Lhs, typename Rhs>
bool operator!=(const PreAllocatedAllocator<Lhs>& lhs, const PreAllocatedAllocator<Rhs>& rhs) {
    return !(lhs == rhs);
}

/**
 * @brief Fetches the native binary of a graph through pfnGetNativeBinary2 and stores it in blob.
 *
 * This overload is selected when UseCopyForNativeBinary(T) is false. The call yields a pointer and
 * a size; the bytes are then copied into blob so that the caller receives storage it owns.
 * NOTE(review): the returned pointer presumably refers to memory managed by the driver - confirm
 * its lifetime if the copy is ever replaced by a non-owning view.
 *
 * @param graphDdiTableExt Graph DDI table (the call itself goes through the _graphDdiTableExt member).
 * @param graphHandle Handle of the graph whose binary is requested.
 * @param blob Output container; its previous contents are replaced.
 * @throws Whatever OPENVINO_ASSERT raises when the driver call does not return ZE_RESULT_SUCCESS.
 */
template <typename TableExtension>
template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
                                                                ze_graph_handle_t graphHandle,
                                                                std::vector<uint8_t>& blob) const {
    // Get blob ptr and size
    uint8_t* blobPtr = nullptr;
    size_t blobSize = 0;

    auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);

    OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
                    "Failed to compile network. L0 pfnGetNativeBinary get blob size",
                    " result: ",
                    ze_result_to_string(result),
                    ", code 0x",
                    std::hex,
                    uint64_t(result),
                    ". ",
                    getLatestBuildError());

    // Hand the bytes over to the caller. Guard against a null pointer / zero size so that no
    // pointer arithmetic is performed on nullptr.
    if (blobPtr != nullptr && blobSize != 0) {
        blob.assign(blobPtr, blobPtr + blobSize);
    } else {
        blob.clear();
    }
}

template <typename TableExtension>
CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
std::vector<uint8_t> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
const NetworkDescription& networkDescription) {
if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) {
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);

uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;

getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize);
getNativeBinary(_graphDdiTableExt, graphHandle, blob);

_logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
return CompiledNetwork(blobPtr, blobSize, std::move(blob));
return std::move(blob);
}
_logger.info("return the blob from network description");
return CompiledNetwork(networkDescription.compiledNetwork.data(),
networkDescription.compiledNetwork.size(),
networkDescription.compiledNetwork);
return networkDescription.compiledNetwork;
}

template <typename TableExtension>
Expand Down
11 changes: 5 additions & 6 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
"Can't create infer request!\n"
"Please make sure that the device is available. Only exports can be made.";

/**
 * @brief Computes a 32-bit checksum of a blob.
 *
 * Starts from the seed 1171117 and, for every byte, replaces the running value with
 * (value << 7) + value + byte. Arithmetic is done on std::uint32_t, so it wraps modulo 2^32.
 *
 * @param blob Bytes to hash; an empty blob yields the seed.
 * @return The resulting checksum.
 */
std::uint32_t hash(const std::vector<uint8_t>& blob) {
    std::uint32_t result = 1171117u;
    for (const uint8_t byte : blob) {
        result = ((result << 7) + result) + static_cast<uint32_t>(byte);
    }
    return result;
}

Expand Down Expand Up @@ -141,14 +140,14 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");
const auto blob = _compiler->getCompiledNetwork(*_networkPtr);
stream.write(reinterpret_cast<const char*>(blob.data), blob.size);
stream.write(reinterpret_cast<const char*>(blob.data()), blob.size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
} else {
if (_logger.level() >= ov::log::Level::INFO) {
std::stringstream str;
str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob);
str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob);
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
Expand Down

0 comments on commit 1405c53

Please sign in to comment.