Skip to content

Commit

Permalink
Add new Mallocator
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Sep 26, 2024
1 parent 0e84419 commit 6019107
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 50 deletions.
94 changes: 83 additions & 11 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,85 @@ struct NetworkDescription final {
NetworkMetadata metadata;
};

/**
 * @struct Mallocator
 * @brief Allocator that either owns malloc-backed storage or exposes a buffer supplied by the caller.
 * @details Three modes are selected by the constructor used:
 *  - default-constructed: allocate() obtains storage with std::malloc and deallocate() releases it
 *    with std::free;
 *  - constructed from a mutable buffer: allocate() hands back that buffer and deallocate() is a no-op;
 *  - constructed from a const buffer: the buffer is only reachable through data(); allocate() returns
 *    nullptr in this mode, so a container using it is expected never to allocate.
 *
 * The allocator never copies or frees a caller-supplied buffer.
 * NOTE(review): the caller presumably has to keep that buffer alive for as long as any copy of the
 * allocator is in use - confirm against the call sites.
 */
template <class T>
struct Mallocator {
    using value_type = T;

    /// Creates an owning allocator; no storage is acquired until allocate() is called.
    Mallocator() = default;

    /**
     * @brief Wraps an existing mutable buffer.
     * @param preAllocatedMem Buffer returned by allocate() and data().
     * @param size Value reported by size().
     */
    Mallocator(void* preAllocatedMem, std::size_t size)
        // The pointer is stored as-is: constructing a new char array on top of the buffer would
        // formally leave its existing contents indeterminate.
        : _preAllocatedMem(preAllocatedMem),
          _size(size),
          _allocatedByUs(false) {}

    /**
     * @brief Wraps an existing read-only buffer.
     * @param constPreAllocatedMem Buffer returned by data().
     * @param size Value reported by size().
     */
    Mallocator(const void* constPreAllocatedMem, std::size_t size)
        : _constPreAllocatedMem(constPreAllocatedMem),
          _size(size),
          _allocatedByUs(false) {}

    /**
     * @brief Rebinding constructor used when a container needs an allocator for another type.
     * @details The copy is always put in owning mode, whatever mode `other` is in.
     * Workaround for MSFT std::_Container_Proxy.
     */
    template <class U>
    constexpr Mallocator(const Mallocator<U>& other) noexcept
        : _preAllocatedMem(other._preAllocatedMem),
          _constPreAllocatedMem(other._constPreAllocatedMem),
          _size(other._size),
          _allocatedByUs(true) {}

    /**
     * @brief Returns storage for `n` objects of type T.
     * @param n Number of elements requested.
     * @return In owning mode, a fresh std::malloc block; otherwise the wrapped mutable buffer
     *         (nullptr when the allocator wraps a const buffer).
     * @throws std::bad_array_new_length if `n * sizeof(T)` would overflow.
     * @throws std::bad_alloc if std::malloc fails in owning mode.
     */
    [[nodiscard]] T* allocate(std::size_t n) {
        if (n > std::numeric_limits<std::size_t>::max() / sizeof(T)) {
            throw std::bad_array_new_length();
        }

        if (!_allocatedByUs) {
            // NOTE(review): `n` is not checked against the wrapped buffer's size - confirm callers
            // never ask for more than was supplied.
            return static_cast<T*>(_preAllocatedMem);
        }

        T* p = static_cast<T*>(std::malloc(n * sizeof(T)));
        if (p == nullptr && n != 0) {
            throw std::bad_alloc();
        }

        // Remember the most recent block so data()/size() can describe it.
        _preAllocatedMem = p;
        _size = n;
        return p;
    }

    /**
     * @brief Releases storage previously returned by allocate().
     * @details A no-op unless the allocator is in owning mode. Storage comes from std::malloc, so
     *          it is returned with std::free.
     */
    void deallocate(T* p, [[maybe_unused]] std::size_t n) noexcept {
        if (!_allocatedByUs) {
            return;
        }

        if (p != nullptr && p == _preAllocatedMem) {
            // Do not keep a dangling pointer to the block that is about to be freed.
            _preAllocatedMem = nullptr;
            _size = 0;
        }
        std::free(p);
    }

    /**
     * @brief Returns the buffer this allocator currently describes.
     * @return In owning mode, the most recently allocated block (nullptr if there is none);
     *         otherwise the wrapped mutable buffer or, failing that, the wrapped const buffer.
     * @throws std::bad_alloc if the allocator is non-owning and wraps no buffer at all.
     */
    const T* data() const {
        if (_allocatedByUs || _preAllocatedMem != nullptr) {
            return static_cast<const T*>(_preAllocatedMem);
        }
        if (_constPreAllocatedMem != nullptr) {
            return static_cast<const T*>(_constPreAllocatedMem);
        }
        throw std::bad_alloc();
    }

    /**
     * @brief Returns the size passed at construction or, in owning mode, the element count of
     *        the most recent allocation.
     */
    std::size_t size() const noexcept {
        return _size;
    }

private:
    void* _preAllocatedMem = nullptr;
    const void* _constPreAllocatedMem = nullptr;
    std::size_t _size = 0;

    bool _allocatedByUs = true;

    // The rebinding constructor reads the private state of other specializations.
    template <typename>
    friend struct Mallocator;
};

/**
 * @brief Equality comparison for Mallocator instances.
 * @return Always true: every pair of allocators is reported as equal, regardless of element type
 *         or of the state held by either operand.
 */
template <typename Lhs, typename Rhs>
bool operator==(const Mallocator<Lhs>& /* lhs */, const Mallocator<Rhs>& /* rhs */) {
    return true;
}

/**
 * @brief Inequality comparison for Mallocator instances.
 * @return Always false: no pair of allocators is ever reported as different, regardless of element
 *         type or of the state held by either operand.
 */
template <typename Lhs, typename Rhs>
bool operator!=(const Mallocator<Lhs>& /* lhs */, const Mallocator<Rhs>& /* rhs */) {
    return false;
}


/**
* @struct CompiledNetwork
* @brief Custom container for compiled network, used for export
Expand Down Expand Up @@ -229,17 +308,10 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
// Driver compiler can use this to release graphHandle, if we do not have executor
virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription){};

<<<<<<< HEAD
virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) {
return CompiledNetwork(networkDescription.compiledNetwork.data(),
networkDescription.compiledNetwork.size(),
networkDescription.compiledNetwork);
=======
virtual CompiledNetwork getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) {
return CompiledNetwork{networkDescription->compiledNetwork.data(),
networkDescription->compiledNetwork.size(),
networkDescription->compiledNetwork};
>>>>>>> 7d2fd96a3c (Fix clang formats)
virtual std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) {
Mallocator<uint8_t> mal(networkDescription.compiledNetwork.data(),
networkDescription.compiledNetwork.size());
return std::vector<uint8_t, Mallocator<uint8_t>>(mal);
}

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler {

void release(std::shared_ptr<const NetworkDescription> networkDescription) override;

CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) override;

private:
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {

void release(std::shared_ptr<const NetworkDescription> networkDescription) override;

CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) override;

private:
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
Expand All @@ -129,18 +129,12 @@ class LevelZeroCompilerInDriver final : public ICompiler {
std::vector<IODescriptor>& outputs) const;

template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
size_t& blobSize) const;
std::vector<uint8_t, Mallocator<uint8_t>> getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle) const;

template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t*& blobPtr,
size_t& blobSize) const;
std::vector<uint8_t, Mallocator<uint8_t>> getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr<const NetworkDescription>
apiAdapter->release(std::move(networkDescription));
}

CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
_logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)");
return apiAdapter->getCompiledNetwork(networkDescription);
}
Expand Down
52 changes: 30 additions & 22 deletions src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,11 +364,13 @@ void LevelZeroCompilerInDriver<TableExtension>::release(std::shared_ptr<const Ne

template <typename TableExtension>
template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
size_t& blobSize) const {
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle) const {

Mallocator<uint8_t> mal;
std::vector<uint8_t, Mallocator<uint8_t>> blob(mal);
size_t blobSize;

// Get blob size first
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
blob.resize(blobSize);
Expand Down Expand Up @@ -396,16 +398,17 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
". ",
getLatestBuildError());

blobPtr = blob.data();
return blob;
}

template <typename TableExtension>
template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t*& blobPtr,
size_t& blobSize) const {
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle) const {

uint8_t* blobPtr;
size_t blobSize;

// Get blob ptr and size
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);

Expand All @@ -418,28 +421,33 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
uint64_t(result),
". ",
getLatestBuildError());

Mallocator<uint8_t> mal(blobPtr, blobSize);
return std::vector<uint8_t, Mallocator<uint8_t>>(mal);

// Mallocator<uint8_t> mal(blobPtr, blobSize);
// const std::vector<uint8_t, Mallocator<uint8_t>> vec(mal);
// vec.reserve(blobSize);
// vec.resize(blobSize);
// uint8_t* c = vec.data();
}

template <typename TableExtension>
CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
const NetworkDescription& networkDescription) {
if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) {
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);

uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;

getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize);

_logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
return CompiledNetwork(blobPtr, blobSize, std::move(blob));
return getNativeBinary(_graphDdiTableExt, graphHandle);
}
_logger.info("return the blob from network description");
return CompiledNetwork(networkDescription.compiledNetwork.data(),
networkDescription.compiledNetwork.size(),
networkDescription.compiledNetwork);

Mallocator<uint8_t> mal(networkDescription.compiledNetwork.data(), networkDescription.compiledNetwork.size());
return std::vector<uint8_t, Mallocator<uint8_t>>(mal);


}

template <typename TableExtension>
Expand Down
8 changes: 4 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
"Can't create infer request!\n"
"Please make sure that the device is available. Only exports can be made.";

std::uint32_t hash(const intel_npu::CompiledNetwork& blob) {
std::uint32_t hash(const std::vector<uint8_t, intel_npu::Mallocator<uint8_t>>& blob) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) {
for (const uint8_t* it = blob.get_allocator().data(); it != blob.data() + blob.get_allocator().size(); ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}
return result;
Expand Down Expand Up @@ -141,14 +141,14 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");
const auto blob = _compiler->getCompiledNetwork(*_networkPtr);
stream.write(reinterpret_cast<const char*>(blob.data), blob.size);
stream.write(reinterpret_cast<const char*>(blob.get_allocator().data()), blob.get_allocator().size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
} else {
if (_logger.level() >= ov::log::Level::INFO) {
std::stringstream str;
str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob);
str << "Blob size: " << blob.get_allocator().size() << ", hash: " << std::hex << hash(blob);
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
Expand Down

0 comments on commit 6019107

Please sign in to comment.