Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
smirnov-alexey committed Jan 9, 2025
1 parent 1186e32 commit e2c2d38
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 20 deletions.
14 changes: 12 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,12 @@ void ov::npuw::CompiledModel::CompiledModelDesc::serialize(
write(stream, is_remote);
write_weightless(stream, scales, const_to_offset);
write_weightless(stream, zerops, const_to_offset);
write_weightless_closure(stream, closure, closure_uid, const_to_offset);

// !!! FIXME !!!
// to serialize closures we need:
// 1) serialize CPU closures as scales/zerops
// 2) rest should be serialized from lazy_tensor
// 3) then LT should be evaluated and allocated (use bank for that) during deserialization
}

LOG_DEBUG("DONE.");
Expand Down Expand Up @@ -632,7 +637,12 @@ ov::npuw::CompiledModel::CompiledModelDesc ov::npuw::CompiledModel::CompiledMode

read_weightless(stream, desc.scales, weights_stream);
read_weightless(stream, desc.zerops, weights_stream);
read_weightless_closure(stream, desc.closure, weights_stream);

// !!! FIXME !!!
// to deserialize closures we need:
// 1) deserialize CPU closures as scales/zerops
// 2) rest should be deserialized via lazy_tensor
// 3) then LT should be evaluated and allocated (use bank for that)
}

LOG_DEBUG("DONE.");
Expand Down
9 changes: 0 additions & 9 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,6 @@ void ov::npuw::s11n::write_weightless(std::ostream& stream,
}
}

// Intentional no-op stub: closure serialization is not implemented yet, so all
// parameters are deliberately unused and nothing is written to `stream`.
// NOTE(review): per the FIXME in CompiledModelDesc::serialize, the plan appears
// to be serializing CPU closures like scales/zerops and the rest via
// lazy_tensor — confirm before relying on this writing any bytes.
void ov::npuw::s11n::write_weightless_closure(std::ostream& stream,
const std::vector<ov::Tensor>& var,
const std::vector<int64_t>& uids,
const std::unordered_map<const void*, std::size_t>& const_to_offset) {}

void ov::npuw::s11n::read_weightless(std::istream& stream,
std::vector<ov::Tensor>& var,
std::ifstream& weights_stream) {
Expand Down Expand Up @@ -251,7 +246,3 @@ void ov::npuw::s11n::read_weightless(std::istream& stream,
}
}
}

// Intentional no-op stub: closure deserialization is not implemented yet, so
// nothing is consumed from `stream`/`weights_stream` and `var` is left
// untouched. Must stay byte-symmetric with write_weightless_closure (which is
// also a no-op), otherwise the stream cursor would desynchronize.
// NOTE(review): per the FIXME in the deserializer, lazy tensors are presumably
// meant to be evaluated/allocated via the weights bank here — confirm.
void ov::npuw::s11n::read_weightless_closure(std::istream& stream,
std::vector<ov::Tensor>& var,
std::ifstream& weights_stream) {}
9 changes: 1 addition & 8 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#include <optional>
#include <string>
#include <tuple>
#include <unordered_set>
#include <unordered_map>
#include <unordered_set>
#include <vector>

const constexpr char* NPUW_NAME_IDENTIFIER = "NPUW_serialized_";
Expand Down Expand Up @@ -68,14 +68,7 @@ void read(std::istream& stream, std::vector<std::shared_ptr<ov::Node>>& var);
void write_weightless(std::ostream& stream,
const std::vector<ov::Tensor>& var,
const std::unordered_map<const void*, std::size_t>& const_to_offset);
void write_weightless_closure(std::ostream& stream,
const std::vector<ov::Tensor>& var,
const std::vector<int64_t>& uids,
const std::unordered_map<const void*, std::size_t>& const_to_offset);
// No allocation needed
void read_weightless(std::istream& stream, std::vector<ov::Tensor>& var, std::ifstream& weights_stream);
// With allocation where required
void read_weightless_closure(std::istream& stream, std::vector<ov::Tensor>& var, std::ifstream& weights_stream);

// Forward declaration
template <typename T1, typename T2>
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
return;
}

// Need to allocate on device and copy deserialized tensor to that memory
// Need to allocate on device and read deserialized tensor into that memory
ov::SoPtr<ov::ITensor> remote_tensor;
ov::Tensor allocated_tensor;

Expand Down

0 comments on commit e2c2d38

Please sign in to comment.