diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index a0b931f7c38105..ed52d6ab76e9f4 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -559,7 +559,12 @@ void ov::npuw::CompiledModel::CompiledModelDesc::serialize(
         write(stream, is_remote);
         write_weightless(stream, scales, const_to_offset);
         write_weightless(stream, zerops, const_to_offset);
-        write_weightless_closure(stream, closure, closure_uid, const_to_offset);
+
+        // !!! FIXME !!!
+        // To serialize closures we need to:
+        // 1) serialize CPU closures the same way as scales/zerops
+        // 2) serialize the rest from lazy_tensor
+        // 3) then evaluate and allocate the LT (using the bank) during deserialization
     }
 
     LOG_DEBUG("DONE.");
@@ -632,7 +637,12 @@ ov::npuw::CompiledModel::CompiledModelDesc ov::npuw::CompiledModel::CompiledMode
 
         read_weightless(stream, desc.scales, weights_stream);
         read_weightless(stream, desc.zerops, weights_stream);
-        read_weightless_closure(stream, desc.closure, weights_stream);
+
+        // !!! FIXME !!!
+        // To deserialize closures we need to:
+        // 1) deserialize CPU closures the same way as scales/zerops
+        // 2) deserialize the rest via lazy_tensor
+        // 3) then evaluate and allocate the LT (using the bank)
     }
 
     LOG_DEBUG("DONE.");
diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
index c85ff70944b19d..6adf2c12403b9e 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
@@ -208,11 +208,6 @@ void ov::npuw::s11n::write_weightless(std::ostream& stream,
     }
 }
 
-void ov::npuw::s11n::write_weightless_closure(std::ostream& stream,
-                                              const std::vector<ov::Tensor>& var,
-                                              const std::vector<int64_t>& uids,
-                                              const std::unordered_map<const void*, std::size_t>& const_to_offset) {}
-
 void ov::npuw::s11n::read_weightless(std::istream& stream,
                                      std::vector<ov::Tensor>& var,
                                      std::ifstream& weights_stream) {
@@ -251,7 +246,3 @@ void ov::npuw::s11n::read_weightless(std::istream& stream,
         }
     }
 }
-
-void ov::npuw::s11n::read_weightless_closure(std::istream& stream,
-                                             std::vector<ov::Tensor>& var,
-                                             std::ifstream& weights_stream) {}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
index 541c4ed78a58d3..557c8dab3e6612 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
@@ -10,8 +10,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 
const constexpr char* NPUW_NAME_IDENTIFIER = "NPUW_serialized_";
@@ -68,14 +68,7 @@ void read(std::istream& stream, std::vector>& var);
 void write_weightless(std::ostream& stream,
                       const std::vector<ov::Tensor>& var,
                       const std::unordered_map<const void*, std::size_t>& const_to_offset);
-void write_weightless_closure(std::ostream& stream,
-                              const std::vector<ov::Tensor>& var,
-                              const std::vector<int64_t>& uids,
-                              const std::unordered_map<const void*, std::size_t>& const_to_offset);
-// No allocation needed
 void read_weightless(std::istream& stream, std::vector<ov::Tensor>& var, std::ifstream& weights_stream);
-// With allocation where required
-void read_weightless_closure(std::istream& stream, std::vector<ov::Tensor>& var, std::ifstream& weights_stream);
 
 // Forward declaration
 template
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
index 8ed87c4c0b3af7..eff969f60f9da5 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -233,7 +233,7 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
         return;
     }
 
-    // Need to allocate on device and copy deserialized tensor to that memory
+    // Need to allocate on device and read the deserialized tensor into that memory
     ov::SoPtr<ov::ITensor> remote_tensor;
     ov::Tensor allocated_tensor;
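
A note on the FIXME plan in compiled_model.cpp: the idea is to split closures by origin. CPU-side closures get serialized inline, exactly like scales/zerops, while the rest are written only as a lazy-tensor recipe, which is then evaluated and allocated through the weights bank at deserialization time. The standalone sketch below illustrates that split; it is not the plugin code, and Closure, CpuTensor, LazyTensorMeta, serialize_closures, and deserialize_closures are all hypothetical stand-ins for npuw's ov::Tensor, LazyTensor, and Bank machinery.

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for an ov::Tensor whose data already lives on CPU.
struct CpuTensor {
    std::string bytes;  // raw payload
};

// Stand-in for npuw's LazyTensor: a recipe for re-creating a tensor from
// the original weights file instead of storing its bytes in the blob.
struct LazyTensorMeta {
    std::size_t weights_offset = 0;  // where the source constant lives
    std::size_t byte_size = 0;       // how much to read back
};

struct Closure {
    bool is_cpu = false;  // step 1: CPU closures are written inline
    CpuTensor cpu;        // valid when is_cpu
    LazyTensorMeta lazy;  // step 2: otherwise only the recipe is written
};

void serialize_closures(std::ostream& out, const std::vector<Closure>& cls) {
    std::size_t n = cls.size();
    out.write(reinterpret_cast<const char*>(&n), sizeof(n));
    for (const auto& c : cls) {
        out.put(c.is_cpu ? '\1' : '\0');
        if (c.is_cpu) {
            // Inline payload, same idea as write_weightless() for scales/zerops
            std::size_t len = c.cpu.bytes.size();
            out.write(reinterpret_cast<const char*>(&len), sizeof(len));
            out.write(c.cpu.bytes.data(), static_cast<std::streamsize>(len));
        } else {
            // Recipe only: the actual bytes stay in the weights file
            out.write(reinterpret_cast<const char*>(&c.lazy), sizeof(c.lazy));
        }
    }
}

std::vector<Closure> deserialize_closures(std::istream& in) {
    std::size_t n = 0;
    in.read(reinterpret_cast<char*>(&n), sizeof(n));
    std::vector<Closure> cls(n);
    for (auto& c : cls) {
        c.is_cpu = in.get() == 1;
        if (c.is_cpu) {
            std::size_t len = 0;
            in.read(reinterpret_cast<char*>(&len), sizeof(len));
            c.cpu.bytes.resize(len);
            in.read(c.cpu.bytes.data(), static_cast<std::streamsize>(len));
        } else {
            in.read(reinterpret_cast<char*>(&c.lazy), sizeof(c.lazy));
            // Step 3 happens here in the real plugin: evaluate the lazy
            // tensor and allocate the result through the weights bank.
        }
    }
    return cls;
}

int main() {
    std::stringstream blob;
    serialize_closures(blob, {{true, {"0123"}, {}}, {false, {}, {128, 64}}});
    std::cout << deserialize_closures(blob).size() << " closures restored\n";
}

Writing only the recipe for non-CPU closures is what keeps the blob weightless: the payload is re-read from the original weights file on load, and the bank can then own the resulting device allocation, much as read_and_add_tensor does for other tensors.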
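
On the weights_bank.cpp comment change: the point is that deserialization should read from the stream directly into the memory the bank allocates on the device, rather than into a host tensor that is copied afterwards. A minimal sketch of that flow follows, assuming a hypothetical alloc_on_device() as a stand-in for the bank's remote-tensor allocation; it is not the plugin's actual allocation path.

#include <cstddef>
#include <iostream>
#include <istream>
#include <memory>
#include <sstream>

// Stand-in for the bank's device allocation; the real code creates a remote
// tensor on the NPU (remote_tensor above) and wraps it for host access
// (allocated_tensor) instead of returning a plain heap buffer.
std::unique_ptr<char[]> alloc_on_device(std::size_t byte_size) {
    return std::make_unique<char[]>(byte_size);
}

// Reads the serialized payload straight into the destination allocation,
// skipping the intermediate host tensor + copy the old comment implied.
std::unique_ptr<char[]> read_tensor_into_device(std::istream& stream, std::size_t byte_size) {
    auto dst = alloc_on_device(byte_size);
    stream.read(dst.get(), static_cast<std::streamsize>(byte_size));
    return dst;
}

int main() {
    std::stringstream payload("0123456789abcdef");
    auto tensor_mem = read_tensor_into_device(payload, 16);
    std::cout << "first byte: " << tensor_mem[0] << "\n";  // prints '0'
}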