Skip to content

Commit

Permalink
Update compiler DQ query
Browse files Browse the repository at this point in the history
  • Loading branch information
smirnov-alexey committed Jan 9, 2025
1 parent c839bf0 commit 1496131
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,13 +303,8 @@ struct NPUDesc {
/// @brief Collects the NPU device traits used to pick default LLM pipeline configs.
///
/// Reads three properties from the plugin and packs them into an NPUDesc:
///   - ov::device::architecture                      -> architecture string
///   - ov::intel_npu::max_tiles                      -> tile count (int64_t)
///   - ov::intel_npu::compiler_dynamic_quantization  -> compiler DQ support flag
///
/// @param plugin Plugin instance to query; dereferenced unconditionally, so it must not be null.
/// @return An optional holding the descriptor. This implementation always returns an
///         engaged optional; the optional return type is kept for the callers' sake.
/// @note NOTE(review): get_property() / Any::as<T>() presumably throw when a property is
///       unsupported or holds a different type -- confirm against the plugin in use.
std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IPlugin>& plugin) {
    const std::string arch = plugin->get_property(ov::device::architecture.name(), ov::AnyMap{}).as<std::string>();
    const int64_t max_tiles = plugin->get_property(ov::intel_npu::max_tiles.name(), ov::AnyMap{}).as<int64_t>();
    // Compiler dynamic-quantization support is queried directly as a dedicated boolean
    // property rather than by scanning the device capability list for a string tag.
    // Declared exactly once: a second `compiler_dq` in this scope would be a redefinition.
    const bool compiler_dq =
        plugin->get_property(ov::intel_npu::compiler_dynamic_quantization.name(), ov::AnyMap{}).as<bool>();
    return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});
}

Expand Down Expand Up @@ -359,6 +354,7 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, c
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");
}
return config;
}
Expand All @@ -380,6 +376,7 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");
}
return config;
}
Expand Down

0 comments on commit 1496131

Please sign in to comment.