Skip to content

Commit

Permalink
StaticLLMPipeline: Update config (openvinotoolkit#969)
Browse files Browse the repository at this point in the history
  • Loading branch information
TolyaTalamanov authored Oct 16, 2024
1 parent d3bfaa5 commit be23fc6
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions src/cpp/src/llm_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ void enable_npuw_dq_if_allowed(ov::AnyMap& config,
const std::shared_ptr<ov::Model>& model) {
if (allow_to_enable_npuw_dq(model)) {
config["NPUW_DQ"] = "YES";
pop_option(config, "NPUW_ONLINE_AVOID");
}
}

Expand Down Expand Up @@ -213,33 +212,34 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
}
}

// Returns the baseline option set that get_default_common_config() starts
// from. Each key is listed exactly once, and the map is returned by value so
// that every caller extends its own copy.
ov::AnyMap get_baseline_common_config() {
    ov::AnyMap config = {
        { "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
        { "NPU_USE_NPUW", "YES" },
        { "NPUW_FOLD", "YES" },
        { "NPUW_DCOFF_TYPE", "f16" },
        { "NPUW_DCOFF_SCALE", "YES"},
        { "NPUW_WEIGHTS_BANK", "shared" },
        { "NPUW_PMM", "NO" }
    };
    return config;
}

// Builds the defaults shared by the prefill and generate configs: the baseline
// options, plus "NPUW_WEIGHTS_BANK_ALLOC" = "CPU", plus whatever
// enable_npuw_dq_if_allowed() decides to add for the given model.
ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {
    ov::AnyMap common_config = get_baseline_common_config();
    // emplace only inserts when the key is absent (assuming ov::AnyMap has
    // std::map semantics), so an existing entry would be left untouched.
    common_config.emplace("NPUW_WEIGHTS_BANK_ALLOC", "CPU");
    enable_npuw_dq_if_allowed(common_config, model);
    return common_config;
}

// Default config for the prefill model: the common defaults, with no
// prefill-specific additions.
ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model) {
    ov::AnyMap prefill_config = get_default_common_config(model);
    return prefill_config;
}

// Default config for the generate model: the common defaults extended with
// "NPUW_FUNCALL_ASYNC" and "NPUW_PARALLEL_COMPILE", both set to "YES".
ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model) {
    auto config = get_default_common_config(model);
    // emplace does not overwrite, so a key already supplied by the common
    // config would keep its value (assuming ov::AnyMap has std::map semantics).
    config.emplace("NPUW_FUNCALL_ASYNC", "YES");
    config.emplace("NPUW_PARALLEL_COMPILE", "YES");
    return config;
}

Expand Down

0 comments on commit be23fc6

Please sign in to comment.