From 18b56c2bf222ed2ea28507eecca6bc2a3940698b Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Wed, 24 Jul 2024 15:50:59 +0200 Subject: [PATCH 1/6] model properties --- .../continuous_batching_benchmark.cpp | 2 + .../genai/continuous_batching_pipeline.hpp | 4 ++ src/cpp/src/continuous_batching_pipeline.cpp | 29 ++++++++++- src/cpp/src/debug_utils.hpp | 49 ++++++++++++++++++- 4 files changed, 81 insertions(+), 3 deletions(-) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index a51b83d759..8dd0a1fea1 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -499,6 +499,8 @@ int main(int argc, char* argv[]) try { std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl; ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map); + // pipe.print_model_configuration(); + std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl; GenerationInfoCollector generation_info_collector; diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index 626a51c5da..8cd4976734 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -61,6 +61,10 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params); GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params); + std::vector get_model_configuration(); + + void print_model_configuration(); + 
void step(); bool has_non_finished_requests(); diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 8044eddc6c..84042109cf 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -29,6 +29,7 @@ class ContinuousBatchingPipeline::Impl { std::shared_ptr m_cache_manager; std::shared_ptr m_model_runner; std::shared_ptr m_sampler; + std::vector m_model_config_namevalues; // TODO (mzegla): GenerationConfig is request specific object // and pipeline only uses default rng_seed. @@ -97,7 +98,12 @@ class ContinuousBatchingPipeline::Impl { apply_paged_attention_transformations(model, device_config); - ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), plugin_config).create_infer_request(); + auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); + /*read_properties([compiled_model](const std::string& key) { + return compiled_model.get_property(key); }, + m_model_config_namevalues); +*/ + ov::InferRequest infer_request = compiled_model.create_infer_request(); // setup KV caches m_cache_manager = std::make_shared(device_config); @@ -132,6 +138,17 @@ class ContinuousBatchingPipeline::Impl { return m_pipeline_metrics; } + std::vector get_model_configuration() { + return m_model_config_namevalues; + } + + void print_model_configuration() { + std::cout << "Loaded model configuration:" << std::endl; + for( auto prop : m_model_config_namevalues) { + std::cout << "\t" << prop << std::endl; + } + } + ov::genai::Tokenizer get_tokenizer() { return m_tokenizer; } @@ -412,7 +429,15 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{ return m_impl->get_metrics(); } -GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) { +std::vector ContinuousBatchingPipeline::get_model_configuration() { + 
return m_impl->get_model_configuration(); +} + +void ContinuousBatchingPipeline::print_model_configuration() { + return m_impl->print_model_configuration(); +} + +GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, std::string prompt, ov::genai::GenerationConfig sampling_params) { return m_impl->add_request(request_id, prompt, sampling_params); } diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp index 12d3179e8d..ec191365cc 100644 --- a/src/cpp/src/debug_utils.hpp +++ b/src/cpp/src/debug_utils.hpp @@ -3,8 +3,12 @@ #pragma once -#include #include +#include +#include +#include + +#include #include @@ -29,3 +33,46 @@ void print_tensor(std::string name, ov::Tensor tensor) { print_array(tensor.data(), tensor.get_size()); } } + +std::string join(const std::vector& listOfStrings, const std::string delimiter) { + std::stringstream ss; + auto it = listOfStrings.cbegin(); + if (it == listOfStrings.end()) { + return ""; + } + for (; it != (listOfStrings.end() - 1); ++it) { + ss << *it << delimiter; + } + if (it != listOfStrings.end()) { + ss << *it; + } + return ss.str(); +} + +template +static void read_properties(PropertyExtractor&& property_extractor, std::vector& output_configuration_values) { + auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties; + std::vector supported_config_keys; + try { + ov::Any value = property_extractor(key); + supported_config_keys = value.as>(); + } catch (...) { + std::cout << "Exception thrown from OpenVINO when requesting model property: " << key << std::endl; + return; + } + + for (auto& key : supported_config_keys) { + if (key == "SUPPORTED_PROPERTIES") + continue; + std::string value; + try { + ov::Any param_value = property_extractor(key); + value = param_value.as(); + } catch (...) 
{ + std::cout << "WARNING: Exception thrown from OpenVINO when requesting model property: " << key << std::endl; + continue; + } + output_configuration_values.emplace_back(join({key, value}, ": ")); + } + std::sort(output_configuration_values.begin(), output_configuration_values.end()); +} From a7f74688dc2e019050d99204a149670faec8745b Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Wed, 24 Jul 2024 17:14:13 +0200 Subject: [PATCH 2/6] Working --- .../continuous_batching_benchmark.cpp | 2 +- src/cpp/src/continuous_batching_pipeline.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index 8dd0a1fea1..c1e62d9871 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -499,7 +499,7 @@ int main(int argc, char* argv[]) try { std::cout << "Loading models, creating pipelines, preparing environment..." 
<< std::endl; ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map); - // pipe.print_model_configuration(); + pipe.print_model_configuration(); std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl; diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 84042109cf..643146a332 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -99,10 +99,10 @@ class ContinuousBatchingPipeline::Impl { apply_paged_attention_transformations(model, device_config); auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); - /*read_properties([compiled_model](const std::string& key) { + read_properties([compiled_model](const std::string& key) { return compiled_model.get_property(key); }, m_model_config_namevalues); -*/ + ov::InferRequest infer_request = compiled_model.create_infer_request(); // setup KV caches From e1686ed2416cd7db063b131bd783b3d48e10ae5b Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Mon, 19 Aug 2024 15:23:25 +0200 Subject: [PATCH 3/6] Code review --- .../continuous_batching_benchmark.cpp | 2 +- .../genai/continuous_batching_pipeline.hpp | 4 +- src/cpp/src/continuous_batching_pipeline.cpp | 24 ++++------- src/cpp/src/debug_utils.hpp | 43 ------------------- src/cpp/src/utils.cpp | 15 +++++++ src/cpp/src/utils.hpp | 22 ++++++++++ 6 files changed, 48 insertions(+), 62 deletions(-) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index c1e62d9871..9043f80895 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -499,7 +499,7 @@ int main(int argc, char* argv[]) try { std::cout << 
"Loading models, creating pipelines, preparing environment..." << std::endl; ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map); - pipe.print_model_configuration(); + std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string(); std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl; diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index 8cd4976734..232270a673 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -61,9 +61,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params); GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params); - std::vector get_model_configuration(); - - void print_model_configuration(); + std::string get_model_configuration_string(); void step(); diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 643146a332..dd36f6976e 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -15,6 +15,7 @@ #include "text_callback_streamer.hpp" #include "timer.hpp" #include "debug_utils.hpp" +#include "utils.hpp" using namespace ov::genai; @@ -99,7 +100,7 @@ class ContinuousBatchingPipeline::Impl { apply_paged_attention_transformations(model, device_config); auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); - read_properties([compiled_model](const std::string& key) { + ov::genai::utils::read_properties([compiled_model](const std::string& key) { return 
compiled_model.get_property(key); }, m_model_config_namevalues); @@ -138,15 +139,12 @@ class ContinuousBatchingPipeline::Impl { return m_pipeline_metrics; } - std::vector get_model_configuration() { - return m_model_config_namevalues; - } - - void print_model_configuration() { - std::cout << "Loaded model configuration:" << std::endl; + std::string get_model_configuration_string() { + std::string print_values = ""; for( auto prop : m_model_config_namevalues) { - std::cout << "\t" << prop << std::endl; + print_values = print_values + "\t" + prop + "\n"; } + return print_values; } ov::genai::Tokenizer get_tokenizer() { @@ -429,15 +427,11 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{ return m_impl->get_metrics(); } -std::vector ContinuousBatchingPipeline::get_model_configuration() { - return m_impl->get_model_configuration(); -} - -void ContinuousBatchingPipeline::print_model_configuration() { - return m_impl->print_model_configuration(); +std::string ContinuousBatchingPipeline::get_model_configuration_string() { + return m_impl->get_model_configuration_string(); } -GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, std::string prompt, ov::genai::GenerationConfig sampling_params) { +GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) { return m_impl->add_request(request_id, prompt, sampling_params); } diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp index ec191365cc..8adeb28e2b 100644 --- a/src/cpp/src/debug_utils.hpp +++ b/src/cpp/src/debug_utils.hpp @@ -33,46 +33,3 @@ void print_tensor(std::string name, ov::Tensor tensor) { print_array(tensor.data(), tensor.get_size()); } } - -std::string join(const std::vector& listOfStrings, const std::string delimiter) { - std::stringstream ss; - auto it = listOfStrings.cbegin(); - if (it == listOfStrings.end()) { - return ""; - } - for (; it != 
(listOfStrings.end() - 1); ++it) { - ss << *it << delimiter; - } - if (it != listOfStrings.end()) { - ss << *it; - } - return ss.str(); -} - -template -static void read_properties(PropertyExtractor&& property_extractor, std::vector& output_configuration_values) { - auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties; - std::vector supported_config_keys; - try { - ov::Any value = property_extractor(key); - supported_config_keys = value.as>(); - } catch (...) { - std::cout << "Exception thrown from OpenVINO when requesting model property: " << key << std::endl; - return; - } - - for (auto& key : supported_config_keys) { - if (key == "SUPPORTED_PROPERTIES") - continue; - std::string value; - try { - ov::Any param_value = property_extractor(key); - value = param_value.as(); - } catch (...) { - std::cout << "WARNING: Exception thrown from OpenVINO when requesting model property: " << key << std::endl; - continue; - } - output_configuration_values.emplace_back(join({key, value}, ": ")); - } - std::sort(output_configuration_values.begin(), output_configuration_values.end()); -} diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 2bc20186be..f141278a5d 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -185,6 +185,21 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config return std::nullopt; } +std::string join(const std::vector& listOfStrings, const std::string delimiter) { + std::stringstream ss; + auto it = listOfStrings.cbegin(); + if (it == listOfStrings.end()) { + return ""; + } + for (; it != (listOfStrings.end() - 1); ++it) { + ss << *it << delimiter; + } + if (it != listOfStrings.end()) { + ss << *it; + } + return ss.str(); +} + } // namespace utils } // namespace genai } // namespace ov diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 25acc1c87f..09a088f8ca 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -76,6 +76,28 @@ ov::genai::StreamerVariant 
get_streamer_from_map(const ov::AnyMap& config_map); ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map); +std::string join(const std::vector& listOfStrings, const std::string delimiter); + +template +static void read_properties(PropertyExtractor&& property_extractor, std::vector& output_configuration_values) { + auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties; + std::vector supported_config_keys; + + ov::Any value = property_extractor(key); + supported_config_keys = value.as>(); + + for (auto& key : supported_config_keys) { + if (key == "SUPPORTED_PROPERTIES") + continue; + std::string value; + ov::Any param_value = property_extractor(key); + value = param_value.as(); + + output_configuration_values.emplace_back(join({key, value}, ": ")); + } + std::sort(output_configuration_values.begin(), output_configuration_values.end()); +} + } // namespace utils } // namespace genai } // namespace ov From 814bd8330daedba991667fed8ae362cbec80d5ba Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Mon, 19 Aug 2024 15:25:14 +0200 Subject: [PATCH 4/6] Cleanup --- src/cpp/src/debug_utils.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp index 8adeb28e2b..9fa6895191 100644 --- a/src/cpp/src/debug_utils.hpp +++ b/src/cpp/src/debug_utils.hpp @@ -3,10 +3,8 @@ #pragma once -#include #include -#include -#include +#include #include From 8fbb1c9a38f72d141c7cf58a7602618a80687716 Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Tue, 20 Aug 2024 16:40:47 +0200 Subject: [PATCH 5/6] Full log flag --- .../continuous_batching_benchmark.cpp | 7 +++-- .../genai/continuous_batching_pipeline.hpp | 6 +++-- src/cpp/src/continuous_batching_pipeline.cpp | 26 ++++++++++++------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp 
b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index 9043f80895..055e2805a6 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -428,7 +428,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("n,num_prompts", "A number of prompts", cxxopts::value()->default_value("1000")) ("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value()->default_value("256")) - ("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value()->default_value("true")) + ("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling. Use --dynamic_split_fuse=false to disable", cxxopts::value()->default_value("true")) ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) ("dataset", "Path to dataset .json file", cxxopts::value()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json")) ("max_input_len", "Max input length take from dataset", cxxopts::value()->default_value("1024")) @@ -437,6 +437,7 @@ int main(int argc, char* argv[]) try { ("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value()->default_value("16")) ("device", "Target device to run the model. Default: CPU", cxxopts::value()->default_value("CPU")) ("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value()->default_value("{\"PERF_COUNT\":true}")) + ("full_log", "Whether to enable logging of additional information, like model configuration. 
Use --full_log=false to disable", cxxopts::value()->default_value("true")) ("h,help", "Print usage"); cxxopts::ParseResult result; @@ -464,6 +465,7 @@ int main(int argc, char* argv[]) try { const std::string device = result["device"].as(); const std::string device_config = result["device_config"].as(); const size_t cache_size = result["cache_size"].as(); + const bool full_log = result["full_log"].as(); // Create requests for generation Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len); @@ -488,6 +490,7 @@ int main(int argc, char* argv[]) try { std::cout << "\tMax output length: " << max_output_len << std::endl; std::cout << "\tTarget device: " << device << std::endl; std::cout << "\tPlugin configuration JSON: " << device_config << std::endl; + std::cout << "\tFull logging set to: " << full_log << std::endl; ov::AnyMap device_config_map = {}; if (!parse_plugin_config_string(device_config, device_config_map)) { @@ -497,7 +500,7 @@ int main(int argc, char* argv[]) try { // Benchmarking std::cout << "Loading models, creating pipelines, preparing environment..." 
<< std::endl; - ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map); + ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log); std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string(); diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index 232270a673..c65dd7eab3 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -33,7 +33,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const SchedulerConfig& scheduler_config, const std::string& device = "CPU", const ov::AnyMap& llm_plugin_config = {}, - const ov::AnyMap& tokenizer_plugin_config = {}); + const ov::AnyMap& tokenizer_plugin_config = {}, + const bool full_log = false); /** * @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs. 
@@ -49,7 +50,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const ov::genai::Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device="CPU", - const ov::AnyMap& plugin_config={} + const ov::AnyMap& plugin_config={}, + const bool full_log = false ); ov::genai::Tokenizer get_tokenizer(); diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index dd36f6976e..2e587832e5 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl { } public: - Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) : + Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) : m_tokenizer{tokenizer} { ov::Core core; @@ -100,9 +100,12 @@ class ContinuousBatchingPipeline::Impl { apply_paged_attention_transformations(model, device_config); auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); - ov::genai::utils::read_properties([compiled_model](const std::string& key) { - return compiled_model.get_property(key); }, - m_model_config_namevalues); + + if (full_log) { + ov::genai::utils::read_properties([compiled_model](const std::string& key) { + return compiled_model.get_property(key); }, + m_model_config_namevalues); + } ov::InferRequest infer_request = compiled_model.create_infer_request(); @@ -128,8 +131,8 @@ class ContinuousBatchingPipeline::Impl { // read default generation config } - Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config) - : Impl{models_path, Tokenizer(models_path, 
tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {} + Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log) + : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {} ov::genai::GenerationConfig get_config() const { return m_generation_config; @@ -403,8 +406,10 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, - const ov::AnyMap& tokenizer_plugin_config) { - m_impl = std::make_shared(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config); + const ov::AnyMap& tokenizer_plugin_config, + const bool full_log + ) { + m_impl = std::make_shared(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log); } ContinuousBatchingPipeline::ContinuousBatchingPipeline( @@ -412,8 +417,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, - const ov::AnyMap& plugin_config -) : m_impl{std::make_shared(model_path, tokenizer, scheduler_config, device, plugin_config)} {} + const ov::AnyMap& plugin_config, + const bool full_log +) : m_impl{std::make_shared(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {} ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() { return m_impl->get_tokenizer(); From f39478ff775a2a1f1a178f520201fc632ee88149 Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Mon, 2 Sep 2024 12:23:40 +0200 Subject: [PATCH 6/6] Base on env --- .../continuous_batching_benchmark.cpp | 16 +++++--- .../genai/continuous_batching_pipeline.hpp | 8 ++-- src/cpp/src/continuous_batching_pipeline.cpp | 39 
++++++++----------- src/cpp/src/utils.cpp | 13 +++++++ src/cpp/src/utils.hpp | 2 + 5 files changed, 46 insertions(+), 32 deletions(-) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index 055e2805a6..c9d3f05187 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -437,7 +437,6 @@ int main(int argc, char* argv[]) try { ("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value()->default_value("16")) ("device", "Target device to run the model. Default: CPU", cxxopts::value()->default_value("CPU")) ("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value()->default_value("{\"PERF_COUNT\":true}")) - ("full_log", "Whether to enable logging of additional information, like model configuration. 
Use --full_log=false to disable", cxxopts::value()->default_value("true")) ("h,help", "Print usage"); cxxopts::ParseResult result; @@ -465,7 +464,6 @@ int main(int argc, char* argv[]) try { const std::string device = result["device"].as(); const std::string device_config = result["device_config"].as(); const size_t cache_size = result["cache_size"].as(); - const bool full_log = result["full_log"].as(); // Create requests for generation Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len); @@ -478,6 +476,7 @@ int main(int argc, char* argv[]) try { scheduler_config.dynamic_split_fuse = dynamic_split_fuse, scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True + std::cout << "To enable logging of additional information, like model configuration, set environment variable OV_CB_FULL_LOG=1." << std::endl; std::cout << "Benchmarking parameters: " << std::endl; std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl; std::cout << "\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl; @@ -490,7 +489,6 @@ int main(int argc, char* argv[]) try { std::cout << "\tMax output length: " << max_output_len << std::endl; std::cout << "\tTarget device: " << device << std::endl; std::cout << "\tPlugin configuration JSON: " << device_config << std::endl; - std::cout << "\tFull logging set to: " << full_log << std::endl; ov::AnyMap device_config_map = {}; if (!parse_plugin_config_string(device_config, device_config_map)) { @@ -500,9 +498,17 @@ int main(int argc, char* argv[]) try { // Benchmarking std::cout << "Loading models, creating pipelines, preparing environment..." 
<< std::endl; - ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log); + ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}); - std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string(); + // Enabled with env OV_CB_FULL_LOG=1 + std::string print_values = ""; + for (auto prop : pipe.get_model_configuration()) { + print_values = print_values + "\t" + prop + "\n"; + } + if (!print_values.empty()) + { + std::cout << "Model configuration: " << std::endl << print_values; + } std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl; diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index c65dd7eab3..dd79b7ea73 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -33,8 +33,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const SchedulerConfig& scheduler_config, const std::string& device = "CPU", const ov::AnyMap& llm_plugin_config = {}, - const ov::AnyMap& tokenizer_plugin_config = {}, - const bool full_log = false); + const ov::AnyMap& tokenizer_plugin_config = {}); /** * @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs. 
@@ -50,8 +49,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const ov::genai::Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device="CPU", - const ov::AnyMap& plugin_config={}, - const bool full_log = false + const ov::AnyMap& plugin_config={} ); ov::genai::Tokenizer get_tokenizer(); @@ -63,7 +61,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params); GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params); - std::string get_model_configuration_string(); + std::vector get_model_configuration(); void step(); diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 2e587832e5..2529e68015 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl { } public: - Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) : + Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) : m_tokenizer{tokenizer} { ov::Core core; @@ -101,11 +101,9 @@ class ContinuousBatchingPipeline::Impl { auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); - if (full_log) { - ov::genai::utils::read_properties([compiled_model](const std::string& key) { - return compiled_model.get_property(key); }, - m_model_config_namevalues); - } + ov::genai::utils::read_properties([compiled_model](const std::string& key) { + return compiled_model.get_property(key); }, + m_model_config_namevalues); ov::InferRequest 
infer_request = compiled_model.create_infer_request(); @@ -131,8 +129,8 @@ class ContinuousBatchingPipeline::Impl { // read default generation config } - Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log) - : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {} + Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config) + : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {} ov::genai::GenerationConfig get_config() const { return m_generation_config; @@ -142,12 +140,11 @@ class ContinuousBatchingPipeline::Impl { return m_pipeline_metrics; } - std::string get_model_configuration_string() { - std::string print_values = ""; - for( auto prop : m_model_config_namevalues) { - print_values = print_values + "\t" + prop + "\n"; - } - return print_values; + std::vector get_model_configuration() { + if (ov::genai::utils::is_full_log_env_enabled()) + return m_model_config_namevalues; + + return std::vector(); } ov::genai::Tokenizer get_tokenizer() { @@ -406,10 +403,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, - const ov::AnyMap& tokenizer_plugin_config, - const bool full_log + const ov::AnyMap& tokenizer_plugin_config ) { - m_impl = std::make_shared(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log); + m_impl = std::make_shared(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config); } ContinuousBatchingPipeline::ContinuousBatchingPipeline( @@ -417,9 +413,8 
@@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, - const ov::AnyMap& plugin_config, - const bool full_log -) : m_impl{std::make_shared(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {} + const ov::AnyMap& plugin_config +) : m_impl{std::make_shared(model_path, tokenizer, scheduler_config, device, plugin_config)} {} ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() { return m_impl->get_tokenizer(); @@ -433,8 +428,8 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{ return m_impl->get_metrics(); } -std::string ContinuousBatchingPipeline::get_model_configuration_string() { - return m_impl->get_model_configuration_string(); +std::vector ContinuousBatchingPipeline::get_model_configuration() { + return m_impl->get_model_configuration(); } GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) { diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index f141278a5d..7845ca0f1e 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -3,6 +3,7 @@ #include "utils.hpp" #include +#include namespace ov { namespace genai { @@ -200,6 +201,18 @@ std::string join(const std::vector& listOfStrings, const std::strin return ss.str(); } +bool is_full_log_env_enabled() { + const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG"); + if (environmentVariableBuffer) { + auto result = std::stoul(environmentVariableBuffer); + if (result > 0) { + return true; + } + } + + return false; +} + } // namespace utils } // namespace genai } // namespace ov diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 09a088f8ca..e7ec0e08ff 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -78,6 +78,8 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& 
config std::string join(const std::vector& listOfStrings, const std::string delimiter); +bool is_full_log_env_enabled(); + template static void read_properties(PropertyExtractor&& property_extractor, std::vector& output_configuration_values) { auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties;