diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index 055e2805a6..c9d3f05187 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -437,7 +437,6 @@ int main(int argc, char* argv[]) try { ("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value<size_t>()->default_value("16")) ("device", "Target device to run the model. Default: CPU", cxxopts::value<std::string>()->default_value("CPU")) ("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value<std::string>()->default_value("{\"PERF_COUNT\":true}")) - ("full_log", "Whether to enable logging of additional information, like model configuration. Use --full_log=false to disable", cxxopts::value<bool>()->default_value("true")) ("h,help", "Print usage"); cxxopts::ParseResult result; @@ -465,7 +464,6 @@ int main(int argc, char* argv[]) try { const std::string device = result["device"].as<std::string>(); const std::string device_config = result["device_config"].as<std::string>(); const size_t cache_size = result["cache_size"].as<size_t>(); - const bool full_log = result["full_log"].as<bool>(); // Create requests for generation Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len); @@ -478,6 +476,7 @@ int main(int argc, char* argv[]) try { scheduler_config.dynamic_split_fuse = dynamic_split_fuse, scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True + std::cout << "To enable logging of additional information, like model configuration, set environment variable OV_CB_FULL_LOG=1." << std::endl; std::cout << "Benchmarking parameters: " << std::endl; std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl; std::cout << 
"\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl; @@ -490,7 +489,6 @@ int main(int argc, char* argv[]) try { std::cout << "\tMax output length: " << max_output_len << std::endl; std::cout << "\tTarget device: " << device << std::endl; std::cout << "\tPlugin configuration JSON: " << device_config << std::endl; - std::cout << "\tFull logging set to: " << full_log << std::endl; ov::AnyMap device_config_map = {}; if (!parse_plugin_config_string(device_config, device_config_map)) { @@ -500,9 +498,17 @@ int main(int argc, char* argv[]) try { // Benchmarking std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl; - ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log); + ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}); - std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string(); + // Enabled with env OV_CB_FULL_LOG=1 + std::string print_values = ""; + for (const auto& prop : pipe.get_model_configuration()) { + print_values += "\t" + prop + "\n"; + } + if (!print_values.empty()) + { + std::cout << "Model configuration: " << std::endl << print_values; + } std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl; diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index c65dd7eab3..dd79b7ea73 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -33,8 +33,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const SchedulerConfig& scheduler_config, const std::string& device = "CPU", const ov::AnyMap& llm_plugin_config = {}, - const ov::AnyMap& tokenizer_plugin_config = {}, - 
const bool full_log = false); + const ov::AnyMap& tokenizer_plugin_config = {}); /** * @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs. @@ -50,8 +49,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { const ov::genai::Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device="CPU", - const ov::AnyMap& plugin_config={}, - const bool full_log = false + const ov::AnyMap& plugin_config={} ); ov::genai::Tokenizer get_tokenizer(); @@ -63,7 +61,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params); GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params); - std::string get_model_configuration_string(); + std::vector<std::string> get_model_configuration(); void step(); diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 2e587832e5..2529e68015 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl { } public: - Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) : + Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) : m_tokenizer{tokenizer} { ov::Core core; @@ -101,11 +101,9 @@ class ContinuousBatchingPipeline::Impl { auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config); - if (full_log) { - ov::genai::utils::read_properties([compiled_model](const std::string& key) { - return 
compiled_model.get_property(key); }, - m_model_config_namevalues); - } + ov::genai::utils::read_properties([compiled_model](const std::string& key) { + return compiled_model.get_property(key); }, + m_model_config_namevalues); ov::InferRequest infer_request = compiled_model.create_infer_request(); @@ -131,8 +129,8 @@ class ContinuousBatchingPipeline::Impl { // read default generation config } - Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log) - : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {} + Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config) + : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {} ov::genai::GenerationConfig get_config() const { return m_generation_config; } @@ -142,12 +140,11 @@ class ContinuousBatchingPipeline::Impl { return m_pipeline_metrics; } - std::string get_model_configuration_string() { - std::string print_values = ""; - for( auto prop : m_model_config_namevalues) { - print_values = print_values + "\t" + prop + "\n"; - } - return print_values; + std::vector<std::string> get_model_configuration() { + if (ov::genai::utils::is_full_log_env_enabled()) + return m_model_config_namevalues; + + return std::vector<std::string>(); } ov::genai::Tokenizer get_tokenizer() { @@ -406,10 +403,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, - const ov::AnyMap& tokenizer_plugin_config, - const bool full_log + const ov::AnyMap& tokenizer_plugin_config ) { - m_impl = std::make_shared<Impl>(models_path, scheduler_config, 
device, llm_plugin_config, tokenizer_plugin_config, full_log); + m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config); } ContinuousBatchingPipeline::ContinuousBatchingPipeline( @@ -417,9 +413,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, - const ov::AnyMap& plugin_config, - const bool full_log -) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {} + const ov::AnyMap& plugin_config +) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config)} {} ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() { return m_impl->get_tokenizer(); } @@ -433,8 +428,8 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{ return m_impl->get_metrics(); } -std::string ContinuousBatchingPipeline::get_model_configuration_string() { - return m_impl->get_model_configuration_string(); +std::vector<std::string> ContinuousBatchingPipeline::get_model_configuration() { + return m_impl->get_model_configuration(); } GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) { diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index f141278a5d..7845ca0f1e 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -3,6 +3,7 @@ #include "utils.hpp" #include +#include <cstdlib> namespace ov { namespace genai { @@ -200,6 +201,18 @@ std::string join(const std::vector<std::string>& listOfStrings, const std::strin return ss.str(); } +bool is_full_log_env_enabled() { + const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG"); + if (environmentVariableBuffer) { + unsigned long result = std::strtoul(environmentVariableBuffer, nullptr, 10); + if (result > 0) { + return true; + } + } + + return false; +} + } // namespace utils } // namespace genai } // 
namespace ov diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 09a088f8ca..e7ec0e08ff 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -78,6 +78,8 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config std::string join(const std::vector<std::string>& listOfStrings, const std::string delimiter); +bool is_full_log_env_enabled(); + template <typename PropertyExtractor> static void read_properties(PropertyExtractor&& property_extractor, std::vector<std::string>& output_configuration_values) { auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties;