diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp
index a51b83d759..c9d3f05187 100644
--- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp
+++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp
@@ -428,7 +428,7 @@ int main(int argc, char* argv[]) try {
     options.add_options()
     ("n,num_prompts", "A number of prompts", cxxopts::value<size_t>()->default_value("1000"))
     ("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value<size_t>()->default_value("256"))
-    ("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value<bool>()->default_value("true"))
+    ("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling. Use --dynamic_split_fuse=false to disable", cxxopts::value<bool>()->default_value("true"))
     ("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value("."))
     ("dataset", "Path to dataset .json file", cxxopts::value<std::string>()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json"))
     ("max_input_len", "Max input length take from dataset", cxxopts::value<size_t>()->default_value("1024"))
@@ -476,6 +476,7 @@ int main(int argc, char* argv[]) try {
     scheduler_config.dynamic_split_fuse = dynamic_split_fuse,
     scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True
 
+    std::cout << "To enable logging of additional information, like model configuration set environment variable OV_CB_FULL_LOG=1." << std::endl;
     std::cout << "Benchmarking parameters: " << std::endl;
     std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl;
     std::cout << "\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl;
@@ -497,7 +498,17 @@ int main(int argc, char* argv[]) try {
     // Benchmarking
     std::cout << "Loading models, creating pipelines, preparing environment..."
        << std::endl;
-    ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map);
+    ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {});
+
+    // Enabled with env OV_CB_FULL_LOG=1
+    std::string print_values = "";
+    for (const auto& prop : pipe.get_model_configuration()) {
+        print_values += "\t" + prop + "\n";
+    }
+    if (!print_values.empty())
+    {
+        std::cout << "Model configuration: " << std::endl << print_values;
+    }
 
     std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl;
 
diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
index 626a51c5da..dd79b7ea73 100644
--- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
@@ -61,6 +61,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
     GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params);
     GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params);
 
+    std::vector<std::string> get_model_configuration();
+
     void step();
 
     bool has_non_finished_requests();
diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp
index 8044eddc6c..2529e68015 100644
--- a/src/cpp/src/continuous_batching_pipeline.cpp
+++ b/src/cpp/src/continuous_batching_pipeline.cpp
@@ -15,6 +15,7 @@
 #include "text_callback_streamer.hpp"
 #include "timer.hpp"
 #include "debug_utils.hpp"
+#include "utils.hpp"
 
 using namespace ov::genai;
 
@@ -29,6 +30,7 @@ class ContinuousBatchingPipeline::Impl {
     std::shared_ptr<CacheManager> m_cache_manager;
     std::shared_ptr<ModelRunner> m_model_runner;
     std::shared_ptr<Sampler> m_sampler;
+    std::vector<std::string> m_model_config_namevalues;
 
     // TODO (mzegla):
GenerationConfig is request specific object
     // and pipeline only uses default rng_seed.
@@ -97,7 +99,13 @@ class ContinuousBatchingPipeline::Impl {
         apply_paged_attention_transformations(model, device_config);
 
-        ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), plugin_config).create_infer_request();
+        auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);
+
+        ov::genai::utils::read_properties([&compiled_model](const std::string& key) {
+            return compiled_model.get_property(key); },
+            m_model_config_namevalues);
+
+        ov::InferRequest infer_request = compiled_model.create_infer_request();
 
         // setup KV caches
         m_cache_manager = std::make_shared<CacheManager>(device_config);
@@ -132,6 +140,13 @@ class ContinuousBatchingPipeline::Impl {
         return m_pipeline_metrics;
     }
 
+    std::vector<std::string> get_model_configuration() {
+        if (ov::genai::utils::is_full_log_env_enabled())
+            return m_model_config_namevalues;
+
+        return std::vector<std::string>();
+    }
+
     ov::genai::Tokenizer get_tokenizer() {
         return m_tokenizer;
     }
@@ -388,7 +403,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& models_path,
                                                         const SchedulerConfig& scheduler_config,
                                                         const std::string& device,
                                                         const ov::AnyMap& llm_plugin_config,
-                                                        const ov::AnyMap& tokenizer_plugin_config) {
+                                                        const ov::AnyMap& tokenizer_plugin_config
+                                                        ) {
     m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
 }
 
@@ -412,6 +428,10 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{
     return m_impl->get_metrics();
 }
 
+std::vector<std::string> ContinuousBatchingPipeline::get_model_configuration() {
+    return m_impl->get_model_configuration();
+}
+
 GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) {
     return m_impl->add_request(request_id, prompt, sampling_params);
 }
diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp
index 12d3179e8d..9fa6895191 100644
--- a/src/cpp/src/debug_utils.hpp
+++ b/src/cpp/src/debug_utils.hpp
@@ -6,6 +6,8 @@
 #include <string>
 #include <iostream>
 
+#include <fstream>
+
 #include <openvino/runtime/tensor.hpp>
 
 template <typename T>
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
index 2bc20186be..7845ca0f1e 100644
--- a/src/cpp/src/utils.cpp
+++ b/src/cpp/src/utils.cpp
@@ -3,6 +3,8 @@
 #include "utils.hpp"
 
 #include <fstream>
+#include <cstdlib>
+#include <sstream>
 
 namespace ov {
 namespace genai {
@@ -185,6 +187,31 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map) {
     return std::nullopt;
 }
 
+std::string join(const std::vector<std::string>& listOfStrings, const std::string& delimiter) {
+    std::stringstream ss;
+    auto it = listOfStrings.cbegin();
+    if (it == listOfStrings.cend()) {
+        return "";
+    }
+    for (; it != (listOfStrings.cend() - 1); ++it) {
+        ss << *it << delimiter;
+    }
+    if (it != listOfStrings.cend()) {
+        ss << *it;
+    }
+    return ss.str();
+}
+
+bool is_full_log_env_enabled() {
+    const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG");
+    if (environmentVariableBuffer) {
+        // std::strtoul (unlike std::stoul) never throws: non-numeric input parses as 0 (disabled)
+        return std::strtoul(environmentVariableBuffer, nullptr, 10) > 0;
+    }
+
+    return false;
+}
+
 } // namespace utils
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
index 25acc1c87f..e7ec0e08ff 100644
--- a/src/cpp/src/utils.hpp
+++ b/src/cpp/src/utils.hpp
@@ -76,6 +76,31 @@
 ov::genai::StreamerVariant get_streamer_from_map(const ov::AnyMap& config_map);
 
 ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map);
 
+std::string join(const std::vector<std::string>& listOfStrings, const std::string& delimiter);
+
+bool is_full_log_env_enabled();
+
+template <typename PropertyExtractor>
+static void read_properties(PropertyExtractor&& property_extractor, std::vector<std::string>& output_configuration_values) {
+    auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties;
+    std::vector<ov::PropertyName> supported_config_keys;
+
+    ov::Any value = property_extractor(key);
+    supported_config_keys =
+        value.as<std::vector<ov::PropertyName>>();
+
+    for (const auto& property_key : supported_config_keys) {
+        if (property_key == "SUPPORTED_PROPERTIES")
+            continue;
+        std::string property_value;
+        ov::Any param_value = property_extractor(property_key);
+        property_value = param_value.as<std::string>();
+
+        output_configuration_values.emplace_back(join({property_key, property_value}, ": "));
+    }
+    std::sort(output_configuration_values.begin(), output_configuration_values.end());
+}
+
 } // namespace utils
 } // namespace genai
 } // namespace ov