Commit

Full log flag
rasapala committed Aug 20, 2024
1 parent 350c88d commit 608b361
Showing 3 changed files with 25 additions and 14 deletions.
@@ -428,7 +428,7 @@ int main(int argc, char* argv[]) try {
options.add_options()
("n,num_prompts", "A number of prompts", cxxopts::value<size_t>()->default_value("1000"))
("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value<size_t>()->default_value("256"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value<bool>()->default_value("true"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling. Use --dynamic_split_fuse=false to disable", cxxopts::value<bool>()->default_value("true"))
("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value("."))
("dataset", "Path to dataset .json file", cxxopts::value<std::string>()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json"))
("max_input_len", "Max input length take from dataset", cxxopts::value<size_t>()->default_value("1024"))
@@ -437,6 +437,7 @@ int main(int argc, char* argv[]) try {
("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value<size_t>()->default_value("16"))
("device", "Target device to run the model. Default: CPU", cxxopts::value<std::string>()->default_value("CPU"))
("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value<std::string>()->default_value("{\"PERF_COUNT\":true}"))
("full_log", "Whether to enable logging of additional information, like model configuration. Use --full_log=false to disable", cxxopts::value<bool>()->default_value("true"))
("h,help", "Print usage");

cxxopts::ParseResult result;
@@ -464,6 +465,7 @@ int main(int argc, char* argv[]) try {
const std::string device = result["device"].as<std::string>();
const std::string device_config = result["device_config"].as<std::string>();
const size_t cache_size = result["cache_size"].as<size_t>();
+ const bool full_log = result["full_log"].as<bool>();

// Create requests for generation
Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len);
@@ -488,6 +490,7 @@ int main(int argc, char* argv[]) try {
std::cout << "\tMax output length: " << max_output_len << std::endl;
std::cout << "\tTarget device: " << device << std::endl;
std::cout << "\tPlugin configuration JSON: " << device_config << std::endl;
std::cout << "\tFull logging set to: " << full_log << std::endl;

ov::AnyMap device_config_map = {};
if (!parse_plugin_config_string(device_config, device_config_map)) {
@@ -497,7 +500,7 @@ int main(int argc, char* argv[]) try {

// Benchmarking
std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl;
- ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map);
+ ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log);

std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string();

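Aside (not part of the commit): the new option relies on cxxopts' handling of boolean values, where a flag with default_value("true") stays true unless the user passes an explicit =false — hence the "Use --full_log=false to disable" wording in the help text. A minimal, self-contained sketch of that behavior:

#include <cxxopts.hpp>
#include <iostream>

int main(int argc, char* argv[]) {
    cxxopts::Options options("flag_demo", "Boolean option that defaults to true");
    options.add_options()
        ("full_log", "Extra logging. Use --full_log=false to disable",
         cxxopts::value<bool>()->default_value("true"));
    auto result = options.parse(argc, argv);
    // Prints "true" when the flag is omitted or passed bare; "false" only
    // for an explicit --full_log=false.
    std::cout << std::boolalpha << result["full_log"].as<bool>() << std::endl;
    return 0;
}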
@@ -33,7 +33,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const SchedulerConfig& scheduler_config,
const std::string& device = "CPU",
const ov::AnyMap& llm_plugin_config = {},
- const ov::AnyMap& tokenizer_plugin_config = {});
+ const ov::AnyMap& tokenizer_plugin_config = {},
+ const bool full_log = false);

/**
* @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs.
@@ -49,7 +50,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const ov::genai::Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device="CPU",
- const ov::AnyMap& plugin_config={}
+ const ov::AnyMap& plugin_config={},
+ const bool full_log = false
);

ov::genai::Tokenizer get_tokenizer();
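Caller-side view (a hypothetical sketch, not code from this commit; the include path is assumed): because full_log is defaulted to false in both public constructors, existing call sites compile unchanged, and callers opt in explicitly:

#include "openvino/genai/continuous_batching_pipeline.hpp"
#include <string>

void make_pipelines(const std::string& models_path,
                    const ov::genai::SchedulerConfig& scheduler_config) {
    // Unchanged call site: full_log falls back to its default of false.
    ov::genai::ContinuousBatchingPipeline quiet(models_path, scheduler_config);

    // Opting in: the new trailing argument enables the extra model-configuration logging.
    ov::genai::ContinuousBatchingPipeline verbose(models_path, scheduler_config, "CPU",
                                                  /*llm_plugin_config=*/{},
                                                  /*tokenizer_plugin_config=*/{},
                                                  /*full_log=*/true);
}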
26 changes: 16 additions & 10 deletions src/cpp/src/continuous_batching_pipeline.cpp
@@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl {
}

public:
Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
- Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
+ Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) :
m_tokenizer{tokenizer} {
ov::Core core;

@@ -100,9 +100,12 @@ class ContinuousBatchingPipeline::Impl {
apply_paged_attention_transformations(model, device_config);

auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);
- ov::genai::utils::read_properties([compiled_model](const std::string& key) {
-     return compiled_model.get_property(key); },
-     m_model_config_namevalues);
+
+ if (full_log) {
+     ov::genai::utils::read_properties([compiled_model](const std::string& key) {
+         return compiled_model.get_property(key); },
+         m_model_config_namevalues);
+ }

ov::InferRequest infer_request = compiled_model.create_infer_request();

@@ -128,8 +131,8 @@ class ContinuousBatchingPipeline::Impl {
// read default generation config
}

- Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config)
-     : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {}
+ Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log)
+     : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {}

ov::genai::GenerationConfig get_config() const {
return m_generation_config;
@@ -403,17 +406,20 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model
const SchedulerConfig& scheduler_config,
const std::string& device,
const ov::AnyMap& llm_plugin_config,
- const ov::AnyMap& tokenizer_plugin_config) {
- m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
+ const ov::AnyMap& tokenizer_plugin_config,
+ const bool full_log
+ ) {
+ m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log);
}

ContinuousBatchingPipeline::ContinuousBatchingPipeline(
const std::string& model_path,
const Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device,
- const ov::AnyMap& plugin_config
- ) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config)} {}
+ const ov::AnyMap& plugin_config,
+ const bool full_log
+ ) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {}

ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() {
return m_impl->get_tokenizer();
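The behavioral core of the change is in Impl's primary constructor: the compiled model's properties are collected only when full_log is set, so the property queries are skipped entirely when logging is off. A simplified, self-contained sketch of that pattern, with standard types standing in for the OpenVINO ones (the real ov::genai::utils::read_properties and the compiled model are mocked here, and the explicit key list is an invented simplification):

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using NameValues = std::vector<std::pair<std::string, std::string>>;

// Pull (key, value) pairs through a caller-supplied getter, mirroring the shape
// of the gated call in this commit.
void read_properties(const std::function<std::string(const std::string&)>& getter,
                     const std::vector<std::string>& keys,
                     NameValues& out) {
    for (const auto& key : keys)
        out.emplace_back(key, getter(key));
}

int main() {
    const bool full_log = true;  // stands in for the new constructor argument
    NameValues model_config_namevalues;
    if (full_log) {  // the commit's gate: no property queries when disabled
        read_properties([](const std::string& key) { return "dummy-" + key; },
                        {"PERF_COUNT", "EXECUTION_DEVICES"}, model_config_namevalues);
    }
    for (const auto& [key, value] : model_config_namevalues)
        std::cout << key << " = " << value << std::endl;
    return 0;
}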
