Base on env

openvinotoolkit · Sep 2, 2024 · 7eb692e · 7eb692e
1 parent fe6abc7
commit 7eb692e
Show file tree

Hide file tree

Showing 5 changed files with 46 additions and 32 deletions.
diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp
@@ -437,7 +437,6 @@ int main(int argc, char* argv[]) try {
     ("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value<size_t>()->default_value("16"))
     ("device", "Target device to run the model. Default: CPU", cxxopts::value<std::string>()->default_value("CPU"))
     ("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value<std::string>()->default_value("{\"PERF_COUNT\":true}"))
-    ("full_log", "Whether to enable logging of additional information, like model configuration. Use --full_log=false to disable", cxxopts::value<bool>()->default_value("true"))
     ("h,help", "Print usage");
 
     cxxopts::ParseResult result;
@@ -465,7 +464,6 @@ int main(int argc, char* argv[]) try {
     const std::string device = result["device"].as<std::string>();
     const std::string device_config = result["device_config"].as<std::string>();
     const size_t cache_size = result["cache_size"].as<size_t>();
-    const bool full_log = result["full_log"].as<bool>();
 
     // Create requests for generation
     Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len);
@@ -478,6 +476,7 @@ int main(int argc, char* argv[]) try {
     scheduler_config.dynamic_split_fuse = dynamic_split_fuse,
     scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True
 
+    std::cout << "To enable logging of additional information, like model configuration set environment variable OV_CB_FULL_LOG=1.";
     std::cout << "Benchmarking parameters: " << std::endl;
     std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl;
     std::cout << "\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl;
@@ -490,7 +489,6 @@ int main(int argc, char* argv[]) try {
     std::cout << "\tMax output length: " << max_output_len << std::endl;
     std::cout << "\tTarget device: " << device << std::endl;
     std::cout << "\tPlugin configuration JSON: " << device_config << std::endl;
-    std::cout << "\tFull logging set to: " << full_log << std::endl;
 
     ov::AnyMap device_config_map = {};
     if (!parse_plugin_config_string(device_config, device_config_map)) {
@@ -500,9 +498,17 @@ int main(int argc, char* argv[]) try {
 
     // Benchmarking
     std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl;
-    ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log);
+    ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {});
 
-    std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string();
+    // Enabled with env OV_CB_FULL_LOG=1
+    std::string print_values = "";
+    for (auto prop : pipe.get_model_configuration()) {
+        print_values = print_values + "\t" + prop + "\n";
+    }
+    if (!print_values.empty())
+    {
+        std::cout << "Model configuration: " << std::endl << print_values;
+    }
 
     std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl;
 

diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
@@ -33,8 +33,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
                                const SchedulerConfig& scheduler_config,
                                const std::string& device = "CPU",
                                const ov::AnyMap& llm_plugin_config = {},
-                               const ov::AnyMap& tokenizer_plugin_config = {},
-                               const bool full_log = false);
+                               const ov::AnyMap& tokenizer_plugin_config = {});
 
     /**
     * @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs.
@@ -50,8 +49,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
         const ov::genai::Tokenizer& tokenizer,
         const SchedulerConfig& scheduler_config,
         const std::string& device="CPU",
-        const ov::AnyMap& plugin_config={},
-        const bool full_log = false
+        const ov::AnyMap& plugin_config={}
     );
 
     ov::genai::Tokenizer get_tokenizer();
@@ -63,7 +61,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
     GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params);
     GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params);
 
-    std::string get_model_configuration_string();
+    std::vector<std::string> get_model_configuration();
 
     void step();
 

diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp
@@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl {
     }
 
 public:
-    Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) :
+    Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
             m_tokenizer{tokenizer} {
         ov::Core core;
 
@@ -101,11 +101,9 @@ class ContinuousBatchingPipeline::Impl {
 
         auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);
 
-        if (full_log) {
-            ov::genai::utils::read_properties([compiled_model](const std::string& key) {
-                return compiled_model.get_property(key); },
-                m_model_config_namevalues);
-        }
+        ov::genai::utils::read_properties([compiled_model](const std::string& key) {
+            return compiled_model.get_property(key); },
+            m_model_config_namevalues);
 
         ov::InferRequest infer_request = compiled_model.create_infer_request();
 
@@ -131,8 +129,8 @@ class ContinuousBatchingPipeline::Impl {
         // read default generation config
     }
 
-    Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log)
-        : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {}
+    Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config)
+        : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {}
 
     ov::genai::GenerationConfig get_config() const {
         return m_generation_config;
@@ -142,12 +140,11 @@ class ContinuousBatchingPipeline::Impl {
         return m_pipeline_metrics;
     }
 
-    std::string get_model_configuration_string() {
-        std::string print_values = "";
-        for( auto prop : m_model_config_namevalues) {
-            print_values = print_values + "\t" + prop + "\n";
-        }
-        return print_values;
+    std::vector<std::string> get_model_configuration() {
+        if (ov::genai::utils::is_full_log_env_enabled())
+            return m_model_config_namevalues;
+
+        return std::vector<std::string>();
     }
 
     ov::genai::Tokenizer get_tokenizer() {
@@ -406,20 +403,18 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model
                                                         const SchedulerConfig& scheduler_config,
                                                         const std::string& device,
                                                         const ov::AnyMap& llm_plugin_config,
-                                                        const ov::AnyMap& tokenizer_plugin_config,
-                                                        const bool full_log
+                                                        const ov::AnyMap& tokenizer_plugin_config
                                                         ) {
-    m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log);
+    m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
 }
 
 ContinuousBatchingPipeline::ContinuousBatchingPipeline(
     const std::string& model_path,
     const Tokenizer& tokenizer,
     const SchedulerConfig& scheduler_config,
     const std::string& device,
-    const ov::AnyMap& plugin_config,
-    const bool full_log
-) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {}
+    const ov::AnyMap& plugin_config
+) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config)} {}
 
 ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() {
     return m_impl->get_tokenizer();
@@ -433,8 +428,8 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{
     return m_impl->get_metrics();
 }
 
-std::string ContinuousBatchingPipeline::get_model_configuration_string() {
-    return m_impl->get_model_configuration_string();
+std::vector<std::string> ContinuousBatchingPipeline::get_model_configuration() {
+    return m_impl->get_model_configuration();
 }
 
 GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) {

diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
@@ -3,6 +3,7 @@
 
 #include "utils.hpp"
 #include <fstream>
+#include <utility>
 
 namespace ov {
 namespace genai {
@@ -200,6 +201,18 @@ std::string join(const std::vector<std::string>& listOfStrings, const std::strin
     return ss.str();
 }
 
+bool is_full_log_env_enabled() {
+    const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG");
+    if (environmentVariableBuffer) {
+        auto result = std::__cxx11::stoul(environmentVariableBuffer);
+        if (result && result > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 }  // namespace utils
 }  // namespace genai
 }  // namespace ov
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
@@ -78,6 +78,8 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config
 
 std::string join(const std::vector<std::string>& listOfStrings, const std::string delimiter);
 
+bool is_full_log_env_enabled();
+
 template <typename PropertyExtractor>
 static void read_properties(PropertyExtractor&& property_extractor, std::vector<std::string>& output_configuration_values) {
     auto key = std::string("SUPPORTED_PROPERTIES");  // ov::supported_properties;