Skip to content

Commit

Permalink
Base on env
Browse files Browse the repository at this point in the history
  • Loading branch information
rasapala committed Sep 2, 2024
1 parent fe6abc7 commit 7eb692e
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,6 @@ int main(int argc, char* argv[]) try {
("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value<size_t>()->default_value("16"))
("device", "Target device to run the model. Default: CPU", cxxopts::value<std::string>()->default_value("CPU"))
("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value<std::string>()->default_value("{\"PERF_COUNT\":true}"))
("full_log", "Whether to enable logging of additional information, like model configuration. Use --full_log=false to disable", cxxopts::value<bool>()->default_value("true"))
("h,help", "Print usage");

cxxopts::ParseResult result;
Expand Down Expand Up @@ -465,7 +464,6 @@ int main(int argc, char* argv[]) try {
const std::string device = result["device"].as<std::string>();
const std::string device_config = result["device_config"].as<std::string>();
const size_t cache_size = result["cache_size"].as<size_t>();
const bool full_log = result["full_log"].as<bool>();

// Create requests for generation
Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len);
Expand All @@ -478,6 +476,7 @@ int main(int argc, char* argv[]) try {
scheduler_config.dynamic_split_fuse = dynamic_split_fuse,
scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True

std::cout << "To enable logging of additional information, like model configuration set environment variable OV_CB_FULL_LOG=1.";
std::cout << "Benchmarking parameters: " << std::endl;
std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl;
std::cout << "\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl;
Expand All @@ -490,7 +489,6 @@ int main(int argc, char* argv[]) try {
std::cout << "\tMax output length: " << max_output_len << std::endl;
std::cout << "\tTarget device: " << device << std::endl;
std::cout << "\tPlugin configuration JSON: " << device_config << std::endl;
std::cout << "\tFull logging set to: " << full_log << std::endl;

ov::AnyMap device_config_map = {};
if (!parse_plugin_config_string(device_config, device_config_map)) {
Expand All @@ -500,9 +498,17 @@ int main(int argc, char* argv[]) try {

// Benchmarking
std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl;
ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log);
ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {});

std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string();
// Enabled with env OV_CB_FULL_LOG=1
std::string print_values = "";
for (auto prop : pipe.get_model_configuration()) {
print_values = print_values + "\t" + prop + "\n";
}
if (!print_values.empty())
{
std::cout << "Model configuration: " << std::endl << print_values;
}

std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const SchedulerConfig& scheduler_config,
const std::string& device = "CPU",
const ov::AnyMap& llm_plugin_config = {},
const ov::AnyMap& tokenizer_plugin_config = {},
const bool full_log = false);
const ov::AnyMap& tokenizer_plugin_config = {});

/**
* @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs.
Expand All @@ -50,8 +49,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const ov::genai::Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device="CPU",
const ov::AnyMap& plugin_config={},
const bool full_log = false
const ov::AnyMap& plugin_config={}
);

ov::genai::Tokenizer get_tokenizer();
Expand All @@ -63,7 +61,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params);
GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params);

std::string get_model_configuration_string();
std::vector<std::string> get_model_configuration();

void step();

Expand Down
39 changes: 17 additions & 22 deletions src/cpp/src/continuous_batching_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl {
}

public:
Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) :
Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
m_tokenizer{tokenizer} {
ov::Core core;

Expand All @@ -101,11 +101,9 @@ class ContinuousBatchingPipeline::Impl {

auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);

if (full_log) {
ov::genai::utils::read_properties([compiled_model](const std::string& key) {
return compiled_model.get_property(key); },
m_model_config_namevalues);
}
ov::genai::utils::read_properties([compiled_model](const std::string& key) {
return compiled_model.get_property(key); },
m_model_config_namevalues);

ov::InferRequest infer_request = compiled_model.create_infer_request();

Expand All @@ -131,8 +129,8 @@ class ContinuousBatchingPipeline::Impl {
// read default generation config
}

Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log)
: Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {}
Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config)
: Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {}

ov::genai::GenerationConfig get_config() const {
return m_generation_config;
Expand All @@ -142,12 +140,11 @@ class ContinuousBatchingPipeline::Impl {
return m_pipeline_metrics;
}

std::string get_model_configuration_string() {
std::string print_values = "";
for( auto prop : m_model_config_namevalues) {
print_values = print_values + "\t" + prop + "\n";
}
return print_values;
std::vector<std::string> get_model_configuration() {
if (ov::genai::utils::is_full_log_env_enabled())
return m_model_config_namevalues;

return std::vector<std::string>();
}

ov::genai::Tokenizer get_tokenizer() {
Expand Down Expand Up @@ -406,20 +403,18 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model
const SchedulerConfig& scheduler_config,
const std::string& device,
const ov::AnyMap& llm_plugin_config,
const ov::AnyMap& tokenizer_plugin_config,
const bool full_log
const ov::AnyMap& tokenizer_plugin_config
) {
m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log);
m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
}

ContinuousBatchingPipeline::ContinuousBatchingPipeline(
const std::string& model_path,
const Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device,
const ov::AnyMap& plugin_config,
const bool full_log
) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {}
const ov::AnyMap& plugin_config
) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config)} {}

ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() {
return m_impl->get_tokenizer();
Expand All @@ -433,8 +428,8 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{
return m_impl->get_metrics();
}

std::string ContinuousBatchingPipeline::get_model_configuration_string() {
return m_impl->get_model_configuration_string();
std::vector<std::string> ContinuousBatchingPipeline::get_model_configuration() {
return m_impl->get_model_configuration();
}

GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) {
Expand Down
13 changes: 13 additions & 0 deletions src/cpp/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "utils.hpp"
#include <fstream>
#include <utility>

namespace ov {
namespace genai {
Expand Down Expand Up @@ -200,6 +201,18 @@ std::string join(const std::vector<std::string>& listOfStrings, const std::strin
return ss.str();
}

bool is_full_log_env_enabled() {
const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG");
if (environmentVariableBuffer) {
auto result = std::__cxx11::stoul(environmentVariableBuffer);
if (result && result > 0) {
return true;
}
}

return false;
}

} // namespace utils
} // namespace genai
} // namespace ov
2 changes: 2 additions & 0 deletions src/cpp/src/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config

std::string join(const std::vector<std::string>& listOfStrings, const std::string delimiter);

bool is_full_log_env_enabled();

template <typename PropertyExtractor>
static void read_properties(PropertyExtractor&& property_extractor, std::vector<std::string>& output_configuration_values) {
auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties;
Expand Down

0 comments on commit 7eb692e

Please sign in to comment.