Commit

Full log flag
rasapala committed Aug 20, 2024
1 parent 350c88d commit 608b361
Showing 3 changed files with 25 additions and 14 deletions.
@@ -428,7 +428,7 @@ int main(int argc, char* argv[]) try {
options.add_options()
("n,num_prompts", "A number of prompts", cxxopts::value<size_t>()->default_value("1000"))
("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value<size_t>()->default_value("256"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value<bool>()->default_value("true"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling. Use --dynamic_split_fuse=false to disable", cxxopts::value<bool>()->default_value("true"))
("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value("."))
("dataset", "Path to dataset .json file", cxxopts::value<std::string>()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json"))
("max_input_len", "Max input length take from dataset", cxxopts::value<size_t>()->default_value("1024"))
@@ -437,6 +437,7 @@ int main(int argc, char* argv[]) try {
("cache_size", "Size of memory used for KV cache in GB. Default: 16", cxxopts::value<size_t>()->default_value("16"))
("device", "Target device to run the model. Default: CPU", cxxopts::value<std::string>()->default_value("CPU"))
("device_config", "Plugin configuration JSON. Example: '{\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\",\"PERF_COUNT\":true}' Default: {\"PERF_COUNT\":true}", cxxopts::value<std::string>()->default_value("{\"PERF_COUNT\":true}"))
("full_log", "Whether to enable logging of additional information, like model configuration. Use --full_log=false to disable", cxxopts::value<bool>()->default_value("true"))
("h,help", "Print usage");

cxxopts::ParseResult result;
@@ -464,6 +465,7 @@ int main(int argc, char* argv[]) try {
const std::string device = result["device"].as<std::string>();
const std::string device_config = result["device_config"].as<std::string>();
const size_t cache_size = result["cache_size"].as<size_t>();
+ const bool full_log = result["full_log"].as<bool>();

// Create requests for generation
Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len);
@@ -488,6 +490,7 @@ int main(int argc, char* argv[]) try {
std::cout << "\tMax output length: " << max_output_len << std::endl;
std::cout << "\tTarget device: " << device << std::endl;
std::cout << "\tPlugin configuration JSON: " << device_config << std::endl;
std::cout << "\tFull logging set to: " << full_log << std::endl;

ov::AnyMap device_config_map = {};
if (!parse_plugin_config_string(device_config, device_config_map)) {
@@ -497,7 +500,7 @@ int main(int argc, char* argv[]) try {

// Benchmarking
std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl;
- ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map);
+ ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {}, full_log);

std::cout << "Model configuration: " << std::endl << pipe.get_model_configuration_string();

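Aside (not part of the commit): the new option relies on cxxopts' handling of boolean values, where a flag with default_value("true") stays true unless the user passes an explicit =false — hence the "Use --full_log=false to disable" wording in the help text. A minimal, self-contained sketch of that behavior:

#include <cxxopts.hpp>
#include <iostream>

int main(int argc, char* argv[]) {
    cxxopts::Options options("flag_demo", "Boolean option that defaults to true");
    options.add_options()
        ("full_log", "Extra logging. Use --full_log=false to disable",
         cxxopts::value<bool>()->default_value("true"));
    auto result = options.parse(argc, argv);
    // Prints "true" when the flag is omitted or passed bare; "false" only
    // for an explicit --full_log=false.
    std::cout << std::boolalpha << result["full_log"].as<bool>() << std::endl;
    return 0;
}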
@@ -33,7 +33,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const SchedulerConfig& scheduler_config,
const std::string& device = "CPU",
const ov::AnyMap& llm_plugin_config = {},
- const ov::AnyMap& tokenizer_plugin_config = {});
+ const ov::AnyMap& tokenizer_plugin_config = {},
+ const bool full_log = false);

/**
* @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using file from the different dirs.
@@ -49,7 +50,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const ov::genai::Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device="CPU",
- const ov::AnyMap& plugin_config={}
+ const ov::AnyMap& plugin_config={},
+ const bool full_log = false
);

ov::genai::Tokenizer get_tokenizer();
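Caller-side view (a hypothetical sketch, not code from this commit; the include path is assumed): because full_log is defaulted to false in both public constructors, existing call sites compile unchanged, and callers opt in explicitly:

#include "openvino/genai/continuous_batching_pipeline.hpp"
#include <string>

void make_pipelines(const std::string& models_path,
                    const ov::genai::SchedulerConfig& scheduler_config) {
    // Unchanged call site: full_log falls back to its default of false.
    ov::genai::ContinuousBatchingPipeline quiet(models_path, scheduler_config);

    // Opting in: the new trailing argument enables the extra model-configuration logging.
    ov::genai::ContinuousBatchingPipeline verbose(models_path, scheduler_config, "CPU",
                                                  /*llm_plugin_config=*/{},
                                                  /*tokenizer_plugin_config=*/{},
                                                  /*full_log=*/true);
}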
26 changes: 16 additions & 10 deletions src/cpp/src/continuous_batching_pipeline.cpp
@@ -88,7 +88,7 @@ class ContinuousBatchingPipeline::Impl {
}

public:
Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
- Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config) :
+ Impl(const std::string& models_path, const Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& plugin_config, const bool full_log) :
m_tokenizer{tokenizer} {
ov::Core core;

@@ -100,9 +100,12 @@ class ContinuousBatchingPipeline::Impl {
apply_paged_attention_transformations(model, device_config);

auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);
- ov::genai::utils::read_properties([compiled_model](const std::string& key) {
-     return compiled_model.get_property(key); },
-     m_model_config_namevalues);
+
+ if (full_log) {
+     ov::genai::utils::read_properties([compiled_model](const std::string& key) {
+         return compiled_model.get_property(key); },
+         m_model_config_namevalues);
+ }

ov::InferRequest infer_request = compiled_model.create_infer_request();

@@ -128,8 +131,8 @@ class ContinuousBatchingPipeline::Impl {
// read default generation config
}

- Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config)
-     : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config} {}
+ Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string& device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config, const bool full_log)
+     : Impl{models_path, Tokenizer(models_path, tokenizer_plugin_config), scheduler_config, device, llm_plugin_config, full_log} {}

ov::genai::GenerationConfig get_config() const {
return m_generation_config;
@@ -403,17 +406,20 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model
const SchedulerConfig& scheduler_config,
const std::string& device,
const ov::AnyMap& llm_plugin_config,
- const ov::AnyMap& tokenizer_plugin_config) {
- m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
+ const ov::AnyMap& tokenizer_plugin_config,
+ const bool full_log
+ ) {
+ m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config, full_log);
}

ContinuousBatchingPipeline::ContinuousBatchingPipeline(
const std::string& model_path,
const Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device,
- const ov::AnyMap& plugin_config
- ) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config)} {}
+ const ov::AnyMap& plugin_config,
+ const bool full_log
+ ) : m_impl{std::make_shared<Impl>(model_path, tokenizer, scheduler_config, device, plugin_config, full_log)} {}

ov::genai::Tokenizer ContinuousBatchingPipeline::get_tokenizer() {
return m_impl->get_tokenizer();
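The behavioral core of the change is in Impl's primary constructor: the compiled model's properties are collected only when full_log is set, so the property queries are skipped entirely when logging is off. A simplified, self-contained sketch of that pattern, with standard types standing in for the OpenVINO ones (the real ov::genai::utils::read_properties and the compiled model are mocked here, and the explicit key list is an invented simplification):

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using NameValues = std::vector<std::pair<std::string, std::string>>;

// Pull (key, value) pairs through a caller-supplied getter, mirroring the shape
// of the gated call in this commit.
void read_properties(const std::function<std::string(const std::string&)>& getter,
                     const std::vector<std::string>& keys,
                     NameValues& out) {
    for (const auto& key : keys)
        out.emplace_back(key, getter(key));
}

int main() {
    const bool full_log = true;  // stands in for the new constructor argument
    NameValues model_config_namevalues;
    if (full_log) {  // the commit's gate: no property queries when disabled
        read_properties([](const std::string& key) { return "dummy-" + key; },
                        {"PERF_COUNT", "EXECUTION_DEVICES"}, model_config_namevalues);
    }
    for (const auto& [key, value] : model_config_namevalues)
        std::cout << key << " = " << value << std::endl;
    return 0;
}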
