Log model properties #677

Closed
@@ -428,7 +428,7 @@ int main(int argc, char* argv[]) try {
options.add_options()
("n,num_prompts", "A number of prompts", cxxopts::value<size_t>()->default_value("1000"))
("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value<size_t>()->default_value("256"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value<bool>()->default_value("true"))
("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling. Use --dynamic_split_fuse=false to disable", cxxopts::value<bool>()->default_value("true"))
("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value("."))
("dataset", "Path to dataset .json file", cxxopts::value<std::string>()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json"))
("max_input_len", "Max input length take from dataset", cxxopts::value<size_t>()->default_value("1024"))
@@ -476,6 +476,7 @@ int main(int argc, char* argv[]) try {
scheduler_config.dynamic_split_fuse = dynamic_split_fuse,
scheduler_config.max_num_seqs = 256, // not used if dynamic_split_fuse=True

std::cout << "To enable logging of additional information, like model configuration set environment variable OV_CB_FULL_LOG=1.";
std::cout << "Benchmarking parameters: " << std::endl;
std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl;
std::cout << "\tScheduling type: " << (scheduler_config.dynamic_split_fuse ? "dynamic split-fuse" : "vLLM") << std::endl;
@@ -497,7 +498,17 @@ int main(int argc, char* argv[]) try {

// Benchmarking
std::cout << "Loading models, creating pipelines, preparing environment..." << std::endl;
ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map);
ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config, device, device_config_map, {});

// Model configuration logging is enabled with the environment variable OV_CB_FULL_LOG=1;
// otherwise get_model_configuration() returns an empty vector.
std::string print_values;
for (const auto& prop : pipe.get_model_configuration()) {
print_values += "\t" + prop + "\n";
}
if (!print_values.empty()) {
std::cout << "Model configuration: " << std::endl << print_values;
}

std::cout << "Setup finished, launching LLM executor, traffic simulation and statistics reporter threads" << std::endl;

@@ -61,6 +61,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
GenerationHandle add_request(uint64_t request_id, const ov::Tensor& input_ids, const ov::genai::GenerationConfig& sampling_params);
GenerationHandle add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params);

std::vector<std::string> get_model_configuration();

void step();

bool has_non_finished_requests();
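For context, a minimal sketch of how a client could consume the new public API (not part of this PR; the include path, model path, and device below are placeholder assumptions):

    #include <iostream>

    #include "openvino/genai/continuous_batching_pipeline.hpp"  // assumed include path

    int main() {
        // get_model_configuration() returns an empty vector unless the process
        // was started with the environment variable OV_CB_FULL_LOG=1.
        ov::genai::SchedulerConfig scheduler_config;
        ov::genai::ContinuousBatchingPipeline pipe("/path/to/model", scheduler_config, "CPU", {}, {});
        for (const std::string& prop : pipe.get_model_configuration()) {
            std::cout << prop << std::endl;
        }
        return 0;
    }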
24 changes: 22 additions & 2 deletions src/cpp/src/continuous_batching_pipeline.cpp
@@ -15,6 +15,7 @@
#include "text_callback_streamer.hpp"
#include "timer.hpp"
#include "debug_utils.hpp"
#include "utils.hpp"

using namespace ov::genai;

@@ -29,6 +30,7 @@ class ContinuousBatchingPipeline::Impl {
std::shared_ptr<CacheManager> m_cache_manager;
std::shared_ptr<ModelRunner> m_model_runner;
std::shared_ptr<Sampler> m_sampler;
std::vector<std::string> m_model_config_namevalues;

// TODO (mzegla): GenerationConfig is request specific object
// and pipeline only uses default rng_seed.
@@ -97,7 +99,13 @@

apply_paged_attention_transformations(model, device_config);

ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), plugin_config).create_infer_request();
auto compiled_model = core.compile_model(model, device_config.get_device(), plugin_config);

ov::genai::utils::read_properties(
[compiled_model](const std::string& key) { return compiled_model.get_property(key); },
m_model_config_namevalues);

ov::InferRequest infer_request = compiled_model.create_infer_request();

// setup KV caches
m_cache_manager = std::make_shared<CacheManager>(device_config);
@@ -132,6 +140,13 @@
return m_pipeline_metrics;
}

std::vector<std::string> get_model_configuration() {
if (ov::genai::utils::is_full_log_env_enabled())
return m_model_config_namevalues;

return std::vector<std::string>();
}

ov::genai::Tokenizer get_tokenizer() {
return m_tokenizer;
}
@@ -388,7 +403,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& model
const SchedulerConfig& scheduler_config,
const std::string& device,
const ov::AnyMap& llm_plugin_config,
const ov::AnyMap& tokenizer_plugin_config) {
const ov::AnyMap& tokenizer_plugin_config
) {
m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
}

@@ -412,6 +428,10 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{
return m_impl->get_metrics();
}

std::vector<std::string> ContinuousBatchingPipeline::get_model_configuration() {
return m_impl->get_model_configuration();
}

GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, const ov::genai::GenerationConfig& sampling_params) {
return m_impl->add_request(request_id, prompt, sampling_params);
}
2 changes: 2 additions & 0 deletions src/cpp/src/debug_utils.hpp
@@ -6,6 +6,8 @@
#include <string>
#include <iostream>

#include <openvino/openvino.hpp>

#include <openvino/runtime/tensor.hpp>

template <typename T>
28 changes: 28 additions & 0 deletions src/cpp/src/utils.cpp
@@ -3,6 +3,7 @@

#include "utils.hpp"
#include <fstream>
#include <utility>

namespace ov {
namespace genai {
@@ -185,6 +186,33 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config
return std::nullopt;
}

std::string join(const std::vector<std::string>& listOfStrings, const std::string delimiter) {
std::stringstream ss;
auto it = listOfStrings.cbegin();
if (it == listOfStrings.end()) {
return "";
}
for (; it != (listOfStrings.end() - 1); ++it) {
ss << *it << delimiter;
}
if (it != listOfStrings.end()) {
ss << *it;
}
return ss.str();
}
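For reference, a few illustrative expected outputs of the new join helper (these are not from the PR's tests):

// join({"KEY", "value"}, ": ") -> "KEY: value"
// join({"a", "b", "c"}, ", ")  -> "a, b, c"
// join({}, ", ")               -> ""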

bool is_full_log_env_enabled() {
const char* environmentVariableBuffer = std::getenv("OV_CB_FULL_LOG");
if (environmentVariableBuffer) {
return std::stoul(environmentVariableBuffer) > 0;
}

return false;
}
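For reviewers, the resulting semantics of the flag (note that std::stoul throws on non-numeric input, which this helper does not catch; the values below are illustrative):

// OV_CB_FULL_LOG=1    -> true
// OV_CB_FULL_LOG=2    -> true (any numeric value greater than zero)
// OV_CB_FULL_LOG=0    -> false
// unset               -> false
// OV_CB_FULL_LOG=yes  -> std::invalid_argument thrown by std::stoul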

} // namespace utils
} // namespace genai
} // namespace ov
24 changes: 24 additions & 0 deletions src/cpp/src/utils.hpp
@@ -76,6 +76,30 @@ ov::genai::StreamerVariant get_streamer_from_map(const ov::AnyMap& config_map);

ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map);

std::string join(const std::vector<std::string>& listOfStrings, const std::string delimiter);

bool is_full_log_env_enabled();

template <typename PropertyExtractor>
static void read_properties(PropertyExtractor&& property_extractor, std::vector<std::string>& output_configuration_values) {
auto key = std::string("SUPPORTED_PROPERTIES"); // ov::supported_properties;
std::vector<ov::PropertyName> supported_config_keys;

ov::Any value = property_extractor(key);
supported_config_keys = value.as<std::vector<ov::PropertyName>>();

for (auto& key : supported_config_keys) {
if (key == "SUPPORTED_PROPERTIES")
continue;
std::string value;
ov::Any param_value = property_extractor(key);
value = param_value.as<std::string>();

output_configuration_values.emplace_back(join({key, value}, ": "));
}
std::sort(output_configuration_values.begin(), output_configuration_values.end());
}

} // namespace utils
} // namespace genai
} // namespace ov
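A self-contained sketch (not part of this PR) that exercises read_properties through a mock property extractor instead of a real compiled model; the property names and values in the map are made up for illustration:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    #include <openvino/openvino.hpp>

    #include "utils.hpp"

    int main() {
        // Fake property store standing in for ov::CompiledModel::get_property().
        std::map<std::string, ov::Any> props = {
            {"SUPPORTED_PROPERTIES", std::vector<ov::PropertyName>{
                 ov::PropertyName("SUPPORTED_PROPERTIES"),
                 ov::PropertyName("PERF_COUNT"),
                 ov::PropertyName("NUM_STREAMS")}},
            {"PERF_COUNT", std::string("NO")},
            {"NUM_STREAMS", std::string("1")}};

        std::vector<std::string> out;
        ov::genai::utils::read_properties(
            [&props](const std::string& key) { return props.at(key); }, out);

        // Prints "NUM_STREAMS: 1" then "PERF_COUNT: NO" (sorted; SUPPORTED_PROPERTIES is skipped).
        for (const auto& line : out)
            std::cout << line << std::endl;
    }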