-
Notifications
You must be signed in to change notification settings - Fork 191
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
740c914
commit 278b1b6
Showing
10 changed files
with
223 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (C) 2023-2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
||
# FetchContent_Declare()/FetchContent_MakeAvailable() are provided by the
# FetchContent module. Include it here so this file does not rely on an
# including scope having already loaded it.
include(FetchContent)

# Locate the OpenVINO GenAI package that provides the openvino::genai target.
find_package(OpenVINOGenAI REQUIRED PATHS
    "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
    ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
)

# cxxopts is used by the sample for command-line parsing.
FetchContent_Declare(cxxopts
    URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz
    URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
FetchContent_MakeAvailable(cxxopts)

add_executable(benchmark_vanilla_genai benchmark_vanilla_genai.cpp)
target_link_libraries(benchmark_vanilla_genai PRIVATE openvino::genai cxxopts::cxxopts)
set_target_properties(benchmark_vanilla_genai PROPERTIES
    COMPILE_PDB_NAME benchmark_vanilla_genai
    # Ensure out of box LC_RPATH on macOS with SIP
    INSTALL_RPATH_USE_LINK_PATH ON)
# target_compile_features(benchmark_vanilla_genai PRIVATE cxx_std_11)
install(TARGETS benchmark_vanilla_genai
        RUNTIME DESTINATION samples_bin/
        COMPONENT samples_bin
        EXCLUDE_FROM_ALL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# benchmark OpenVINO GenAI sample | ||
|
65 changes: 65 additions & 0 deletions
65
samples/cpp/benchmark_vanilla_genai/benchmark_vanilla_genai.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// Copyright (C) 2023-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "openvino/genai/llm_pipeline.hpp" | ||
#include <cxxopts.hpp> | ||
|
||
int main(int argc, char* argv[]) try { | ||
cxxopts::Options options("benchmark_vanilla_genai", "Help command"); | ||
|
||
options.add_options() | ||
("p,prompt", "Prompt", cxxopts::value<std::string>()->default_value("The Sky is blue because")) | ||
("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value(".")) | ||
("nw,num_warmup", "Number of warmup iterations", cxxopts::value<size_t>()->default_value(std::to_string(1))) | ||
("n,num_iter", "Number of iterations", cxxopts::value<size_t>()->default_value(std::to_string(1))) | ||
("d,device", "device", cxxopts::value<std::string>()->default_value("CPU")) | ||
("h,help", "Print usage"); | ||
|
||
cxxopts::ParseResult result; | ||
try { | ||
result = options.parse(argc, argv); | ||
} catch (const cxxopts::exceptions::exception& e) { | ||
std::cout << e.what() << "\n\n"; | ||
std::cout << options.help() << std::endl; | ||
return EXIT_FAILURE; | ||
} | ||
|
||
if (result.count("help")) { | ||
std::cout << options.help() << std::endl; | ||
return EXIT_SUCCESS; | ||
} | ||
|
||
std::string prompt = result["prompt"].as<std::string>(); | ||
const std::string model_path = result["model"].as<std::string>(); | ||
std::string device = result["device"].as<std::string>(); | ||
size_t num_warmup = result["num_warmup"].as<size_t>(); | ||
size_t num_iter = result["num_iter"].as<size_t>(); | ||
|
||
ov::genai::GenerationConfig config; | ||
config.max_new_tokens = 100; | ||
|
||
ov::genai::LLMPipeline pipe(model_path, device); | ||
|
||
for (size_t i = 0; i < num_warmup; i++) | ||
pipe.generate(prompt, config); | ||
|
||
ov::genai::GenerationMetrics metrics; | ||
for (size_t i = 0; i < num_iter; i++) { | ||
ov::genai::DecodedResults res = pipe.generate(prompt, config); | ||
metrics += res.metrics; | ||
metrics.load_time = res.metrics.load_time; | ||
} | ||
|
||
std::cout << "Load time: " << metrics.load_time << " ms" << std::endl; | ||
std::cout << "ttft: " << metrics.mean_ttft << " ± " << metrics.std_ttft << " ms" << std::endl; | ||
std::cout << "tpot: " << metrics.mean_tpot << " ± " << metrics.std_tpot << " ms" << std::endl; | ||
std::cout << "Tokens/s: " << metrics.get_tokens_per_sec().first << std::endl; | ||
|
||
return 0; | ||
} catch (const std::exception& error) { | ||
std::cerr << error.what() << '\n'; | ||
return EXIT_FAILURE; | ||
} catch (...) { | ||
std::cerr << "Non-exception object thrown\n"; | ||
return EXIT_FAILURE; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Copyright (C) 2023-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include <chrono> | ||
#include <numeric> | ||
#include <vector> | ||
#include <cmath> | ||
|
||
namespace ov { | ||
namespace genai { | ||
|
||
/// Sequence of steady-clock timestamps from which per-token intervals are derived.
using TimePoints = std::vector<std::chrono::steady_clock::time_point>;

/// Timing statistics collected for one or more generation runs.
///
/// All scalar members carry default initializers so that a default-constructed
/// object holds well-defined values (zeros, batch size 1) instead of
/// indeterminate ones.
struct GenerationMetrics {
    GenerationMetrics() = default;

    /// Builds metrics from consecutive timestamps; the intervals between
    /// neighbouring time points become the `durations` samples.
    GenerationMetrics(const TimePoints& tok_times, size_t batch_size = 1);
    /// Builds metrics from already-computed duration and first-token samples.
    GenerationMetrics(const std::vector<float>& durations, const std::vector<float>& times_to_first_token, size_t batch_size = 1);

    // First token time: mean, standard deviation and the raw samples.
    float mean_ttft = 0.0f;
    float std_ttft = 0.0f;
    std::vector<float> times_to_first_token;

    // Time per output token: mean, standard deviation and the raw samples.
    float mean_tpot = 0.0f;
    float std_tpot = 0.0f;
    std::vector<float> durations;

    /// Returns {mean, standard deviation} of the tokens-per-second rate
    /// derived from mean_tpot, std_tpot and batch_size.
    std::pair<float, float> get_tokens_per_sec() const;
    size_t batch_size = 1;
    float load_time = 0.0f;

    /// Returns a new object built from the samples of both operands.
    /// NOTE: this is a const member returning by value; neither operand is
    /// modified, so callers must use the returned object.
    GenerationMetrics operator+=(GenerationMetrics const& metrics) const;
};
|
||
} // namespace genai | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Copyright (C) 2023-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "openvino/genai/generation_metrics.hpp" | ||
#include <tuple> | ||
|
||
namespace { | ||
|
||
/// Computes the arithmetic mean and the population standard deviation of
/// `durations`, returned as {mean, std}.
///
/// An empty input yields {0, 0} rather than dividing by zero.
std::pair<float, float> calc_mean_and_std(const std::vector<float>& durations) {
    if (durations.empty()) {
        return {0.0f, 0.0f};
    }

    const float count = static_cast<float>(durations.size());
    const float mean = std::accumulate(durations.begin(), durations.end(), 0.0f) / count;

    const float sum_square_durations = std::accumulate(durations.begin(), durations.end(), 0.0f,
        [](float acc, float duration) -> float {
            return acc + duration * duration;
        });

    // E[x^2] - E[x]^2 may come out slightly negative because of float
    // rounding; clamp at zero so that sqrt never produces NaN.
    const float variance = sum_square_durations / count - mean * mean;
    const float std_dev = variance > 0.0f ? std::sqrt(variance) : 0.0f;
    return {mean, std_dev};
}
|
||
} // namespace | ||
|
||
namespace ov { | ||
namespace genai { | ||
|
||
|
||
/// Builds metrics from a sequence of timestamps.
///
/// Each interval between neighbouring time points becomes one entry of
/// `durations`, in milliseconds with fractional precision. The first interval
/// is also recorded as the single time-to-first-token sample.
///
/// With fewer than two time points no interval exists; the sample vectors stay
/// empty and all statistics are zero.
GenerationMetrics::GenerationMetrics(const TimePoints& tok_times, size_t batch_size)
    : mean_ttft(0.0f),
      std_ttft(0.0f),
      mean_tpot(0.0f),
      std_tpot(0.0f),
      batch_size(batch_size),
      load_time(0.0f) {
    // Guard first: size() - 1 would wrap around for an empty input, and
    // durations[0] would be out of bounds for a single time point.
    if (tok_times.size() < 2) {
        return;
    }

    durations.reserve(tok_times.size() - 1);
    for (size_t i = 1; i < tok_times.size(); ++i) {
        // A float-based millisecond duration keeps sub-millisecond intervals,
        // which an integer-millisecond duration_cast would truncate to 0.
        const std::chrono::duration<float, std::milli> elapsed = tok_times[i] - tok_times[i - 1];
        durations.push_back(elapsed.count());
    }
    times_to_first_token.emplace_back(durations.front());

    std::tie(mean_tpot, std_tpot) = calc_mean_and_std(durations);
    std::tie(mean_ttft, std_ttft) = calc_mean_and_std(times_to_first_token);
}
|
||
/// Builds metrics from already-computed samples.
///
/// Copies `durations_` and `times_to_first_token_` into the object and derives
/// the mean/std statistics from them. `load_time` starts at zero.
GenerationMetrics::GenerationMetrics(const std::vector<float>& durations_, const std::vector<float>& times_to_first_token_, size_t batch_size)
    // Initializers are listed in member declaration order, which is the order
    // in which they actually run.
    : times_to_first_token(times_to_first_token_),
      durations(durations_),
      batch_size(batch_size),
      load_time(0.0f) {
    std::tie(mean_tpot, std_tpot) = calc_mean_and_std(durations);
    std::tie(mean_ttft, std_ttft) = calc_mean_and_std(times_to_first_token);
}
|
||
/// Combines the samples of `*this` and `metrics` into a new object.
///
/// The duration and time-to-first-token samples of `metrics` are appended
/// after those of `*this`, and the statistics are recomputed from the combined
/// samples. This member is const: neither operand is modified, and the
/// returned object is the only result, so callers must use it.
GenerationMetrics GenerationMetrics::operator+=(GenerationMetrics const& metrics) const {
    std::vector<float> merged_durations;
    merged_durations.reserve(durations.size() + metrics.durations.size());
    merged_durations.insert(merged_durations.end(), durations.begin(), durations.end());
    merged_durations.insert(merged_durations.end(), metrics.durations.begin(), metrics.durations.end());

    std::vector<float> merged_first_token_times;
    merged_first_token_times.reserve(times_to_first_token.size() + metrics.times_to_first_token.size());
    merged_first_token_times.insert(merged_first_token_times.end(),
                                    times_to_first_token.begin(), times_to_first_token.end());
    merged_first_token_times.insert(merged_first_token_times.end(),
                                    metrics.times_to_first_token.begin(), metrics.times_to_first_token.end());

    return GenerationMetrics(merged_durations, merged_first_token_times);
}
|
||
std::pair<float, float> GenerationMetrics::get_tokens_per_sec() const { | ||
auto mean_tps = 1000.0f * batch_size / mean_tpot; | ||
auto std_tps = 1000.0f * std_tpot / (mean_tpot * mean_tpot); | ||
return {mean_tps, std_tps}; | ||
} | ||
|
||
|
||
} // namespace genai | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters