From ce4eb00ff94d53f5e1840bd2e3320356c9a86616 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 May 2024 11:35:55 +0200 Subject: [PATCH] Apply suggestions from code review Co-authored-by: Zlobin Vladimir Co-authored-by: Alexander Suvorov --- CMakeLists.txt | 10 ++++++++-- .../openvino/genai/generation_config.hpp | 17 +++++++++-------- src/cpp/include/openvino/genai/llm_pipeline.hpp | 6 +++--- src/cpp/src/generation_config.cpp | 1 - src/cpp/src/group_beam_searcher.hpp | 2 +- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 809327095c..ac392233a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,14 @@ cmake_minimum_required(VERSION 3.15) -set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type") -set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel") +# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with +# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options +get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE) + message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used") + # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). 
Otherwise project() sets its value and set() doesn't take effect + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...") +endif() project(openvino_genai VERSION 2024.2.0.0) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index e1f2151d49..837fae21ad 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -14,9 +14,10 @@ namespace ov { /** - * @brief controls the stopping condition for grouped beam search. The following values are possible: - * "early", where the generation stops as soon as there are `num_beams` complete candidates; "heuristic", where an - * heuristic is applied and the generation stops when is it very unlikely to find better candidates; + * @brief controls the stopping condition for grouped beam search. The following values are possible: + * "early" stops as soon as there are `num_beams` complete candidates. + "heuristic" stops when it is unlikely to find better candidates. + "never" stops when there cannot be better candidates. */ enum class StopCriteria { early, heuristic, never }; @@ -25,11 +26,11 @@ enum class StopCriteria { early, heuristic, never }; * * @param max_length the maximum length the generated tokens can have. Corresponds to the length of the input prompt + * `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set. - * @param max_new_tokens the maximum numbers of tokens to generate, ignoring the number of tokens in the prompt. + * @param max_new_tokens the maximum number of tokens to generate, excluding the number of tokens in the prompt. max_new_tokens has priority over max_length. * @param ignore_eos if set to true, then generation will not stop even if <eos> token is met. - * @param num_beams number of beams for beam search. 
+ * @param num_beams number of beams for beam search. 1 disables beam search. * @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. - * @param diversity_penalty this value is subtracted from a beam's score if it generates a token same as any beam from other group at a + * @param diversity_penalty this value is subtracted from a beam's score if it generates the same token as any beam from any other group at a * particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled. * @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to * the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log @@ -42,11 +43,11 @@ enum class StopCriteria { early, heuristic, never }; * heuristic is applied and the generation stops when is it very unlikely to find better candidates; * "never", where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm). * @param temperature the value used to modulate token probabilities for random sampling - * @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities + * @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * @param top_k the number of highest probability vocabulary tokens to keep for top-k-filtering. * @param do_sample whether or not to use multinomial random sampling * that add up to `top_p` or higher are kept. - * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty. + * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty. See https://arxiv.org/pdf/1909.05858. 
* @param pad_token_id id of padding token * @param bos_token_id id of <bos> token * @param eos_token_id id of <eos> token diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 2a6e53eea6..1345b488f4 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -65,7 +65,7 @@ class DecodedResults { class OPENVINO_GENAI_EXPORTS LLMPipeline { public: /** - * @brief Constructs a LLMPipeline when convert model xml/bin files, tokenizers and configuration and in the same dir. + * @brief Constructs an LLMPipeline from xml/bin files, tokenizers and configuration in the same dir. * * @param model_path Path to the dir model xml/bin files, tokenizers and generation_configs.json * @param device optional device @@ -105,8 +105,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { template <typename... Properties> util::EnableIfAllStringAny<std::string, Properties...> generate( - std::string text, - Properties&&... + std::string text, + Properties&&... 
properties) { return generate(text, AnyMap{std::forward<Properties>(properties)...}); } std::string generate(std::string text, const ov::AnyMap& config); diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index f7cbcaa075..14fc370c59 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -37,7 +37,6 @@ GenerationConfig::GenerationConfig(std::string json_path) { read_json_param(data, "length_penalty", length_penalty); read_json_param(data, "num_return_sequences", num_return_sequences); read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size); - // stop_criteria will be processed below read_json_param(data, "temperature", temperature); read_json_param(data, "top_p", top_p); read_json_param(data, "top_k", top_k); diff --git a/src/cpp/src/group_beam_searcher.hpp b/src/cpp/src/group_beam_searcher.hpp index 91f3ef4096..5362c9cfae 100644 --- a/src/cpp/src/group_beam_searcher.hpp +++ b/src/cpp/src/group_beam_searcher.hpp @@ -8,5 +8,5 @@ #include "openvino/genai/llm_pipeline.hpp" namespace ov { - EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attentin_mask, GenerationConfig sampling_params); + EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attention_mask, GenerationConfig config); }