From ce4eb00ff94d53f5e1840bd2e3320356c9a86616 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 May 2024 11:35:55 +0200 Subject: [PATCH] Apply suggestions from code review Co-authored-by: Zlobin Vladimir Co-authored-by: Alexander Suvorov --- CMakeLists.txt | 10 ++++++++-- .../openvino/genai/generation_config.hpp | 17 +++++++++-------- src/cpp/include/openvino/genai/llm_pipeline.hpp | 6 +++--- src/cpp/src/generation_config.cpp | 1 - src/cpp/src/group_beam_searcher.hpp | 2 +- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 809327095c..ac392233a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,14 @@ cmake_minimum_required(VERSION 3.15) -set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type") -set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel") +# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with +# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options +get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE) + message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used") + # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). 
Otherwise project() sets its value and set() doesn't take effect + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...") +endif() project(openvino_genai VERSION 2024.2.0.0) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index e1f2151d49..837fae21ad 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -14,9 +14,10 @@ namespace ov { /** - * @brief controls the stopping condition for grouped beam search. The following values are possible: - * "early", where the generation stops as soon as there are `num_beams` complete candidates; "heuristic", where an - * heuristic is applied and the generation stops when is it very unlikely to find better candidates; + * @brief controls the stopping condition for grouped beam search. The following values are possible: + * "early" stops as soon as there are `num_beams` complete candidates. + "heuristic" stops when it is unlikely to find better candidates. + "never" stops when there cannot be better candidates. */ enum class StopCriteria { early, heuristic, never }; @@ -25,11 +26,11 @@ enum class StopCriteria { early, heuristic, never }; * * @param max_length the maximum length the generated tokens can have. Corresponds to the length of the input prompt + * `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set. - * @param max_new_tokens the maximum numbers of tokens to generate, ignoring the number of tokens in the prompt. + * @param max_new_tokens the maximum number of tokens to generate, excluding the number of tokens in the prompt. max_new_tokens has priority over max_length. * @param ignore_eos if set to true, then generation will not stop even if <eos> token is met. - * @param num_beams number of beams for beam search. 
+ * @param num_beams number of beams for beam search. 1 disables beam search. * @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. - * @param diversity_penalty this value is subtracted from a beam's score if it generates a token same as any beam from other group at a + * @param diversity_penalty this value is subtracted from a beam's score if it generates the same token as any beam from any other group at a * particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled. * @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to * the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log @@ -42,11 +43,11 @@ enum class StopCriteria { early, heuristic, never }; * heuristic is applied and the generation stops when is it very unlikely to find better candidates; * "never", where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm). * @param temperature the value used to modulate token probabilities for random sampling - * @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities + * @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * @param top_k the number of highest probability vocabulary tokens to keep for top-k-filtering. * @param do_sample whether or not to use multinomial random sampling * that add up to `top_p` or higher are kept. - * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty. + * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty. See https://arxiv.org/pdf/1909.05858. 
* @param pad_token_id id of padding token * @param bos_token_id id of <bos> token * @param eos_token_id id of <eos> token diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 2a6e53eea6..1345b488f4 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -65,7 +65,7 @@ class DecodedResults { class OPENVINO_GENAI_EXPORTS LLMPipeline { public: /** - * @brief Constructs a LLMPipeline when convert model xml/bin files, tokenizers and configuration and in the same dir. + * @brief Constructs an LLMPipeline from xml/bin files, tokenizers and configuration in the same dir. * * @param model_path Path to the dir model xml/bin files, tokenizers and generation_configs.json * @param device optional device @@ -105,8 +105,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { template <typename... Properties> util::EnableIfAllStringAny<std::string, Properties...> generate( - std::string text, - Properties&&... + std::string text, + Properties&&... 
properties) { return generate(text, AnyMap{std::forward<Properties>(properties)...}); } std::string generate(std::string text, const ov::AnyMap& config); diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index f7cbcaa075..14fc370c59 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -37,7 +37,6 @@ GenerationConfig::GenerationConfig(std::string json_path) { read_json_param(data, "length_penalty", length_penalty); read_json_param(data, "num_return_sequences", num_return_sequences); read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size); - // stop_criteria will be processed below read_json_param(data, "temperature", temperature); read_json_param(data, "top_p", top_p); read_json_param(data, "top_k", top_k); diff --git a/src/cpp/src/group_beam_searcher.hpp b/src/cpp/src/group_beam_searcher.hpp index 91f3ef4096..5362c9cfae 100644 --- a/src/cpp/src/group_beam_searcher.hpp +++ b/src/cpp/src/group_beam_searcher.hpp @@ -8,5 +8,5 @@ #include "openvino/genai/llm_pipeline.hpp" namespace ov { - EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attentin_mask, GenerationConfig sampling_params); + EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attention_mask, GenerationConfig config); }