Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Zlobin Vladimir <[email protected]>
Co-authored-by: Alexander Suvorov <[email protected]>
  • Loading branch information
3 people authored May 22, 2024
1 parent 5eb59ea commit ce4eb00
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 15 deletions.
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@

cmake_minimum_required(VERSION 3.15)

set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with
# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options
get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE)
message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used")
    # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take effect
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
endif()

project(openvino_genai VERSION 2024.2.0.0)

Expand Down
17 changes: 9 additions & 8 deletions src/cpp/include/openvino/genai/generation_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
namespace ov {

/**
* @brief controls the stopping condition for grouped beam search. The following values are possible:
* "early", where the generation stops as soon as there are `num_beams` complete candidates; "heuristic", where an
* heuristic is applied and the generation stops when is it very unlikely to find better candidates;
 * @brief controls the stopping condition for grouped beam search. The following values are possible:
 * "early" stops as soon as there are `num_beams` complete candidates.
 * "heuristic" stops when it is unlikely to find better candidates.
 * "never" stops when there cannot be better candidates.
*/
enum class StopCriteria { early, heuristic, never };

Expand All @@ -25,11 +26,11 @@ enum class StopCriteria { early, heuristic, never };
*
* @param max_length the maximum length the generated tokens can have. Corresponds to the length of the input prompt +
* `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.
* @param max_new_tokens the maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
 * @param max_new_tokens the maximum number of tokens to generate, excluding the number of tokens in the prompt. max_new_tokens has priority over max_length.
* @param ignore_eos if set to true, then generation will not stop even if <eos> token is met.
* @param num_beams number of beams for beam search. 1 means no beam search.
* @param num_beams number of beams for beam search. 1 disables beam search.
* @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
* @param diversity_penalty this value is subtracted from a beam's score if it generates a token same as any beam from other group at a
 * @param diversity_penalty this value is subtracted from a beam's score if it generates the same token as any beam from another group at a
* particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled.
* @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
* the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
Expand All @@ -42,11 +43,11 @@ enum class StopCriteria { early, heuristic, never };
 * heuristic is applied and the generation stops when it is very unlikely to find better candidates;
* "never", where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
* @param temperature the value used to modulate token probabilities for random sampling
* @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities
 * @param top_p if set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
* @param top_k the number of highest probability vocabulary tokens to keep for top-k-filtering.
 * @param do_sample whether or not to use multinomial random sampling
* @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty.
* @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty. See https://arxiv.org/pdf/1909.05858.
* @param pad_token_id id of padding token
* @param bos_token_id id of <bos> token
* @param eos_token_id id of <eos> token
Expand Down
6 changes: 3 additions & 3 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class DecodedResults {
class OPENVINO_GENAI_EXPORTS LLMPipeline {
public:
/**
* @brief Constructs a LLMPipeline when convert model xml/bin files, tokenizers and configuration and in the same dir.
* @brief Constructs an LLMPipeline from xml/bin files, tokenizers and configuration in the same dir.
*
 * @param model_path Path to the directory with model xml/bin files, tokenizers and generation_configs.json
* @param device optional device
Expand Down Expand Up @@ -105,8 +105,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {

template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> generate(
std::string text,
Properties&&... properties) {
std::string text,
Properties&&... properties) {
return generate(text, AnyMap{std::forward<Properties>(properties)...});
}
std::string generate(std::string text, const ov::AnyMap& config);
Expand Down
1 change: 0 additions & 1 deletion src/cpp/src/generation_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ GenerationConfig::GenerationConfig(std::string json_path) {
read_json_param(data, "length_penalty", length_penalty);
read_json_param(data, "num_return_sequences", num_return_sequences);
read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size);
// stop_criteria will be processed below
read_json_param(data, "temperature", temperature);
read_json_param(data, "top_p", top_p);
read_json_param(data, "top_k", top_k);
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/group_beam_searcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
#include "openvino/genai/llm_pipeline.hpp"

namespace ov {
EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attentin_mask, GenerationConfig sampling_params);
EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor prompts, ov::Tensor attentin_mask, GenerationConfig config);
}

0 comments on commit ce4eb00

Please sign in to comment.