Merge remote-tracking branch 'upstream/master' into prefix_caching
popovaan committed Jul 23, 2024
2 parents 92d335c + 5d21486 commit cb5c784
Showing 23 changed files with 188 additions and 113 deletions.
67 changes: 0 additions & 67 deletions .github/ISSUE_TEMPLATE/good_first_issue.yml

This file was deleted.

6 changes: 5 additions & 1 deletion .github/workflows/causal_lm_cpp.yml
@@ -191,6 +191,8 @@ jobs:
cpp-greedy_causal_lm-windows:
runs-on: windows-latest
env:
PYTHONIOENCODING: "utf8"
defaults:
run:
shell: cmd
@@ -626,6 +628,8 @@ jobs:
cpp-continuous-batching-windows:
runs-on: windows-latest
env:
PYTHONIOENCODING: "utf8"
defaults:
run:
shell: cmd
@@ -648,7 +652,7 @@ jobs:
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
cmake -DCMAKE_BUILD_TYPE=Releas -S ./ -B ./build/
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release -j
- name: Run gtests
run: |
1 change: 1 addition & 0 deletions .github/workflows/genai_package.yml
@@ -80,6 +80,7 @@ jobs:
runs-on: windows-latest
env:
CMAKE_BUILD_PARALLEL_LEVEL: null
PYTHONIOENCODING: "utf8"
defaults:
run:
shell: cmd
1 change: 1 addition & 0 deletions .github/workflows/genai_python_lib.yml
@@ -62,6 +62,7 @@ jobs:
runs-on: windows-latest
env:
CMAKE_BUILD_PARALLEL_LEVEL: null
PYTHONIOENCODING: "utf8"
defaults:
run:
shell: cmd
26 changes: 24 additions & 2 deletions CMakeLists.txt
@@ -23,6 +23,9 @@ project(OpenVINOGenAI
HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
LANGUAGES CXX)

option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" OFF)
option(RAPIDJSON_BUILD_DOC "Build rapidjson documentation." OFF)

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET
PATHS "${OpenVINO_DIR}")
@@ -40,13 +43,32 @@ find_file(spda_to_pa_header sdpa_to_paged_attention.hpp

include(cmake/features.cmake)

if(ENABLE_PYTHON)
# the following two calls are required for cross-compilation
if(OpenVINODeveloperPackage_DIR)
ov_find_python3(REQUIRED)
ov_detect_python_module_extension()
else()
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)
else()
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
endif()
endif()
endif()

add_subdirectory(thirdparty)
add_subdirectory(src)
add_subdirectory(samples)
add_subdirectory(tests/cpp)

install(FILES LICENSE DESTINATION licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
# Workaround https://gitlab.kitware.com/cmake/cmake/-/issues/2614
set(CPACK_COMPONENTS_ALL core_genai core_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_licenses)
if(ENABLE_PYTHON)
list(APPEND CPACK_COMPONENTS_ALL pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR})
endif()
include(CPack)
14 changes: 13 additions & 1 deletion samples/cpp/beam_search_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B

`beam_search_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
14 changes: 13 additions & 1 deletion samples/cpp/chat_sample/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B

`chat_sample TinyLlama-1.1B-Chat-v1.0`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
@@ -78,7 +78,9 @@ int main(int argc, char* argv[]) try {
// vLLM specific params
scheduler_config.max_num_seqs = 2;

ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config);
// It's possible to construct a Tokenizer from a different path.
// If the Tokenizer isn't specified, it's loaded from the same folder.
ov::genai::ContinuousBatchingPipeline pipe(models_path, ov::genai::Tokenizer{models_path}, scheduler_config);
std::vector<ov::genai::GenerationResult> generation_results = pipe.generate(prompts, sampling_params);

for (size_t request_id = 0; request_id < generation_results.size(); ++request_id) {
14 changes: 13 additions & 1 deletion samples/cpp/greedy_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B

`greedy_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
14 changes: 13 additions & 1 deletion samples/cpp/multinomial_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B

`multinomial_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
14 changes: 13 additions & 1 deletion samples/cpp/prompt_lookup_decoding_lm/README.md
@@ -20,8 +20,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B

`prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0;"`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
14 changes: 13 additions & 1 deletion samples/cpp/speculative_decoding_lm/README.md
@@ -24,8 +24,20 @@ optimum-cli export openvino --trust-remote-code --model meta-llama/Llama-2-7b-ch

`speculative_decoding_lm TinyLlama-1.1B-Chat-v1.0 Llama-2-7b-chat-hf "Why is the Sun yellow?"`

To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.

See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
19 changes: 18 additions & 1 deletion src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
@@ -32,7 +32,24 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
const std::string& device = "CPU",
const ov::AnyMap& plugin_config = {});

std::shared_ptr<ov::genai::Tokenizer> get_tokenizer();
/**
* @brief Constructs a ContinuousBatchingPipeline when ov::genai::Tokenizer is initialized manually using files from different directories.
*
* @param model_path Path to the dir with model, tokenizer .xml/.bin files, and generation_configs.json
* @param scheduler_config
* @param tokenizer manually initialized ov::genai::Tokenizer
* @param device optional device
* @param plugin_config optional plugin_config
*/
ContinuousBatchingPipeline(
const std::string& model_path,
const ov::genai::Tokenizer& tokenizer,
const SchedulerConfig& scheduler_config,
const std::string& device="CPU",
const ov::AnyMap& plugin_config={}
);

ov::genai::Tokenizer get_tokenizer();

ov::genai::GenerationConfig get_config() const;

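A minimal usage sketch of the new overload, based on the declaration above and the continuous batching sample changed in this commit. The model path and the `ov::genai::greedy()` preset are illustrative assumptions, not part of this diff:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "openvino/genai/continuous_batching_pipeline.hpp"

int main() {
    std::string models_path = "./TinyLlama-1.1B-Chat-v1.0";  // assumed model directory
    ov::genai::SchedulerConfig scheduler_config;
    scheduler_config.max_num_seqs = 2;  // mirrors the sample above

    // The Tokenizer may be built from a different directory than the model;
    // here both happen to point at the same folder.
    ov::genai::Tokenizer tokenizer{models_path};
    ov::genai::ContinuousBatchingPipeline pipe(models_path, tokenizer, scheduler_config);

    std::vector<std::string> prompts = {"Why is the Sun yellow?"};
    std::vector<ov::genai::GenerationConfig> sampling_params = {ov::genai::greedy()};
    for (const ov::genai::GenerationResult& result : pipe.generate(prompts, sampling_params)) {
        // Each result can hold several sequences (e.g. with beam search).
        for (const std::string& text : result.m_generation_ids) {
            std::cout << text << '\n';
        }
    }
}
```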
4 changes: 2 additions & 2 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -116,10 +116,10 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
);

/**
* @brief Constructs a LLMPipeline when ov::Tokenizer is initialized manually using files from different directories.
* @brief Constructs a LLMPipeline when ov::genai::Tokenizer is initialized manually using files from different directories.
*
* @param model_path Path to the dir with model, tokenizer .xml/.bin files, and generation_configs.json
* @param tokenizer manually initialized ov::Tokenizer
* @param tokenizer manually initialized ov::genai::Tokenizer
* @param device optional device
* @param plugin_config optional plugin_config
*/
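A hedged sketch of this constructor in use, following the documented parameter order (model_path, tokenizer, device); both paths are hypothetical:

```cpp
#include <iostream>
#include <string>

#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Tokenizer files may live in a different directory than the model itself.
    ov::genai::Tokenizer tokenizer("./tokenizer_dir");            // hypothetical path
    ov::genai::LLMPipeline pipe("./TinyLlama-1.1B-Chat-v1.0",     // hypothetical path
                                tokenizer,
                                "CPU");

    // DecodedResults converts to std::string for the single-result case.
    std::string answer = pipe.generate("Why is the Sun yellow?");
    std::cout << answer << '\n';
}
```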
2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/tokenizer.hpp
@@ -26,7 +26,7 @@ struct TokenizedInputs {
class OPENVINO_GENAI_EXPORTS Tokenizer {
public:
/**
* @brief ov::Tokenizer constructor.
* @brief ov::genai::Tokenizer constructor.
* @param tokenizer_path openvino_tokenizer.xml and openvino_detokenizer.xml should be located in the tokenizer_path
*/
Tokenizer(const std::string& tokenizer_path);
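For context, a short encode/decode round trip with this class, assuming the `encode`/`decode` overloads present in this header at this commit; the path is hypothetical:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "openvino/genai/tokenizer.hpp"

int main() {
    // The directory must contain openvino_tokenizer.xml and openvino_detokenizer.xml.
    ov::genai::Tokenizer tokenizer("./TinyLlama-1.1B-Chat-v1.0");  // hypothetical path

    ov::genai::TokenizedInputs inputs = tokenizer.encode("Why is the Sun yellow?");
    // inputs.input_ids and inputs.attention_mask are ov::Tensor objects.

    // The tensor-based decode overload returns one string per sequence.
    std::vector<std::string> texts = tokenizer.decode(inputs.input_ids);
    std::cout << texts.at(0) << '\n';
}
```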
