Merge releases/2024/5 into master (openvinotoolkit#1168)
Co-authored-by: yatarkan <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: TolyaTalamanov <[email protected]>
Co-authored-by: wgzintel <[email protected]>
Co-authored-by: Sergey Lyalin <[email protected]>
6 people authored Nov 8, 2024
1 parent 809d93e commit 747c5d2
Showing 12 changed files with 420 additions and 147 deletions.
23 changes: 23 additions & 0 deletions .github/actions/build_app/action.yml
@@ -0,0 +1,23 @@
name: 'Build App'
inputs:
  ov_dir:
    description: 'Directory where OpenVINO is installed'
    default: './ov'
    required: false
  build_dir:
    description: 'Directory where the app is built'
    default: './build'
    required: false
  build_target:
    description: 'Target to build'
    default: ''
    required: false
runs:
  using: "composite"
  steps:
    - name: Build app
      shell: bash
      run: |
        source ${{ inputs.ov_dir }}/setupvars.sh
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ inputs.build_dir }}
        cmake --build ${{ inputs.build_dir }} --config Release ${{ inputs.build_target && format('--target {0}', inputs.build_target) || '' }} -j
18 changes: 18 additions & 0 deletions .github/actions/install_openvino/action.yml
@@ -0,0 +1,18 @@
name: 'Install OpenVINO'
inputs:
  ov_link:
    description: 'URL to download OpenVINO'
    required: true
  ov_dir:
    description: 'Directory to install OpenVINO'
    default: './ov'
    required: false
runs:
  using: "composite"
  steps:
    - name: 'Install OpenVINO'
      shell: bash
      run: |
        mkdir ${{ inputs.ov_dir }}
        curl ${{ inputs.ov_link }} | tar --directory ${{ inputs.ov_dir }} --strip-components 1 -xz
        sudo ${{ inputs.ov_dir }}/install_dependencies/install_openvino_dependencies.sh
15 changes: 15 additions & 0 deletions .github/actions/install_python_deps/action.yml
@@ -0,0 +1,15 @@
name: 'Install Python Dependencies'
inputs:
  ov_dir:
    description: 'Directory where OpenVINO is installed'
    default: './ov'
    required: false
runs:
  using: "composite"
  steps:
    - name: Install Python dependencies
      shell: bash
      run: |
        source ${{ inputs.ov_dir }}/setupvars.sh
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
        python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt
58 changes: 58 additions & 0 deletions .github/scripts/generate_reference_llava.py
@@ -0,0 +1,58 @@
import argparse
from pathlib import Path
from optimum.intel.openvino import OVModelForVisualCausalLM
from transformers import AutoProcessor
from PIL import Image

IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp"]


def main(model_path: str, images_path: str):
    print(f"Selected model: {model_path}\n")

    # Accept either a single image file or a directory of images.
    if Path(images_path).is_file():
        image_files = [Path(images_path)]
    else:
        image_files = sorted(
            [f for f in Path(images_path).glob("*") if f.is_file() and f.suffix.lower() in IMAGE_EXTENSIONS],
            key=lambda x: x.name
        )

    if not image_files:
        raise FileNotFoundError(f"No images found in '{images_path}' directory. Supported formats: {IMAGE_EXTENSIONS}")

    images = []
    for file in image_files:
        images.append(
            Image.open(file).convert("RGB")
        )

    print("Images:", image_files)

    model = OVModelForVisualCausalLM.from_pretrained(model_path, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

    # A single user turn: one placeholder per image, followed by the text prompt.
    conversation = [{
        "role": "user",
        "content": [
            *[{"type": "image"} for _ in images],
            {"type": "text", "text": "Describe the images."},
        ],
    }]

    prompt = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    print(prompt)
    inputs = processor(text=[prompt], images=images, return_tensors="pt")
    result = model.generate(**inputs, max_new_tokens=100, do_sample=False)
    # Decode only the newly generated tokens (everything past the prompt).
    decoded = processor.tokenizer.batch_decode(result[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
    print(decoded)
    with open("ref.txt", "w") as f:
        f.write(f"question:\n{decoded}\n----------\nquestion:\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path", type=str, required=True, help="Path to the model.")
    parser.add_argument("-i", "--images_path", type=str, required=True, help="Path to the directory with images.")
    args = parser.parse_args()
    main(args.model_path, args.images_path)
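
As a usage sketch (not part of this commit), the script can be driven the same way a workflow step would call it; the model and image paths below are illustrative:

# Hypothetical invocation of the reference generator; paths are illustrative.
import subprocess

subprocess.run(
    [
        "python", ".github/scripts/generate_reference_llava.py",
        "--model_path", "./llava_1_5_7b_ov",  # a directory produced by `optimum-cli export openvino`
        "--images_path", "./images",          # a single image file also works
    ],
    check=True,  # fail loudly, as a CI step would
)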
96 changes: 50 additions & 46 deletions .github/workflows/causal_lm_cpp.yml
@@ -713,7 +713,7 @@ jobs:
           echo "Chat sample python" passed
-  visual_language_chat_sample-ubuntu:
+  visual_language_chat_sample-ubuntu-minicpm_v2_6:
     runs-on: ubuntu-22.04-16-cores
     steps:
       - uses: actions/checkout@v4
@@ -722,21 +722,13 @@
       - uses: actions/setup-python@v4
         with:
           python-version: 3.11
-      - name: Install OpenVINO
-        run: |
-          mkdir ./ov/
-          curl ${{ env.l_u22_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
-          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
-      - name: Build app
-        run: |
-          source ./ov/setupvars.sh
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release --target visual_language_chat py_openvino_genai -j
-      - name: Install dependencies
-        run: |
-          source ./ov/setupvars.sh
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt opencv-python
+      - uses: ./.github/actions/install_openvino
+        with:
+          ov_link: ${{ env.l_u22_ov_link }}
+      - uses: ./.github/actions/build_app
+        with:
+          build_target: 'visual_language_chat py_openvino_genai'
+      - uses: ./.github/actions/install_python_deps
       - name: Download and convert tiny-random-minicpmv-2_6 model and an image
         run: |
           python -m pip install git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv
@@ -764,13 +756,6 @@ jobs:
           && ./build/samples/cpp/visual_language_chat/visual_language_chat ./tiny-random-minicpmv-2_6/ ./images/
           <<< $'Describe the images?' | tee cpp.txt
         timeout-minutes: 2
-      - name: Encode cpp.txt with Python encoding instead of terminal one
-        shell: python
-        run: |
-          with open("cpp.txt", "rb") as f:
-              content = f.read().decode("utf-8", "replace")
-          with open("cpp.txt", "wb") as f:
-              f.write(content.encode("utf-8"))
       - name: Run visual_language_chat Python sample - tiny-random-minicpmv-2_6
         run: >
           set -o pipefail
@@ -779,6 +764,13 @@
           <<< $'Describe the images?' | tee py.txt
         env:
           PYTHONPATH: "./build/"
+      - name: Encode cpp.txt with Python encoding instead of terminal one
+        shell: python
+        run: |
+          with open("cpp.txt", "rb") as f:
+              content = f.read().decode("utf-8", "replace")
+          with open("cpp.txt", "wb") as f:
+              f.write(content.encode("utf-8"))
       - run: diff cpp.txt py.txt
       - name: Run visual_language_chat C++ sample with 2 prompts - tiny-random-minicpmv-2_6
         run: >
@@ -803,39 +795,51 @@ jobs:
           with open("cpp2.txt", "wb") as f:
               f.write(content.encode("utf-8"))
       - run: diff cpp2.txt py2.txt
-      - name: Download and convert LLaVa 1.5 model and an image
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model llava-hf/llava-1.5-7b-hf ./llava_1_5_7b_ov/
-          wget https://llava-vl.github.io/static/images/monalisa.jpg
-      - name: Run visual_language_chat C++ sample - LLaVa 1.5
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./llava_1_5_7b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
-      - name: Download and convert LLaVa-Next model
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model llava-hf/llava-v1.6-mistral-7b-hf ./llava_v1_6_mistral_7b_ov/
-      - name: Run visual_language_chat C++ sample - LLaVa-Next
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./llava_v1_6_mistral_7b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
 
+  visual_language_chat_sample-ubuntu-llava_1_5:
+    uses: ./.github/workflows/job_vlm_sample_llava.yml
+    with:
+      model_id: llava-hf/llava-1.5-7b-hf
+      model_dir: llava_1_5_7b_ov
+
+  visual_language_chat_sample-ubuntu-llava_next:
+    uses: ./.github/workflows/job_vlm_sample_llava.yml
+    with:
+      model_id: llava-hf/llava-v1.6-mistral-7b-hf
+      model_dir: llava_v1_6_mistral_7b_ov
+
+  visual_language_chat_sample-ubuntu-internvl2:
+    runs-on: ubuntu-22.04-16-cores
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.11
+      - uses: ./.github/actions/install_openvino
+        with:
+          ov_link: ${{ env.l_u22_ov_link }}
+      - uses: ./.github/actions/build_app
+        with:
+          build_target: 'visual_language_chat py_openvino_genai'
+      - uses: ./.github/actions/install_python_deps
+      - name: Download and convert InternVL2 model
+        run: |
+          # Lowering transformers version, workaround for https://huggingface.co/OpenGVLab/InternVL2-1B/discussions/7
+          python -m pip install -U "transformers<4.45.0"
+          source ./ov/setupvars.sh
+          optimum-cli export openvino --model OpenGVLab/InternVL2-4B ./internvl2_4b_ov/ --trust-remote-code
+      - name: Download images
+        run: |
+          wget https://llava-vl.github.io/static/images/monalisa.jpg
+      - name: Run visual_language_chat C++ sample - InternVL2
+        run: >
+          source ./ov/setupvars.sh
+          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./internvl2_4b_ov/ monalisa.jpg
+          <<< $'Who drew this painting?\nWhen did the painter live?'
+        timeout-minutes: 4
 
   cpp-continuous-batching-ubuntu:
     runs-on: ubuntu-20.04-8-cores
     defaults:
Expand Down Expand Up @@ -975,7 +979,7 @@ jobs:
cpp-greedy_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
cpp-beam_search_causal_lm-notus-7b-v1, cpp-speculative_decoding_lm-ubuntu, cpp-prompt_lookup_decoding_lm-ubuntu,
cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
visual_language_chat_sample-ubuntu,
visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-llava_1_5, visual_language_chat_sample-ubuntu-llava_next, visual_language_chat_sample-ubuntu-internvl2,
cpp-continuous-batching-windows, cpp-continuous-batching-macos]
if: ${{ always() }}
runs-on: ubuntu-latest
44 changes: 44 additions & 0 deletions .github/workflows/job_vlm_sample_llava.yml
@@ -0,0 +1,44 @@
name: visual_language_chat sample - LLaVA

on:
  workflow_call:
    inputs:
      model_id:
        required: true
        type: string
      model_dir:
        required: true
        type: string

env:
  l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17289-7cf2bbb8391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241105_x86_64.tgz
jobs:
  visual_language_chat_sample-ubuntu-llava:
    runs-on: ubuntu-22.04-16-cores
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.11
      - uses: ./.github/actions/install_openvino
        with:
          ov_link: ${{ env.l_u22_ov_link }}
      - uses: ./.github/actions/build_app
        with:
          build_target: 'visual_language_chat py_openvino_genai'
      - uses: ./.github/actions/install_python_deps
      - name: Download and convert model
        run: |
          source ./ov/setupvars.sh
          optimum-cli export openvino --model ${{ inputs.model_id }} ./${{ inputs.model_dir }}
      - name: Download images
        run: |
          wget https://llava-vl.github.io/static/images/monalisa.jpg
      - name: Run visual_language_chat C++ sample
        run: >
          source ./ov/setupvars.sh
          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./${{ inputs.model_dir }} monalisa.jpg
          <<< $'Who drew this painting?\nWhen did the painter live?'
        timeout-minutes: 4
3 changes: 3 additions & 0 deletions .gitignore
@@ -5,6 +5,9 @@
 !__init__.py
 !__main__.py
 
+# don't skip GitHub Actions files and directories
+!.github/**
+
 # developer tools
 *.idea
 .vscode
34 changes: 30 additions & 4 deletions src/cpp/src/llm_pipeline_static.cpp
@@ -20,6 +20,26 @@
 
 namespace {
 
+uint32_t align_to(uint32_t value, uint32_t alignment) {
+    return (value + alignment - 1) & ~(alignment - 1);
+}
+
+enum class GenerateHint {
+    FAST_COMPILE,
+    BEST_PERF
+};
+
+GenerateHint str_to_hint(const std::string& str) {
+    if (str == "FAST_COMPILE") {
+        return GenerateHint::FAST_COMPILE;
+    }
+    if (str == "BEST_PERF") {
+        return GenerateHint::BEST_PERF;
+    }
+    OPENVINO_THROW("Unsupported \"GENERATE_HINT\" provided: " +
+                   str + ". Please select either \"FAST_COMPILE\" or \"BEST_PERF\".");
+}
+
 std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
     ov::preprocess::PrePostProcessor ppp(model);
 
@@ -275,8 +295,12 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
 }
 
 ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
-                                       const std::optional<NPUDesc>& npudesc) {
+                                       const std::optional<NPUDesc>& npudesc,
+                                       const GenerateHint hint) {
     auto config = get_default_common_config(model);
+    if (hint == GenerateHint::BEST_PERF) {
+        config.emplace("NPUW_ONLINE_PIPELINE", "NONE");
+    }
     // NB: Unconditionally set for generation model
     config.emplace("NPUW_DQ", "YES");
     if (npudesc.has_value() && npudesc->arch == "4000") {
@@ -404,8 +428,8 @@ void StaticLLMPipeline::setupAndCompileModels(
     m_prefill_model = m_kvcache_model->clone();
     m_prefill_model->set_friendly_name(m_kvcache_model->get_friendly_name() + "_prefill");
     // (7) Reshape both models to static shape
-    const uint32_t kMaxPromptLen = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
-    const uint32_t kMinResponseLen = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(128u);
+    const uint32_t kMaxPromptLen = align_to(pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u), 64u);
+    const uint32_t kMinResponseLen = align_to(pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(128u), 64u);
     KVAxesPosition axes = get_kv_axes(get_model_type_from_json(models_path / "config.json"));
     m_kvcache_desc = KVCacheDesc { kMaxPromptLen, kMaxPromptLen + kMinResponseLen, 0u, axes.seq_len };
     reshape_to_static(m_prefill_model, m_kvcache_desc.max_prompt_size, m_kvcache_desc.max_prompt_size, axes);
@@ -414,8 +438,10 @@
     auto prefill_config = pop_or_default(
         properties, "PREFILL_CONFIG", get_default_prefill_config(m_prefill_model, npudesc)
     );
+    // NB: GENERATE_HINT is only applicable for default generate config!
+    auto generate_hint = str_to_hint(pop_or_default<std::string>(properties, "GENERATE_HINT", "FAST_COMPILE"));
     auto generate_config = pop_or_default(
-        properties, "GENERATE_CONFIG", get_default_generate_config(m_kvcache_model, npudesc)
+        properties, "GENERATE_CONFIG", get_default_generate_config(m_kvcache_model, npudesc, generate_hint)
     );
     merge_config_with(prefill_config, properties);
     merge_config_with(generate_config, properties);
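
For illustration (not part of this commit), a minimal sketch of how the new properties could be passed from user code. It assumes the public ov::genai::LLMPipeline constructor taking a models path, device string, and property map; the model directory and values are illustrative. Note that align_to() rounds both lengths up to a multiple of 64, and its bit-mask trick requires a power-of-two alignment:

#include <iostream>

#include "openvino/genai/llm_pipeline.hpp"

int main() {
    ov::AnyMap properties{
        {"MAX_PROMPT_LEN", 1000u},   // align_to(1000, 64) reshapes the prefill model to 1024
        {"MIN_RESPONSE_LEN", 100u},  // align_to(100, 64) rounds the response length up to 128
        // GENERATE_HINT is only honored when no explicit GENERATE_CONFIG is supplied
        // (see the NB above); BEST_PERF additionally sets NPUW_ONLINE_PIPELINE=NONE.
        {"GENERATE_HINT", "BEST_PERF"},
    };
    // Illustrative model directory exported for the NPU static pipeline.
    ov::genai::LLMPipeline pipe("./TinyLlama-1.1B-Chat-v1.0/", "NPU", properties);
    std::cout << pipe.generate("Why is the sky blue?", ov::genai::max_new_tokens(64)) << std::endl;
    return 0;
}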