# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import pathlib
import os
import pytest
import functools
import openvino
import openvino_tokenizers
import openvino_genai as ov_genai
from typing import List, Tuple
from pathlib import Path
import shutil
import json

from common import get_default_properties


def get_models_list():
    precommit_models = [
        "katuni4ka/tiny-random-phi3",
    ]

    nightly_models = [
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "facebook/opt-125m",
        "microsoft/phi-1_5",
        "microsoft/phi-2",
        "THUDM/chatglm3-6b",
        "Qwen/Qwen2-0.5B-Instruct",
        "Qwen/Qwen-7B-Chat",
        "Qwen/Qwen1.5-7B-Chat",
        "argilla/notus-7b-v1",
        "HuggingFaceH4/zephyr-7b-beta",
        "ikala/redpajama-3b-chat",
        "mistralai/Mistral-7B-v0.1",
        # "meta-llama/Llama-2-7b-chat-hf",  # Cannot be downloaded without access token.
        # "google/gemma-2b-it",  # Cannot be downloaded without access token.
        # "google/gemma-7b-it",  # Cannot be downloaded without access token.
        "meta-llama/Llama-2-13b-chat-hf",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "openlm-research/open_llama_3b",
        "openlm-research/open_llama_3b_v2",
        "openlm-research/open_llama_7b",
        "databricks/dolly-v2-12b",
        "databricks/dolly-v2-3b",
    ]

    # pytest.run_marker and pytest.selected_model_ids are custom attributes
    # set by the test suite's conftest.py.
    if pytest.run_marker == "precommit":
        model_ids = precommit_models
    else:
        model_ids = nightly_models

    if pytest.selected_model_ids:
        model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')]

    prefix = pathlib.Path(os.getenv('GENAI_MODELS_PATH_PREFIX', ''))
    return [(model_id, prefix / model_id.split('/')[1]) for model_id in model_ids]
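
# Illustrative usage sketch (kept as a comment so nothing runs on import).
# Each returned entry pairs a HF hub id with the expected local IR directory;
# the exact first entry depends on the active pytest marker:
#
#   model_id, models_path = get_models_list()[0]
#   # e.g. ('katuni4ka/tiny-random-phi3', Path('tiny-random-phi3')) when
#   # GENAI_MODELS_PATH_PREFIX is unset and the precommit list is selected.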


def get_chat_models_list():
    precommit_models = [
        "Qwen/Qwen2-0.5B-Instruct",
    ]

    nightly_models = [
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "meta-llama/Llama-2-7b-chat-hf",
        # "google/gemma-2b-it",  # Cannot be downloaded without access token.
        # "google/gemma-7b-it",  # Cannot be downloaded without access token.
    ]

    if pytest.run_marker == "precommit":
        model_ids = precommit_models
    else:
        model_ids = nightly_models

    prefix = pathlib.Path(os.getenv('GENAI_MODELS_PATH_PREFIX', ''))
    return [(model_id, prefix / model_id.split('/')[1]) for model_id in model_ids]


@functools.lru_cache(1)
def read_model(params, **tokenizer_kwargs):
    model_id, models_path = params

    from optimum.intel.openvino import OVModelForCausalLM
    from transformers import AutoTokenizer

    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    if (models_path / "openvino_model.xml").exists():
        # Reuse the already converted IR.
        opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True,
                                                       compile=False, device='CPU',
                                                       ov_config=get_default_properties())
    else:
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
                                                                             with_detokenizer=True,
                                                                             **tokenizer_kwargs)
        openvino.save_model(ov_tokenizer, models_path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, models_path / "openvino_detokenizer.xml")

        # Store the tokenizer config jsons with special tokens.
        hf_tokenizer.save_pretrained(models_path)

        # Export the model to OpenVINO IR and cache it next to the tokenizers.
        opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,
                                                       compile=False, device='CPU', load_in_8bit=False,
                                                       ov_config=get_default_properties())
        opt_model.generation_config.save_pretrained(models_path)
        opt_model.config.save_pretrained(models_path)
        opt_model.save_pretrained(models_path)

    return (
        model_id,
        models_path,
        hf_tokenizer,
        opt_model,
        ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_properties()),
    )
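
# Minimal usage sketch (comment only): read_model is lru_cached, so repeated
# calls with the same params tuple reuse the converted model and pipeline:
#
#   model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model(get_models_list()[0])
#   ov_pipe.generate("Hello", max_new_tokens=8)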


@pytest.fixture(scope="module")
def model_tmp_path(tmpdir_factory):
    model_id, models_path, _, _, _ = read_model(get_models_list()[0])
    temp_path = tmpdir_factory.mktemp(model_id.replace('/', '_'))

    # Copy the converted OpenVINO model and tokenizers.
    for pattern in ['*.xml', '*.bin']:
        for src_file in models_path.glob(pattern):
            if src_file.is_file():
                shutil.copy(src_file, temp_path / src_file.name)

    yield model_id, Path(temp_path)
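
# A hypothetical test using the fixture (pytest injects the tuple it yields):
#
#   def test_pipeline_from_tmp_copy(model_tmp_path):
#       model_id, temp_path = model_tmp_path
#       ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_properties())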


@pytest.fixture(scope="module")
def model_tokenizers_tmp_path(tmpdir_factory):
    model_id, models_path, _, _, _ = read_model(get_models_list()[0])
    temp_path = tmpdir_factory.mktemp(model_id.replace('/', '_'))

    # If tokens are not found in the IR, the pipeline falls back to reading them
    # from the config. There is no easy way to add tokens to the IR in tests,
    # so we blank them here and set tokens in the configs to check that they
    # are read and validated correctly.
    import openvino as ov
    core = ov.Core()

    # Copy the converted OpenVINO model and tokenizers.
    for pattern in ['*.xml', '*.bin']:
        for src_file in models_path.glob(pattern):
            # Rewrite openvino_tokenizer.xml and openvino_detokenizer.xml with
            # blanked token-related rt_info instead of copying them verbatim.
            if src_file.name in ['openvino_tokenizer.xml', 'openvino_detokenizer.xml']:
                ov_model = core.read_model(src_file)
                # Set empty rt_info values so that tokens will be read from the
                # config instead of the IR. Note set_rt_info(obj, path) takes
                # the value first, then the rt_info key.
                ov_model.set_rt_info("", "pad_token_id")
                ov_model.set_rt_info("", "eos_token_id")
                ov_model.set_rt_info("", "chat_template")
                ov.save_model(ov_model, str(temp_path / src_file.name))
                continue
            # Skip the tokenizer binaries: ov.save_model above already wrote
            # fresh .bin files next to the rewritten .xml files.
            if src_file.name in ['openvino_tokenizer.bin', 'openvino_detokenizer.bin']:
                continue
            if src_file.is_file():
                shutil.copy(src_file, temp_path / src_file.name)

    yield model_id, Path(temp_path)


def load_genai_pipe_with_configs(configs: List[Tuple[dict, str]], temp_path):
    # Build an LLMPipeline from temp_path with exactly the given configs on disk.
    # Remove existing jsons left over from previous tests first.
    for json_file in temp_path.glob("*.json"):
        json_file.unlink()

    for config_json, config_name in configs:
        with (temp_path / config_name).open('w') as f:
            json.dump(config_json, f)

    ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_properties())

    # Clean up the written configs so they don't leak into subsequent tests.
    for _, config_name in configs:
        os.remove(temp_path / config_name)

    return ov_pipe
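
# Usage sketch (comment only; the config values are illustrative): write a
# generation_config.json next to the IR and check that the pipeline reads it:
#
#   ov_pipe = load_genai_pipe_with_configs(
#       [({"max_new_tokens": 30}, "generation_config.json")], temp_path)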


@functools.lru_cache(1)
def get_continuous_batching(path):
    # Passing a SchedulerConfig switches the pipeline to the continuous batching backend.
    return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_properties())
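
# Usage sketch (comment only): tests would typically compare this pipeline's
# output against the stateful pipeline returned by read_model for the same
# model directory:
#
#   cb_pipe = get_continuous_batching(models_path)
#   cb_pipe.generate("Why is the Sun yellow?", max_new_tokens=20)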