Update tests for transformers 4.36 #10858
@@ -99,7 +99,7 @@ jobs:
   echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV"
   echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloom-7b1" >> "$GITHUB_ENV"
   echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
-  echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV"
+  echo "ORIGINAL_CODESHELL_7B_PATH=${ORIGIN_DIR}/CodeShell-7B-Chat" >> "$GITHUB_ENV"
   echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
   echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
   echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"

Review comment: What is the reason for this change?
Reply: replit-code-v1-3b cannot run with transformers 4.36, so another code generation model is used here instead :)
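For context on this swap, the sketch below is a minimal, hypothetical smoke test of the kind the new environment variable feeds: it checks that a code-generation model can still be loaded with trust_remote_code and generate under the pinned transformers release. The fallback path is a placeholder, and the actual unit tests in this PR additionally load the model in 4-bit through the project's own AutoModelForCausalLM wrapper.

    # Hedged sketch: verify a code-generation model loads and generates under
    # transformers 4.36.x. The fallback path is a placeholder, not from this PR.
    import os

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_path = os.environ.get("ORIGINAL_CODESHELL_7B_PATH", "/path/to/CodeShell-7B-Chat")

    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)

    prompt = 'def hello():\n    print("hello world")\n'
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    with torch.inference_mode():
        # Greedy decoding keeps the check deterministic.
        output = model.generate(input_ids, do_sample=False, max_new_tokens=32)

    print(tokenizer.decode(output[0], skip_special_tokens=True))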
@@ -157,13 +157,13 @@ jobs:
   # fi
   if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
     echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
     echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR"
     wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
   fi
-  if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then
-    echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..."
-    echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR"
-    wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR
+  if [ ! -d $ORIGINAL_CODESHELL_7B_PATH ]; then
+    echo "Directory $ORIGINAL_CODESHELL_7B_PATH not found. Downloading from FTP server..."
+    echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/CodeShell-7B-Chat -P $ORIGIN_DIR"
+    wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/CodeShell-7B-Chat -P $ORIGIN_DIR
   fi
   if [ ! -d $ORIGINAL_WHISPER_TINY_PATH ]; then
     echo "Directory $ORIGINAL_WHISPER_TINY_PATH not found. Downloading from FTP server..."
@@ -226,14 +226,15 @@ jobs:
   shell: bash
   run: |
     pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
-    pip install transformers==4.36.0
+    pip install transformers==4.36.2
    pip install "pydantic>=2.0.0"
    bash python/llm/test/run-llm-llamaindex-tests.sh
 - name: Run sentence-transformers uninstallation
   if: ${{ always() }}
   shell: bash
   run: |
     pip uninstall sentence-transformers -y || true

 llm-unit-test-on-arc:
   needs: [setup-python-version, llm-cpp-build]
   strategy:
@@ -363,8 +364,6 @@ jobs:
   fi
   python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator
   bash python/llm/test/run-llm-inference-tests-gpu.sh
-  python -m pip install transformers==4.34.0
-  bash python/llm/test/run-llm-inference-tests-gpu-434.sh

 - name: Run LLM example tests
   shell: bash
@@ -410,7 +409,7 @@ jobs:
     pip install --pre --upgrade ipex-llm[xpu_2.0] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
     source /home/arda/intel/oneapi/setvars.sh
   fi
-  pip install transformers==4.36.0
+  pip install transformers==4.36.2
   pip install "pydantic>=2.0.0"
   bash python/llm/test/run-llm-llamaindex-tests-gpu.sh
 - name: Run sentence-transformers uninstallation
@@ -10,13 +10,14 @@ repo_id:
   - 'databricks/dolly-v1-6b'
   - 'databricks/dolly-v2-7b'
   - 'databricks/dolly-v2-12b'
-  - 'internlm/internlm-chat-7b-8k'
+  - 'internlm/internlm-chat-7b'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat-7B'
   - 'baichuan-inc/Baichuan2-7B-Chat'
   - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
   - 'bigscience/bloomz-7b1'
-  - 'fnlp/moss-moon-003-sft-4bit'
+  # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
+  - 'mistralai/Mistral-7B-v0.1'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3

Review comment: Is this supposed to be fixed?
Reply: This is a tokenizer issue in the moss-moon-003-sft model. Because moss-moon-003-sft has not fixed its tokenizer to be compatible with transformers 4.34+, we cannot fix it on our side. See the issue: https://github.com/analytics-zoo/nano/issues/1145
Review comment: Then shall we keep transformers 4.31 in the test as well for this model?
Reply: I don't know whether we need to keep such a test for transformers 4.31, since ipex-llm will be updated to support transformers 4.36. @jason-dai Do we need to keep tests that only work on transformers 4.31?
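A possible alternative to commenting models out entirely, sketched below, is to gate incompatible entries on the installed transformers version; the helper and the threshold are illustrative assumptions based on the "cannot work on transformers 4.34+" note above, not code from this PR.

    # Hypothetical version gate for benchmark repo_ids that break on newer
    # transformers releases; the mapping below is illustrative only.
    import transformers
    from packaging import version

    BROKEN_FROM = {
        # repo_id -> first transformers release the model is known to break on
        "fnlp/moss-moon-003-sft-4bit": version.parse("4.34.0"),
    }

    def should_skip(repo_id: str) -> bool:
        """Return True if the installed transformers is too new for this model."""
        limit = BROKEN_FROM.get(repo_id)
        return limit is not None and version.parse(transformers.__version__) >= limit

    print(should_skip("fnlp/moss-moon-003-sft-4bit"))  # True on transformers 4.34+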
@@ -31,7 +32,7 @@ test_api:
   - "transformer_int4_gpu" # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
 exclude:
-  - 'fnlp/moss-moon-003-sft-4bit:1024'
-  - 'fnlp/moss-moon-003-sft-4bit:2048'
+  # - 'fnlp/moss-moon-003-sft-4bit:1024'
+  # - 'fnlp/moss-moon-003-sft-4bit:2048'
   - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
   - 'bigscience/bloomz-7b1:2048'
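For readers unfamiliar with this config, each exclude entry pairs a repo_id with an input length, so a model is skipped only for the sequence lengths it cannot handle. The parser below is a hypothetical illustration of that format, not code from the repository.

    # Hypothetical helper showing how a 'repo_id:input_length' exclude entry
    # splits apart; not part of this PR.
    def parse_exclude(entry: str) -> tuple[str, int]:
        """Split an entry such as 'bigscience/bloomz-7b1:2048'."""
        repo_id, _, length = entry.rpartition(":")
        return repo_id, int(length)

    excludes = ["baichuan-inc/Baichuan2-13B-Chat-4bit:2048", "bigscience/bloomz-7b1:2048"]
    print([parse_exclude(e) for e in excludes])
    # [('baichuan-inc/Baichuan2-13B-Chat-4bit', 2048), ('bigscience/bloomz-7b1', 2048)]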
This file was deleted.
@@ -49,16 +49,16 @@ def test_transformers_auto_model_int4(self):
         print('Prompt:', input_str)
         print('Output:', output_str)
         print(f'Inference time: {end-st} s')
         res = 'Paris' in output_str
         self.assertTrue(res)

     def test_transformers_auto_model_for_causal_lm_int4(self):
-        model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH')
+        model_path = os.environ.get('ORIGINAL_CODESHELL_7B_PATH')
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
         input_str = 'def hello():\n print("hello world")\n'
         model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
         with torch.inference_mode():
             st = time.time()
             input_ids = tokenizer.encode(input_str, return_tensors="pt")
             output = model.generate(input_ids, do_sample=False, max_new_tokens=32)

Review comment: What is the reason for this change?
Reply: replit-code-v1-3b cannot run with transformers 4.36, so another code generation model is used here instead :)
@@ -67,7 +67,7 @@ def test_transformers_auto_model_for_causal_lm_int4(self):
         print('Prompt:', input_str)
         print('Output:', output_str)
         print(f'Inference time: {end-st} s')
         res = '\nhello()' in output_str
         self.assertTrue(res)
@@ -86,7 +86,7 @@ def test_transformers_auto_model_for_speech_seq2seq_int4(self):
         predicted_ids = model.generate(input_features)
         # decode token ids to text
         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
         end = time.time()
         print('Output:', transcription)
         print(f'Inference time: {end-st} s')
         res = 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.' in transcription[0]
@@ -108,22 +108,23 @@ def test_transformers_chatglm_for_causallm(self):
         print('Prompt:', input_str)
         print('Output:', output_str)
         print(f'Inference time: {end-st} s')
         res = 'Paris' in output_str
         self.assertTrue(res)

 @pytest.mark.parametrize('prompt, answer', [
     ('What is the capital of France?\n\n', 'Paris')
 ])
 @pytest.mark.parametrize('Model, Tokenizer, model_path',[
     (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')),
+    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')),
 ])
 def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer):
     tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = Model.from_pretrained(model_path,
                                   load_in_4bit=True,
                                   optimize_model=True,
                                   trust_remote_code=True)

     with tempfile.TemporaryDirectory() as tempdir:
         model.save_low_bit(tempdir)
         loaded_model = Model.load_low_bit(tempdir,
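The hunk above cuts off mid-call, so here is a minimal sketch of the save/load round trip that test_load_low_bit_completion exercises. It assumes the wrapper classes come from ipex_llm.transformers and that load_low_bit accepts trust_remote_code the way from_pretrained does; neither the imports nor the remaining arguments are shown in this diff.

    # Hedged sketch of the low-bit save/load round trip; the import location
    # and the load_low_bit kwargs are assumptions, not confirmed by this diff.
    import os
    import tempfile

    from ipex_llm.transformers import AutoModel  # assumed import path
    from transformers import AutoTokenizer

    model_path = os.environ.get("ORIGINAL_CHATGLM2_6B_PATH")

    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_path,
                                      load_in_4bit=True,
                                      optimize_model=True,
                                      trust_remote_code=True)

    with tempfile.TemporaryDirectory() as tempdir:
        # Persist the already-quantized weights, then reload them without
        # converting the original checkpoint again.
        model.save_low_bit(tempdir)
        loaded_model = AutoModel.load_low_bit(tempdir, trust_remote_code=True)

        input_ids = tokenizer.encode("What is the capital of France?\n\n", return_tensors="pt")
        output = loaded_model.generate(input_ids, max_new_tokens=32)
        print(tokenizer.decode(output[0], skip_special_tokens=True))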
@@ -143,9 +144,10 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer):
     (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt),
     (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt),
     (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt),
-    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt)
+    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_CODESHELL_7B_PATH'), prompt),
+    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt)
 ])
 def test_optimize_model(Model, Tokenizer, model_path, prompt):
     tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True)
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
Review comment: Please revert this change to keep the PR clean.
Reply: Reverted.