Add batch 4 nightly-perf-test #11332

Closed · wants to merge 9 commits
68 changes: 56 additions & 12 deletions .github/workflows/llm_performance_tests.yml
@@ -13,12 +13,12 @@ on:
schedule:
- cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
# please uncomment it for PR tests
# pull_request:
# branches: [main]
# paths:
# - ".github/workflows/llm_performance_tests.yml"
# - "python/llm/test/benchmark/**"
# - "python/llm/dev/benchmark/all-in-one/**"
pull_request:
branches: [main]
paths:
- ".github/workflows/llm_performance_tests.yml"
- "python/llm/test/benchmark/**"
- "python/llm/dev/benchmark/all-in-one/**"
workflow_dispatch:
inputs:
arc:
@@ -49,7 +49,7 @@ jobs:
# uses: ./.github/workflows/llm-binary-build.yml

llm-performance-test-on-arc:
if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it out for PR tests
# if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it out for PR tests
# needs: llm-cpp-build # please uncomment it for PR tests
strategy:
fail-fast: false
@@ -96,11 +96,11 @@ jobs:
shell: bash
run: |
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
test_version_date=`date -d 'yesterday' '+%Y%m%d'`
if ! pip show ipex-llm | grep $test_version_date; then
echo "Did not install ipex-llm with excepted version $test_version_date"
exit 1
fi
# test_version_date=`date -d 'yesterday' '+%Y%m%d'`
# if ! pip show ipex-llm | grep $test_version_date; then
# echo "Did not install ipex-llm with excepted version $test_version_date"
# exit 1
# fi

- name: Test installed xpu version
shell: bash
@@ -120,6 +120,7 @@
cd python/llm/dev/benchmark/all-in-one
mkdir test_batch1
mkdir test_batch2
mkdir test_batch4
# batch_size 1
# hide time info
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
@@ -135,6 +136,14 @@
sed -i 's/batch1/batch2/g' run.py
python run.py
mv *.csv test_batch2
# batch_size 4
cd ../../../../../
cp python/llm/test/benchmark/arc-perf-test-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# change csv name
sed -i 's/batch2/batch4/g' run.py
python run.py
mv *.csv test_batch4
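Each added batch repeats the pattern the existing batch-1 and batch-2 steps already use: copy the batch-specific yaml over config.yaml, retag the csv name in run.py with sed, run, and collect the results. A minimal sketch of that pattern as a reusable shell function, assuming it runs from the repository root (the helper name and its invocation are illustrative, not part of this workflow):

    run_batch() {
      # $1 = batch-specific yaml, $2 = old csv tag in run.py, $3 = new csv tag
      cp "python/llm/test/benchmark/$1" python/llm/dev/benchmark/all-in-one/config.yaml
      (
        cd python/llm/dev/benchmark/all-in-one
        sed -i "s/$2/$3/g" run.py   # e.g. rewrite "batch2" to "batch4" in the csv name
        python run.py
        mkdir -p "test_$3"
        mv *.csv "test_$3"
      )
    }

    run_batch arc-perf-test-batch4.yaml batch2 batch4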

- name: Test on xpu(transformers==4.37.0)
shell: bash
@@ -159,6 +168,14 @@
sed -i 's/batch1/batch2/g' run.py
python run.py
mv *.csv test_batch2
# batch_size 4
cd ../../../../../
cp python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# change csv name
sed -i 's/batch2/batch4/g' run.py
python run.py
mv *.csv test_batch4

- name: Concat csv and generate html
shell: bash
@@ -185,6 +202,17 @@
done
cd ../../../../test/benchmark
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
# batch_size 4
cd ../../../../
cd python/llm/dev/benchmark/all-in-one/test_batch4
python ../../../../test/benchmark/concat_csv.py
for file in *.csv; do
if [[ $file != *test* ]]; then
cp "$file" $CSV_SAVE_PATH/batch_size_4
fi
done
cd ../../../../test/benchmark
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_4
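For context, the concat step collapses the per-run csv files in a batch folder into one summary csv before the html conversion. The repository's concat_csv.py is not shown in this diff, so the sketch below is only an illustration of the general idea, with a hypothetical output name:

    import glob
    import pandas as pd

    # Combine every per-run csv in the current batch folder into one frame.
    frames = [pd.read_csv(path) for path in sorted(glob.glob('*.csv'))]
    # Hypothetical output name; the real script chooses its own.
    pd.concat(frames, ignore_index=True).to_csv('summary.csv', index=False)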

- name: Merge and sort csv files of multiple batches and generate html
shell: bash
@@ -204,6 +232,12 @@
cp "$file" ../../../../test/benchmark/merged_temp
fi
done
cd ../test_batch4
for file in *.csv; do
if [[ $file != *test* ]]; then
cp "$file" ../../../../test/benchmark/merged_temp
fi
done
cd ../../../../test/benchmark
python merge_csv_batch.py -f ./merged_temp
cd merged_temp
@@ -244,6 +278,16 @@
fi
cd ../
rm -r test_batch2
# batch_size 4
cd test_batch4
python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch4.yaml
python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch4.yaml
find . -name "*test*.csv" -delete
if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then
curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
fi
cd ../
rm -r test_batch4


llm-performance-test-on-spr:
38 changes: 19 additions & 19 deletions python/llm/test/benchmark/arc-perf-test-batch2.yaml
@@ -1,23 +1,23 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
- 'THUDM/chatglm2-6b'
- 'THUDM/chatglm3-6b-4bit'
- 'tiiuae/falcon-7b-instruct-with-patch'
- 'mosaicml/mpt-7b-chat'
- 'redpajama/gptneox-7b-redpajama-bf16'
- 'bigcode/starcoder-15.5b-4bit'
- 'databricks/dolly-v1-6b'
- 'databricks/dolly-v2-7b'
# - 'meta-llama/Llama-2-13b-chat-hf'
# - 'THUDM/chatglm2-6b'
# - 'THUDM/chatglm3-6b-4bit'
# - 'tiiuae/falcon-7b-instruct-with-patch'
# - 'mosaicml/mpt-7b-chat'
# - 'redpajama/gptneox-7b-redpajama-bf16'
# - 'bigcode/starcoder-15.5b-4bit'
# - 'databricks/dolly-v1-6b'
# - 'databricks/dolly-v2-7b'
# - 'databricks/dolly-v2-12b'
- 'internlm/internlm-chat-7b'
- 'Qwen/Qwen-7B-Chat'
- 'BAAI/AquilaChat-7B'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit'
- 'bigscience/bloomz-7b1'
# - 'internlm/internlm-chat-7b'
# - 'Qwen/Qwen-7B-Chat'
# - 'BAAI/AquilaChat-7B'
# - 'baichuan-inc/Baichuan2-7B-Chat'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
# - 'bigscience/bloomz-7b1'
# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
- 'mistralai/Mistral-7B-v0.1'
# - 'mistralai/Mistral-7B-v0.1'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
@@ -32,7 +32,7 @@ test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
exclude:
- 'bigcode/starcoder-15.5b-4bit:2048'
# - 'bigcode/starcoder-15.5b-4bit:2048'
# - 'databricks/dolly-v2-12b:2048'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
- 'bigscience/bloomz-7b1:2048'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
# - 'bigscience/bloomz-7b1:2048'
16 changes: 16 additions & 0 deletions python/llm/test/benchmark/arc-perf-test-batch4.yaml
@@ -0,0 +1,16 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
num_beams: 1 # default to greedy search
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 4 # default to 1
in_out_pairs:
- '32-32'
- '1024-128'
- '2048-256'
test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
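The keys mirror the existing batch-1 and batch-2 configs; each in_out_pairs entry reads as input-output token counts, and batch_size applies to every pair. A minimal sketch of how a runner could consume such a file (illustrative only, not the repository's run.py):

    import yaml

    # Expand the benchmark matrix described by a config like the one above.
    with open('config.yaml') as f:
        conf = yaml.safe_load(f)

    for repo_id in conf['repo_id']:
        for pair in conf['in_out_pairs']:
            in_tokens, out_tokens = (int(n) for n in pair.split('-'))
            # A real runner would load the model from conf['local_model_hub'] and
            # generate conf['batch_size'] sequences per forward pass here.
            print(repo_id, in_tokens, out_tokens, conf['batch_size'])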

36 changes: 18 additions & 18 deletions python/llm/test/benchmark/arc-perf-test.yaml
@@ -1,23 +1,23 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
- 'THUDM/chatglm2-6b'
- 'THUDM/chatglm3-6b-4bit'
- 'tiiuae/falcon-7b-instruct-with-patch'
- 'mosaicml/mpt-7b-chat'
- 'redpajama/gptneox-7b-redpajama-bf16'
- 'bigcode/starcoder-15.5b-4bit'
- 'databricks/dolly-v1-6b'
- 'databricks/dolly-v2-7b'
# - 'meta-llama/Llama-2-13b-chat-hf'
# - 'THUDM/chatglm2-6b'
# - 'THUDM/chatglm3-6b-4bit'
# - 'tiiuae/falcon-7b-instruct-with-patch'
# - 'mosaicml/mpt-7b-chat'
# - 'redpajama/gptneox-7b-redpajama-bf16'
# - 'bigcode/starcoder-15.5b-4bit'
# - 'databricks/dolly-v1-6b'
# - 'databricks/dolly-v2-7b'
# - 'databricks/dolly-v2-12b'
- 'internlm/internlm-chat-7b'
- 'Qwen/Qwen-7B-Chat'
- 'BAAI/AquilaChat-7B'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit'
- 'bigscience/bloomz-7b1'
# - 'internlm/internlm-chat-7b'
# - 'Qwen/Qwen-7B-Chat'
# - 'BAAI/AquilaChat-7B'
# - 'baichuan-inc/Baichuan2-7B-Chat'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
# - 'bigscience/bloomz-7b1'
# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
- 'mistralai/Mistral-7B-v0.1'
# - 'mistralai/Mistral-7B-v0.1'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
@@ -34,5 +34,5 @@ cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
exclude:
# - 'fnlp/moss-moon-003-sft-4bit:1024'
# - 'fnlp/moss-moon-003-sft-4bit:2048'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
- 'bigscience/bloomz-7b1:2048'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
# - 'bigscience/bloomz-7b1:2048'
6 changes: 3 additions & 3 deletions python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
@@ -1,9 +1,9 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'microsoft/phi-2'
- 'microsoft/Phi-3-mini-4k-instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
# - 'microsoft/phi-2'
# - 'microsoft/Phi-3-mini-4k-instruct'
# - 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
16 changes: 16 additions & 0 deletions python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml
@@ -0,0 +1,16 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
num_beams: 1 # default to greedy search
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 4 # default to 1
in_out_pairs:
- '32-32'
- '1024-128'
- '2048-256'
test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
6 changes: 3 additions & 3 deletions python/llm/test/benchmark/arc-perf-transformers-437.yaml
@@ -1,9 +1,9 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'microsoft/phi-2'
- 'microsoft/Phi-3-mini-4k-instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
# - 'microsoft/phi-2'
# - 'microsoft/Phi-3-mini-4k-instruct'
# - 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
4 changes: 2 additions & 2 deletions python/llm/test/benchmark/update_html_in_parent_folder.py
@@ -24,13 +24,13 @@
def update_html_in_parent_folder(folder_path):

current_folder = Path(folder_path)
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'merged/']
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'batch_size_4/',current_folder/'merged/']

# List all html files under current folder and delete them
for html_file in current_folder.glob('*.html'):
html_file.unlink()
for folder in folder_list:
# Find latest html file under batch1/batch2/merged folders
# Find latest html file under batch1/batch2/batch4/merged folders
latest_html_file = max(Path(folder).glob('*.html'), key=os.path.getctime, default=None)
# Copy the latest html file to parent folder
if latest_html_file is not None:
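With batch_size_4 added to folder_list, a single call refreshes the parent folder's report from the newest html in each of the four subfolders; a hypothetical invocation (the path is illustrative):

    # Assumes the nightly reports live under this hypothetical folder.
    update_html_in_parent_folder('/mnt/disk1/nightly_perf/gpu')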