diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 2d0a293e459..a546e017a16 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -13,12 +13,12 @@ on:
   schedule:
     - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
   # please uncomment it for PR tests
-  # pull_request:
-  #   branches: [main]
-  #   paths:
-  #     - ".github/workflows/llm_performance_tests.yml"
-  #     - "python/llm/test/benchmark/**"
-  #     - "python/llm/dev/benchmark/all-in-one/**"
+  pull_request:
+    branches: [main]
+    paths:
+      - ".github/workflows/llm_performance_tests.yml"
+      - "python/llm/test/benchmark/**"
+      - "python/llm/dev/benchmark/all-in-one/**"
   workflow_dispatch:
     inputs:
       arc:
@@ -49,7 +49,7 @@ jobs:
   #   uses: ./.github/workflows/llm-binary-build.yml

   llm-performance-test-on-arc:
-    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
+    # if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
     # needs: llm-cpp-build # please uncomment it for PR tests
     strategy:
       fail-fast: false
@@ -96,11 +96,11 @@ jobs:
       shell: bash
       run: |
         pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
-        test_version_date=`date -d 'yesterday' '+%Y%m%d'`
-        if ! pip show ipex-llm | grep $test_version_date; then
-          echo "Did not install ipex-llm with excepted version $test_version_date"
-          exit 1
-        fi
+        # test_version_date=`date -d 'yesterday' '+%Y%m%d'`
+        # if ! pip show ipex-llm | grep $test_version_date; then
+        #   echo "Did not install ipex-llm with excepted version $test_version_date"
+        #   exit 1
+        # fi

     - name: Test installed xpu version
       shell: bash
@@ -120,6 +120,7 @@ jobs:
         cd python/llm/dev/benchmark/all-in-one
         mkdir test_batch1
         mkdir test_batch2
+        mkdir test_batch4
         # batch_size 1
         # hide time info
         sed -i 's/str(end - st)/"xxxxxx"/g' run.py
@@ -135,6 +136,14 @@ jobs:
         sed -i 's/batch1/batch2/g' run.py
         python run.py
         mv *.csv test_batch2
+        # batch_size 4
+        cd ../../../../../
+        cp python/llm/test/benchmark/arc-perf-test-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+        cd python/llm/dev/benchmark/all-in-one
+        # change csv name
+        sed -i 's/batch2/batch4/g' run.py
+        python run.py
+        mv *.csv test_batch4

     - name: Test on xpu(transformers==4.37.0)
       shell: bash
@@ -159,6 +168,14 @@ jobs:
         sed -i 's/batch1/batch2/g' run.py
         python run.py
         mv *.csv test_batch2
+        # batch_size 4
+        cd ../../../../../
+        cp python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+        cd python/llm/dev/benchmark/all-in-one
+        # change csv name
+        sed -i 's/batch2/batch4/g' run.py
+        python run.py
+        mv *.csv test_batch4

     - name: Concat csv and generate html
       shell: bash
@@ -185,6 +202,17 @@ jobs:
         done
         cd ../../../../test/benchmark
         python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
+        # batch_size 4
+        cd ../../../../
+        cd python/llm/dev/benchmark/all-in-one/test_batch4
+        python ../../../../test/benchmark/concat_csv.py
+        for file in *.csv; do
+          if [[ $file != *test* ]]; then
+            cp "$file" $CSV_SAVE_PATH/batch_size_4
+          fi
+        done
+        cd ../../../../test/benchmark
+        python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_4

     - name: Merge and sort csv files of multiple batches and generate html
       shell: bash
@@ -204,6 +232,12 @@ jobs:
             cp "$file" ../../../../test/benchmark/merged_temp
           fi
         done
+        cd ../test_batch4
+        for file in *.csv; do
+          if [[ $file != *test* ]]; then
+            cp "$file" ../../../../test/benchmark/merged_temp
+          fi
+        done
         cd ../../../../test/benchmark
         python merge_csv_batch.py -f ./merged_temp
         cd merged_temp
@@ -244,6 +278,16 @@ jobs:
         fi
         cd ../
         rm -r test_batch2
+        # batch_size 4
+        cd test_batch4
+        python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch4.yaml
+        python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch4.yaml
+        find . -name "*test*.csv" -delete
+        if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then
+          curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
+        fi
+        cd ../
+        rm -r test_batch4

   llm-performance-test-on-spr:
diff --git a/python/llm/test/benchmark/arc-perf-test-batch2.yaml b/python/llm/test/benchmark/arc-perf-test-batch2.yaml
index 90a8461d36b..a2dfe89b4f7 100644
--- a/python/llm/test/benchmark/arc-perf-test-batch2.yaml
+++ b/python/llm/test/benchmark/arc-perf-test-batch2.yaml
@@ -1,23 +1,23 @@
 repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
-  - 'meta-llama/Llama-2-13b-chat-hf'
-  - 'THUDM/chatglm2-6b'
-  - 'THUDM/chatglm3-6b-4bit'
-  - 'tiiuae/falcon-7b-instruct-with-patch'
-  - 'mosaicml/mpt-7b-chat'
-  - 'redpajama/gptneox-7b-redpajama-bf16'
-  - 'bigcode/starcoder-15.5b-4bit'
-  - 'databricks/dolly-v1-6b'
-  - 'databricks/dolly-v2-7b'
+  # - 'meta-llama/Llama-2-13b-chat-hf'
+  # - 'THUDM/chatglm2-6b'
+  # - 'THUDM/chatglm3-6b-4bit'
+  # - 'tiiuae/falcon-7b-instruct-with-patch'
+  # - 'mosaicml/mpt-7b-chat'
+  # - 'redpajama/gptneox-7b-redpajama-bf16'
+  # - 'bigcode/starcoder-15.5b-4bit'
+  # - 'databricks/dolly-v1-6b'
+  # - 'databricks/dolly-v2-7b'
   # - 'databricks/dolly-v2-12b'
-  - 'internlm/internlm-chat-7b'
-  - 'Qwen/Qwen-7B-Chat'
-  - 'BAAI/AquilaChat-7B'
-  - 'baichuan-inc/Baichuan2-7B-Chat'
-  - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
-  - 'bigscience/bloomz-7b1'
+  # - 'internlm/internlm-chat-7b'
+  # - 'Qwen/Qwen-7B-Chat'
+  # - 'BAAI/AquilaChat-7B'
+  # - 'baichuan-inc/Baichuan2-7B-Chat'
+  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
+  # - 'bigscience/bloomz-7b1'
   # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
-  - 'mistralai/Mistral-7B-v0.1'
+  # - 'mistralai/Mistral-7B-v0.1'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
@@ -32,7 +32,7 @@ test_api:
   - "transformer_int4_fp16_gpu" # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
 exclude:
-  - 'bigcode/starcoder-15.5b-4bit:2048'
+  # - 'bigcode/starcoder-15.5b-4bit:2048'
   # - 'databricks/dolly-v2-12b:2048'
-  - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
-  - 'bigscience/bloomz-7b1:2048'
\ No newline at end of file
+  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
+  # - 'bigscience/bloomz-7b1:2048'
\ No newline at end of file
diff --git a/python/llm/test/benchmark/arc-perf-test-batch4.yaml b/python/llm/test/benchmark/arc-perf-test-batch4.yaml
new file mode 100644
index 00000000000..f4e58dd098a
--- /dev/null
+++ b/python/llm/test/benchmark/arc-perf-test-batch4.yaml
@@ -0,0 +1,16 @@
+repo_id:
+  - 'meta-llama/Llama-2-7b-chat-hf'
+local_model_hub: '/mnt/disk1/models'
+warm_up: 1
+num_trials: 3
+num_beams: 1 # default to greedy search
+low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
+batch_size: 4 # default to 1
+in_out_pairs:
+  - '32-32'
+  - '1024-128'
+  - '2048-256'
+test_api:
+  - "transformer_int4_fp16_gpu" # on Intel GPU
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
+
diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml
index 7fbf17d6fd1..fba6c611d74 100644
--- a/python/llm/test/benchmark/arc-perf-test.yaml
+++ b/python/llm/test/benchmark/arc-perf-test.yaml
@@ -1,23 +1,23 @@
 repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
-  - 'meta-llama/Llama-2-13b-chat-hf'
-  - 'THUDM/chatglm2-6b'
-  - 'THUDM/chatglm3-6b-4bit'
-  - 'tiiuae/falcon-7b-instruct-with-patch'
-  - 'mosaicml/mpt-7b-chat'
-  - 'redpajama/gptneox-7b-redpajama-bf16'
-  - 'bigcode/starcoder-15.5b-4bit'
-  - 'databricks/dolly-v1-6b'
-  - 'databricks/dolly-v2-7b'
+  # - 'meta-llama/Llama-2-13b-chat-hf'
+  # - 'THUDM/chatglm2-6b'
+  # - 'THUDM/chatglm3-6b-4bit'
+  # - 'tiiuae/falcon-7b-instruct-with-patch'
+  # - 'mosaicml/mpt-7b-chat'
+  # - 'redpajama/gptneox-7b-redpajama-bf16'
+  # - 'bigcode/starcoder-15.5b-4bit'
+  # - 'databricks/dolly-v1-6b'
+  # - 'databricks/dolly-v2-7b'
   # - 'databricks/dolly-v2-12b'
-  - 'internlm/internlm-chat-7b'
-  - 'Qwen/Qwen-7B-Chat'
-  - 'BAAI/AquilaChat-7B'
-  - 'baichuan-inc/Baichuan2-7B-Chat'
-  - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
-  - 'bigscience/bloomz-7b1'
+  # - 'internlm/internlm-chat-7b'
+  # - 'Qwen/Qwen-7B-Chat'
+  # - 'BAAI/AquilaChat-7B'
+  # - 'baichuan-inc/Baichuan2-7B-Chat'
+  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
+  # - 'bigscience/bloomz-7b1'
   # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
-  - 'mistralai/Mistral-7B-v0.1'
+  # - 'mistralai/Mistral-7B-v0.1'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
@@ -34,5 +34,5 @@ cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu w
 exclude:
   # - 'fnlp/moss-moon-003-sft-4bit:1024'
   # - 'fnlp/moss-moon-003-sft-4bit:2048'
-  - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
-  - 'bigscience/bloomz-7b1:2048'
+  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
+  # - 'bigscience/bloomz-7b1:2048'
diff --git a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
index 116226387c0..c89a4988c75 100644
--- a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
+++ b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
@@ -1,9 +1,9 @@
 # For the models that require transformers 4.37.0
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
-  - 'microsoft/phi-2'
-  - 'microsoft/Phi-3-mini-4k-instruct'
-  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  # - 'microsoft/phi-2'
+  # - 'microsoft/Phi-3-mini-4k-instruct'
+  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml b/python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml
new file mode 100644
index 00000000000..e849e15adc7
--- /dev/null
+++ b/python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml
@@ -0,0 +1,16 @@
+# For the models that require transformers 4.37.0
+repo_id:
+  - 'Qwen/Qwen1.5-7B-Chat'
+local_model_hub: '/mnt/disk1/models'
+warm_up: 1
+num_trials: 3
+num_beams: 1 # default to greedy search
+low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
+batch_size: 4 # default to 1
+in_out_pairs:
+  - '32-32'
+  - '1024-128'
+  - '2048-256'
+test_api:
+  - "transformer_int4_fp16_gpu" # on Intel GPU
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
\ No newline at end of file
diff --git a/python/llm/test/benchmark/arc-perf-transformers-437.yaml b/python/llm/test/benchmark/arc-perf-transformers-437.yaml
index 6756e126bd7..e3ab510adaf 100644
--- a/python/llm/test/benchmark/arc-perf-transformers-437.yaml
+++ b/python/llm/test/benchmark/arc-perf-transformers-437.yaml
@@ -1,9 +1,9 @@
 # For the models that require transformers 4.37.0
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
-  - 'microsoft/phi-2'
-  - 'microsoft/Phi-3-mini-4k-instruct'
-  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  # - 'microsoft/phi-2'
+  # - 'microsoft/Phi-3-mini-4k-instruct'
+  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/update_html_in_parent_folder.py b/python/llm/test/benchmark/update_html_in_parent_folder.py
index dea237fa926..eb9cc97d493 100644
--- a/python/llm/test/benchmark/update_html_in_parent_folder.py
+++ b/python/llm/test/benchmark/update_html_in_parent_folder.py
@@ -24,13 +24,13 @@ def update_html_in_parent_folder(folder_path):
     current_folder = Path(folder_path)

-    folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'merged/']
+    folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'batch_size_4/',current_folder/'merged/']
     # List all html files under current folder and delete them
     for html_file in current_folder.glob('*.html'):
         html_file.unlink()
     for folder in folder_list:
-        # Find latest html file under batch1/batch2/merged folders
+        # Find latest html file under batch1/batch2/batch4/merged folders
         latest_html_file = max(Path(folder).glob('*.html'), key=os.path.getctime, default=None)
         # Copy the latest html file to parent folder
         if latest_html_file is not None: