Arc Stable version test (#10087)
* add batch_size in stable version test

* add batch_size in excludes

* add excludes for batch_size

* fix ci

* trigger regression test

* fix xpu version

* disable ci

* address Kai's comment

---------

Co-authored-by: Ariadne <[email protected]>
Ricky-Ting and Ariadne330 authored Feb 6, 2024
1 parent 33b9e77 commit 36c9442
Showing 4 changed files with 97 additions and 14 deletions.
66 changes: 53 additions & 13 deletions .github/workflows/llm_tests_for_stable_version_on_arc.yml
@@ -61,47 +61,87 @@ jobs:
- name: Run LLM install (all) test
uses: ./.github/actions/llm/setup-llm-env
with:
-extra-dependency: "xpu"
+extra-dependency: "xpu_2.1"

- name: Test installed xpu version
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
bash python/llm/test/run-llm-install-tests.sh
- name: Test on xpu (int4)
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
mv python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# hide time info
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+# batch_size = 1
+sed -i '/batch_size/c\batch_size: 1' config.yaml
python run.py
-cp ./*.csv $CSV_SAVE_PATH/int4
+cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_1
rm ./*.csv
+# batch_size = 2
+sed -i '/batch_size/c\batch_size: 2' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_2
+rm ./*.csv
+# batch_size = 4
+sed -i '/batch_size/c\batch_size: 4' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_4
+rm ./*.csv
+# batch_size = 8
+sed -i '/batch_size/c\batch_size: 8' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_8
+rm ./*.csv
cd ../../../test/benchmark
python -m pip install pandas==1.5.3
-python csv_to_html.py -f $CSV_SAVE_PATH/int4 -b $CSV_SAVE_PATH/int4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_1 -b $CSV_SAVE_PATH/int4/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_2 -b $CSV_SAVE_PATH/int4/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_4 -b $CSV_SAVE_PATH/int4/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_8 -b $CSV_SAVE_PATH/int4/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0
- name: Test on xpu (fp8)
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
mv python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# hide time info
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+# batch_size = 1
+sed -i '/batch_size/c\batch_size: 1' config.yaml
python run.py
-cp ./*.csv $CSV_SAVE_PATH/fp8
+cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_1
rm ./*.csv
+# batch_size = 2
+sed -i '/batch_size/c\batch_size: 2' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_2
+rm ./*.csv
+# batch_size = 4
+sed -i '/batch_size/c\batch_size: 4' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_4
+rm ./*.csv
+# batch_size = 8
+sed -i '/batch_size/c\batch_size: 8' config.yaml
+python run.py
+cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_8
+rm ./*.csv
cd ../../../test/benchmark
python -m pip install pandas==1.5.3
-python csv_to_html.py -f $CSV_SAVE_PATH/fp8 -b $CSV_SAVE_PATH/fp8/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_1 -b $CSV_SAVE_PATH/fp8/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_2 -b $CSV_SAVE_PATH/fp8/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_4 -b $CSV_SAVE_PATH/fp8/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_8 -b $CSV_SAVE_PATH/fp8/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0
llm-stress-test-on-arc:
needs: llm-perf-regression-test-on-arc
@@ -143,18 +183,18 @@ jobs:
- name: Run LLM install (all) test
uses: ./.github/actions/llm/setup-llm-env
with:
-extra-dependency: "xpu"
+extra-dependency: "xpu_2.1"

- name: Test installed xpu version
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
bash python/llm/test/run-llm-install-tests.sh
- name: Test on xpu (int4)
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
mv python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
@@ -171,7 +211,7 @@
- name: Test on xpu (fp8)
shell: bash
run: |
-source /home/arda/intel/oneapi/setvars.sh
+source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
mv python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
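The perf-regression steps above repeat an identical sed / run / copy block once per batch size. As a minimal sketch (not part of the commit), the same sweep could be driven from Python; config.yaml, run.py, the CSV_SAVE_PATH layout, and the working directory python/llm/dev/benchmark/all-in-one are taken from the workflow above:

    import os
    import re
    import shutil
    import subprocess

    csv_save_path = os.environ["CSV_SAVE_PATH"]  # set by the workflow

    for batch_size in (1, 2, 4, 8):
        # Same effect as: sed -i '/batch_size/c\batch_size: N' config.yaml
        with open("config.yaml") as f:
            text = f.read()
        text = re.sub(r"^batch_size:.*$", f"batch_size: {batch_size}", text, flags=re.M)
        with open("config.yaml", "w") as f:
            f.write(text)

        subprocess.run(["python", "run.py"], check=True)  # writes result CSVs to cwd

        dest = os.path.join(csv_save_path, "int4", f"batch_size_{batch_size}")
        os.makedirs(dest, exist_ok=True)
        for name in os.listdir("."):
            if name.endswith(".csv"):
                shutil.move(name, os.path.join(dest, name))  # the workflow's cp + rm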
3 changes: 2 additions & 1 deletion python/llm/dev/benchmark/all-in-one/run.py
@@ -956,7 +956,8 @@ def run_transformer_autocast_bf16( repo_id,
        if excludes:
            for in_out in conf['in_out_pairs']:
                model_id_input = model + ':' + in_out.split('-')[0]
-               if model_id_input in excludes:
+               model_id_input_batch_size = model_id_input + ':' + str(conf['batch_size'])
+               if model_id_input in excludes or model_id_input_batch_size in excludes:
                    in_out_pairs.remove(in_out)
        run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
                  conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
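With this change, an exclude entry can pin a batch size: each entry is matched both as model:input_len and as model:input_len:batch_size. A self-contained sketch of that matching, using illustrative values in place of the real config.yaml:

    # Illustrative values; in run.py these come from config.yaml.
    conf = {"batch_size": 2, "in_out_pairs": ["32-32", "2048-256"]}
    excludes = ["Qwen/Qwen-7B-Chat:2048:2"]
    model = "Qwen/Qwen-7B-Chat"

    in_out_pairs = list(conf["in_out_pairs"])
    for in_out in conf["in_out_pairs"]:
        model_id_input = model + ":" + in_out.split("-")[0]
        model_id_input_batch_size = model_id_input + ":" + str(conf["batch_size"])
        if model_id_input in excludes or model_id_input_batch_size in excludes:
            in_out_pairs.remove(in_out)

    print(in_out_pairs)  # ['32-32'] -- the 2048-token pair is skipped at batch_size 2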
23 changes: 23 additions & 0 deletions python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml
@@ -12,8 +12,31 @@ low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 1 # default to 1
in_out_pairs:
  - '32-32'
  - '512-256'
  - '1024-128'
+  - '2048-256'
test_api:
  - "transformer_int4_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
+exclude:
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:4'
+  - 'meta-llama/Llama-2-7b-chat-hf:512:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'Qwen/Qwen-7B-Chat:2048:1'
+  - 'Qwen/Qwen-7B-Chat:1024:2'
+  - 'Qwen/Qwen-7B-Chat:2048:2'
+  - 'Qwen/Qwen-7B-Chat:512:4'
+  - 'Qwen/Qwen-7B-Chat:1024:4'
+  - 'Qwen/Qwen-7B-Chat:2048:4'
+  - 'Qwen/Qwen-7B-Chat:512:8'
+  - 'Qwen/Qwen-7B-Chat:1024:8'
+  - 'Qwen/Qwen-7B-Chat:2048:8'
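Each exclude entry above is a colon-separated 'model:input_length:batch_size' key, matching what run.py builds; an entry without the trailing batch size excludes that pair at every batch size. A hypothetical helper (not in the repository) that unpacks an entry:

    def parse_exclude(entry: str):
        # 'Qwen/Qwen-7B-Chat:2048:8' -> ('Qwen/Qwen-7B-Chat', 2048, 8)
        parts = entry.split(":")
        model, input_len = parts[0], int(parts[1])
        batch_size = int(parts[2]) if len(parts) > 2 else None  # None: all batch sizes
        return model, input_len, batch_size

    assert parse_exclude("Qwen/Qwen-7B-Chat:2048:8") == ("Qwen/Qwen-7B-Chat", 2048, 8)
    assert parse_exclude("Qwen/Qwen-7B-Chat:2048") == ("Qwen/Qwen-7B-Chat", 2048, None)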
19 changes: 19 additions & 0 deletions python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml
@@ -12,8 +12,27 @@ low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 1 # default to 1
in_out_pairs:
  - '32-32'
  - '512-256'
  - '1024-128'
+  - '2048-256'
test_api:
  - "transformer_int4_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
+exclude:
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:4'
+  - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'Qwen/Qwen-7B-Chat:2048:2'
+  - 'Qwen/Qwen-7B-Chat:1024:4'
+  - 'Qwen/Qwen-7B-Chat:2048:4'
+  - 'Qwen/Qwen-7B-Chat:512:8'
+  - 'Qwen/Qwen-7B-Chat:1024:8'
+  - 'Qwen/Qwen-7B-Chat:2048:8'
