diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 9185e4282d5..e7b1b890c93 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -145,6 +145,38 @@ jobs: python -m pip install --upgrade expecttest bash python/llm/test/run-llm-install-tests.sh + - name: Test on xpu(transformers==4.38.2) + shell: bash + run: | + source /opt/intel/oneapi/setvars.sh + export USE_XETLA=OFF + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # upgrade transformers for model stablelm/stablelm-zephyr-3b & Gemma/gemma-7b-it + python -m pip install transformers==4.38.2 + # batch_size 1 + cp python/llm/test/benchmark/arc-perf-transformers-438.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/test1_batch4/test2_batch1/g' run.py + python run.py + mv *.csv test_batch1 + # batch_size 2 + cd ../../../../../ + cp python/llm/test/benchmark/arc-perf-transformers-438-batch2.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/batch1/batch2/g' run.py + python run.py + mv *.csv test_batch2 + # batch_size 4 + cd ../../../../../ + cp python/llm/test/benchmark/arc-perf-transformers-438-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/batch2/batch4/g' run.py + python run.py + mv *.csv test_batch4 + - name: Test on xpu(transformers==4.36.2) shell: bash run: | diff --git a/python/llm/test/benchmark/arc-perf-transformers-438-batch2.yaml b/python/llm/test/benchmark/arc-perf-transformers-438-batch2.yaml new file mode 100644 index 00000000000..46c3905782c --- /dev/null +++ b/python/llm/test/benchmark/arc-perf-transformers-438-batch2.yaml @@ -0,0 +1,17 @@ +# For the models that require transformers 4.38.2 +repo_id: + - 'stablelm/stablelm-zephyr-3b' +local_model_hub: 
'/mnt/disk1/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 2 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_fp16_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only available now for gpu win related test_api) +task: 'continuation' # task can be 'continuation', 'QA' and 'summarize' diff --git a/python/llm/test/benchmark/arc-perf-transformers-438-batch4.yaml b/python/llm/test/benchmark/arc-perf-transformers-438-batch4.yaml new file mode 100644 index 00000000000..598b5ce1771 --- /dev/null +++ b/python/llm/test/benchmark/arc-perf-transformers-438-batch4.yaml @@ -0,0 +1,17 @@ +# For the models that require transformers 4.38.2 +repo_id: + - 'stablelm/stablelm-zephyr-3b' +local_model_hub: '/mnt/disk1/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 4 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_fp16_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only available now for gpu win related test_api) +task: 'continuation' # task can be 'continuation', 'QA' and 'summarize' diff --git a/python/llm/test/benchmark/arc-perf-transformers-438.yaml b/python/llm/test/benchmark/arc-perf-transformers-438.yaml new file mode 100644 index 00000000000..02065068962 --- /dev/null +++ b/python/llm/test/benchmark/arc-perf-transformers-438.yaml @@ -0,0 +1,17 @@ +# For the models that require transformers 4.38.2 +repo_id: + - 'stablelm/stablelm-zephyr-3b' +local_model_hub: '/mnt/disk1/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. 
symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_fp16_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only available now for gpu win related test_api) +task: 'continuation' # task can be 'continuation', 'QA' and 'summarize'