Add batch 4 nightly-perf-test #11332

Closed · wants to merge 9 commits
68 changes: 56 additions & 12 deletions .github/workflows/llm_performance_tests.yml
@@ -13,12 +13,12 @@ on:
schedule:
- cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
# please uncomment it for PR tests
# pull_request:
# branches: [main]
# paths:
# - ".github/workflows/llm_performance_tests.yml"
# - "python/llm/test/benchmark/**"
# - "python/llm/dev/benchmark/all-in-one/**"
pull_request:
branches: [main]
paths:
- ".github/workflows/llm_performance_tests.yml"
- "python/llm/test/benchmark/**"
- "python/llm/dev/benchmark/all-in-one/**"
workflow_dispatch:
inputs:
arc:
@@ -49,7 +49,7 @@ jobs:
# uses: ./.github/workflows/llm-binary-build.yml

llm-performance-test-on-arc:
if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it out for PR tests
# if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.arc ) || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it out for PR tests
# needs: llm-cpp-build # please uncomment it for PR tests
strategy:
fail-fast: false
@@ -96,11 +96,11 @@ jobs:
shell: bash
run: |
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
test_version_date=`date -d 'yesterday' '+%Y%m%d'`
if ! pip show ipex-llm | grep $test_version_date; then
echo "Did not install ipex-llm with excepted version $test_version_date"
exit 1
fi
# test_version_date=`date -d 'yesterday' '+%Y%m%d'`
# if ! pip show ipex-llm | grep $test_version_date; then
# echo "Did not install ipex-llm with excepted version $test_version_date"
# exit 1
# fi

- name: Test installed xpu version
shell: bash
@@ -120,6 +120,7 @@
cd python/llm/dev/benchmark/all-in-one
mkdir test_batch1
mkdir test_batch2
mkdir test_batch4
# batch_size 1
# hide time info
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
@@ -135,6 +136,14 @@
sed -i 's/batch1/batch2/g' run.py
python run.py
mv *.csv test_batch2
# batch_size 4
cd ../../../../../
cp python/llm/test/benchmark/arc-perf-test-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# change csv name
sed -i 's/batch2/batch4/g' run.py
python run.py
mv *.csv test_batch4
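Each added batch repeats the pattern the existing batch-1 and batch-2 steps already use: copy the batch-specific yaml over config.yaml, retag the csv name in run.py with sed, run, and collect the results. A minimal sketch of that pattern as a reusable shell function, assuming it runs from the repository root (the helper name and its invocation are illustrative, not part of this workflow):

    run_batch() {
      # $1 = batch-specific yaml, $2 = old csv tag in run.py, $3 = new csv tag
      cp "python/llm/test/benchmark/$1" python/llm/dev/benchmark/all-in-one/config.yaml
      (
        cd python/llm/dev/benchmark/all-in-one
        sed -i "s/$2/$3/g" run.py   # e.g. rewrite "batch2" to "batch4" in the csv name
        python run.py
        mkdir -p "test_$3"
        mv *.csv "test_$3"
      )
    }

    run_batch arc-perf-test-batch4.yaml batch2 batch4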

- name: Test on xpu(transformers==4.37.0)
shell: bash
@@ -159,6 +168,14 @@
sed -i 's/batch1/batch2/g' run.py
python run.py
mv *.csv test_batch2
# batch_size 4
cd ../../../../../
cp python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# change csv name
sed -i 's/batch2/batch4/g' run.py
python run.py
mv *.csv test_batch4

- name: Concat csv and generate html
shell: bash
@@ -185,6 +202,17 @@
done
cd ../../../../test/benchmark
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
# batch_size 4
cd ../../../../
cd python/llm/dev/benchmark/all-in-one/test_batch4
python ../../../../test/benchmark/concat_csv.py
for file in *.csv; do
if [[ $file != *test* ]]; then
cp "$file" $CSV_SAVE_PATH/batch_size_4
fi
done
cd ../../../../test/benchmark
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_4
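For context, the concat step collapses the per-run csv files in a batch folder into one summary csv before the html conversion. The repository's concat_csv.py is not shown in this diff, so the sketch below is only an illustration of the general idea, with a hypothetical output name:

    import glob
    import pandas as pd

    # Combine every per-run csv in the current batch folder into one frame.
    frames = [pd.read_csv(path) for path in sorted(glob.glob('*.csv'))]
    # Hypothetical output name; the real script chooses its own.
    pd.concat(frames, ignore_index=True).to_csv('summary.csv', index=False)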

- name: Merge and sort csv files of multiple batches and generate html
shell: bash
@@ -204,6 +232,12 @@
cp "$file" ../../../../test/benchmark/merged_temp
fi
done
cd ../test_batch4
for file in *.csv; do
if [[ $file != *test* ]]; then
cp "$file" ../../../../test/benchmark/merged_temp
fi
done
cd ../../../../test/benchmark
python merge_csv_batch.py -f ./merged_temp
cd merged_temp
@@ -244,6 +278,16 @@
fi
cd ../
rm -r test_batch2
# batch_size 4
cd test_batch4
python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch4.yaml
python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch4.yaml
find . -name "*test*.csv" -delete
if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then
curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
fi
cd ../
rm -r test_batch4


llm-performance-test-on-spr:
38 changes: 19 additions & 19 deletions python/llm/test/benchmark/arc-perf-test-batch2.yaml
@@ -1,23 +1,23 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
- 'THUDM/chatglm2-6b'
- 'THUDM/chatglm3-6b-4bit'
- 'tiiuae/falcon-7b-instruct-with-patch'
- 'mosaicml/mpt-7b-chat'
- 'redpajama/gptneox-7b-redpajama-bf16'
- 'bigcode/starcoder-15.5b-4bit'
- 'databricks/dolly-v1-6b'
- 'databricks/dolly-v2-7b'
# - 'meta-llama/Llama-2-13b-chat-hf'
# - 'THUDM/chatglm2-6b'
# - 'THUDM/chatglm3-6b-4bit'
# - 'tiiuae/falcon-7b-instruct-with-patch'
# - 'mosaicml/mpt-7b-chat'
# - 'redpajama/gptneox-7b-redpajama-bf16'
# - 'bigcode/starcoder-15.5b-4bit'
# - 'databricks/dolly-v1-6b'
# - 'databricks/dolly-v2-7b'
# - 'databricks/dolly-v2-12b'
- 'internlm/internlm-chat-7b'
- 'Qwen/Qwen-7B-Chat'
- 'BAAI/AquilaChat-7B'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit'
- 'bigscience/bloomz-7b1'
# - 'internlm/internlm-chat-7b'
# - 'Qwen/Qwen-7B-Chat'
# - 'BAAI/AquilaChat-7B'
# - 'baichuan-inc/Baichuan2-7B-Chat'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
# - 'bigscience/bloomz-7b1'
# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
- 'mistralai/Mistral-7B-v0.1'
# - 'mistralai/Mistral-7B-v0.1'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
@@ -32,7 +32,7 @@ test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
exclude:
- 'bigcode/starcoder-15.5b-4bit:2048'
# - 'bigcode/starcoder-15.5b-4bit:2048'
# - 'databricks/dolly-v2-12b:2048'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
- 'bigscience/bloomz-7b1:2048'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
# - 'bigscience/bloomz-7b1:2048'
16 changes: 16 additions & 0 deletions python/llm/test/benchmark/arc-perf-test-batch4.yaml
@@ -0,0 +1,16 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
num_beams: 1 # default to greedy search
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 4 # default to 1
in_out_pairs:
- '32-32'
- '1024-128'
- '2048-256'
test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
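The keys mirror the existing batch-1 and batch-2 configs; each in_out_pairs entry reads as input-output token counts, and batch_size applies to every pair. A minimal sketch of how a runner could consume such a file (illustrative only, not the repository's run.py):

    import yaml

    # Expand the benchmark matrix described by a config like the one above.
    with open('config.yaml') as f:
        conf = yaml.safe_load(f)

    for repo_id in conf['repo_id']:
        for pair in conf['in_out_pairs']:
            in_tokens, out_tokens = (int(n) for n in pair.split('-'))
            # A real runner would load the model from conf['local_model_hub'] and
            # generate conf['batch_size'] sequences per forward pass here.
            print(repo_id, in_tokens, out_tokens, conf['batch_size'])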

36 changes: 18 additions & 18 deletions python/llm/test/benchmark/arc-perf-test.yaml
@@ -1,23 +1,23 @@
repo_id:
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
- 'THUDM/chatglm2-6b'
- 'THUDM/chatglm3-6b-4bit'
- 'tiiuae/falcon-7b-instruct-with-patch'
- 'mosaicml/mpt-7b-chat'
- 'redpajama/gptneox-7b-redpajama-bf16'
- 'bigcode/starcoder-15.5b-4bit'
- 'databricks/dolly-v1-6b'
- 'databricks/dolly-v2-7b'
# - 'meta-llama/Llama-2-13b-chat-hf'
# - 'THUDM/chatglm2-6b'
# - 'THUDM/chatglm3-6b-4bit'
# - 'tiiuae/falcon-7b-instruct-with-patch'
# - 'mosaicml/mpt-7b-chat'
# - 'redpajama/gptneox-7b-redpajama-bf16'
# - 'bigcode/starcoder-15.5b-4bit'
# - 'databricks/dolly-v1-6b'
# - 'databricks/dolly-v2-7b'
# - 'databricks/dolly-v2-12b'
- 'internlm/internlm-chat-7b'
- 'Qwen/Qwen-7B-Chat'
- 'BAAI/AquilaChat-7B'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit'
- 'bigscience/bloomz-7b1'
# - 'internlm/internlm-chat-7b'
# - 'Qwen/Qwen-7B-Chat'
# - 'BAAI/AquilaChat-7B'
# - 'baichuan-inc/Baichuan2-7B-Chat'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
# - 'bigscience/bloomz-7b1'
# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
- 'mistralai/Mistral-7B-v0.1'
# - 'mistralai/Mistral-7B-v0.1'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
@@ -34,5 +34,5 @@ cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
exclude:
# - 'fnlp/moss-moon-003-sft-4bit:1024'
# - 'fnlp/moss-moon-003-sft-4bit:2048'
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
- 'bigscience/bloomz-7b1:2048'
# - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
# - 'bigscience/bloomz-7b1:2048'
6 changes: 3 additions & 3 deletions python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
@@ -1,9 +1,9 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'microsoft/phi-2'
- 'microsoft/Phi-3-mini-4k-instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
# - 'microsoft/phi-2'
# - 'microsoft/Phi-3-mini-4k-instruct'
# - 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
16 changes: 16 additions & 0 deletions python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml
@@ -0,0 +1,16 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
num_beams: 1 # default to greedy search
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
batch_size: 4 # default to 1
in_out_pairs:
- '32-32'
- '1024-128'
- '2048-256'
test_api:
- "transformer_int4_fp16_gpu" # on Intel GPU
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
6 changes: 3 additions & 3 deletions python/llm/test/benchmark/arc-perf-transformers-437.yaml
@@ -1,9 +1,9 @@
# For the models that require transformers 4.37.0
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'microsoft/phi-2'
- 'microsoft/Phi-3-mini-4k-instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
# - 'microsoft/phi-2'
# - 'microsoft/Phi-3-mini-4k-instruct'
# - 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
4 changes: 2 additions & 2 deletions python/llm/test/benchmark/update_html_in_parent_folder.py
@@ -24,13 +24,13 @@
def update_html_in_parent_folder(folder_path):

current_folder = Path(folder_path)
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'merged/']
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'batch_size_4/',current_folder/'merged/']

# List all html files under current folder and delete them
for html_file in current_folder.glob('*.html'):
html_file.unlink()
for folder in folder_list:
# Find latest html file under batch1/batch2/merged folders
# Find latest html file under batch1/batch2/batch4/merged folders
latest_html_file = max(Path(folder).glob('*.html'), key=os.path.getctime, default=None)
# Copy the latest html file to parent folder
if latest_html_file is not None:
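With batch_size_4 added to folder_list, a single call refreshes the parent folder's report from the newest html in each of the four subfolders; a hypothetical invocation (the path is illustrative):

    # Assumes the nightly reports live under this hypothetical folder.
    update_html_in_parent_folder('/mnt/disk1/nightly_perf/gpu')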