From 7664df862d7f87127b6a2d321218ec03b3dd4a0c Mon Sep 17 00:00:00 2001 From: Yishuo Wang Date: Thu, 6 Jun 2024 15:52:10 +0800 Subject: [PATCH 1/3] change the workflow to test ftp --- .github/workflows/llm_performance_tests.yml | 63 +++++++++++---------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index c9f2c8300cc..f766f5200c2 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -13,23 +13,23 @@ on: schedule: - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - # pull_request: - # branches: [main] - # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + pull_request: + branches: [main] + paths: + - ".github/workflows/llm_performance_tests.yml" + - "python/llm/test/benchmark/**" + - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # llm-cpp-build: # please uncomment it for PR tests - # uses: ./.github/workflows/llm-binary-build.yml + llm-cpp-build: # please uncomment it for PR tests + uses: ./.github/workflows/llm-binary-build.yml llm-performance-test-on-arc: - if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests + # if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -63,23 +63,23 @@ jobs: python -m pip install --upgrade tiktoken # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - # with: - # extra-dependency: "xpu_2.1" + - name: Run LLM install (all) test + uses: ./.github/actions/llm/setup-llm-env + with: + extra-dependency: "xpu_2.1" - - name: Install IPEX-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - if ! pip show ipex-llm | grep $test_version_date; then - echo "Did not install ipex-llm with excepted version $test_version_date" - exit 1 - fi + # - name: Install IPEX-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! 
pip show ipex-llm | grep $test_version_date; then + # echo "Did not install ipex-llm with excepted version $test_version_date" + # exit 1 + # fi - name: Test installed xpu version shell: bash @@ -173,21 +173,22 @@ jobs: python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test.yaml python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437.yaml find . -name "*test*.csv" -delete + # if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + # fi cd ../ rm -r test_batch1 - if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ - fi # batch_size 2 cd test_batch2 python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch2.yaml python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch2.yaml find . -name "*test*.csv" -delete + # if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + # fi cd ../ rm -r test_batch2 - if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ - fi + llm-performance-test-on-spr: if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests From 3a874323eadbe7aca3850ba5951e31a2ae08369d Mon Sep 17 00:00:00 2001 From: Yishuo Wang Date: Thu, 6 Jun 2024 16:06:38 +0800 Subject: [PATCH 2/3] comment some models --- .../test/benchmark/arc-perf-test-batch2.yaml | 42 +++++++++---------- python/llm/test/benchmark/arc-perf-test.yaml | 38 ++++++++--------- .../arc-perf-transformers-437-batch2.yaml | 6 +-- .../benchmark/arc-perf-transformers-437.yaml | 6 +-- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/python/llm/test/benchmark/arc-perf-test-batch2.yaml b/python/llm/test/benchmark/arc-perf-test-batch2.yaml index 00b2e4c1a53..a12dbfa44ff 100644 --- a/python/llm/test/benchmark/arc-perf-test-batch2.yaml +++ b/python/llm/test/benchmark/arc-perf-test-batch2.yaml @@ -1,23 +1,23 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b-4bit' - - 'tiiuae/falcon-7b-instruct-with-patch' - - 'mosaicml/mpt-7b-chat' - - 'redpajama/gptneox-7b-redpajama-bf16' - - 'bigcode/starcoder-15.5b-4bit' - - 'databricks/dolly-v1-6b' - - 'databricks/dolly-v2-7b' - - 'databricks/dolly-v2-12b' - - 'internlm/internlm-chat-7b' - - 'Qwen/Qwen-7B-Chat' - - 'BAAI/AquilaChat-7B' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - - 'bigscience/bloomz-7b1' + # - 'meta-llama/Llama-2-13b-chat-hf' + # - 'THUDM/chatglm2-6b' + # - 'THUDM/chatglm3-6b-4bit' + # - 'tiiuae/falcon-7b-instruct-with-patch' + # - 'mosaicml/mpt-7b-chat' + # - 'redpajama/gptneox-7b-redpajama-bf16' + # - 'bigcode/starcoder-15.5b-4bit' + # - 'databricks/dolly-v1-6b' + # - 'databricks/dolly-v2-7b' + # - 'databricks/dolly-v2-12b' + # - 'internlm/internlm-chat-7b' + # - 'Qwen/Qwen-7B-Chat' + # - 'BAAI/AquilaChat-7B' + # - 'baichuan-inc/Baichuan2-7B-Chat' 
+ # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + # - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+ - - 'mistralai/Mistral-7B-v0.1' + # - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 @@ -32,7 +32,7 @@ test_api: - "transformer_int4_gpu" # on Intel GPU cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: - - 'bigcode/starcoder-15.5b-4bit:2048' - - 'databricks/dolly-v2-12b:2048' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - - 'bigscience/bloomz-7b1:2048' \ No newline at end of file + # - 'bigcode/starcoder-15.5b-4bit:2048' + # - 'databricks/dolly-v2-12b:2048' + # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' + # - 'bigscience/bloomz-7b1:2048' \ No newline at end of file diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 895588ce4e4..0117e1d7deb 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -1,23 +1,23 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b-4bit' - - 'tiiuae/falcon-7b-instruct-with-patch' - - 'mosaicml/mpt-7b-chat' - - 'redpajama/gptneox-7b-redpajama-bf16' - - 'bigcode/starcoder-15.5b-4bit' - - 'databricks/dolly-v1-6b' - - 'databricks/dolly-v2-7b' - - 'databricks/dolly-v2-12b' - - 'internlm/internlm-chat-7b' - - 'Qwen/Qwen-7B-Chat' - - 'BAAI/AquilaChat-7B' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - - 'bigscience/bloomz-7b1' + # - 'meta-llama/Llama-2-13b-chat-hf' + # - 'THUDM/chatglm2-6b' + # - 'THUDM/chatglm3-6b-4bit' + # - 'tiiuae/falcon-7b-instruct-with-patch' + # - 'mosaicml/mpt-7b-chat' + # - 'redpajama/gptneox-7b-redpajama-bf16' + # - 'bigcode/starcoder-15.5b-4bit' + # - 'databricks/dolly-v1-6b' + # - 'databricks/dolly-v2-7b' + # - 'databricks/dolly-v2-12b' + # - 'internlm/internlm-chat-7b' + # - 'Qwen/Qwen-7B-Chat' + # - 'BAAI/AquilaChat-7B' + # - 'baichuan-inc/Baichuan2-7B-Chat' + # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + # - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+ - - 'mistralai/Mistral-7B-v0.1' + # - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 @@ -34,5 +34,5 @@ cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu w exclude: # - 'fnlp/moss-moon-003-sft-4bit:1024' # - 'fnlp/moss-moon-003-sft-4bit:2048' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - - 'bigscience/bloomz-7b1:2048' + # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' + # - 'bigscience/bloomz-7b1:2048' diff --git a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml index c9644dc905c..f26969b4ec4 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml @@ -1,9 +1,9 @@ # For the models that require transformers 4.37.0 repo_id: - 'Qwen/Qwen1.5-7B-Chat' - - 'microsoft/phi-2' - - 'microsoft/Phi-3-mini-4k-instruct' - - 'meta-llama/Meta-Llama-3-8B-Instruct' + # - 'microsoft/phi-2' + # - 'microsoft/Phi-3-mini-4k-instruct' + # - 'meta-llama/Meta-Llama-3-8B-Instruct' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/arc-perf-transformers-437.yaml 
b/python/llm/test/benchmark/arc-perf-transformers-437.yaml index c9cc5ce82a5..77dc132263b 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-437.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-437.yaml @@ -1,9 +1,9 @@ # For the models that require transformers 4.37.0 repo_id: - 'Qwen/Qwen1.5-7B-Chat' - - 'microsoft/phi-2' - - 'microsoft/Phi-3-mini-4k-instruct' - - 'meta-llama/Meta-Llama-3-8B-Instruct' + # - 'microsoft/phi-2' + # - 'microsoft/Phi-3-mini-4k-instruct' + # - 'meta-llama/Meta-Llama-3-8B-Instruct' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 From f90ec502bbb62a2b5c53167eb8961bb07be90006 Mon Sep 17 00:00:00 2001 From: Yishuo Wang Date: Thu, 6 Jun 2024 16:47:50 +0800 Subject: [PATCH 3/3] revert file --- .github/workflows/llm_performance_tests.yml | 62 +++++++++---------- .../test/benchmark/arc-perf-test-batch2.yaml | 42 ++++++------- python/llm/test/benchmark/arc-perf-test.yaml | 38 ++++++------ .../arc-perf-transformers-437-batch2.yaml | 6 +- .../benchmark/arc-perf-transformers-437.yaml | 6 +- 5 files changed, 77 insertions(+), 77 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index f766f5200c2..1db4e1259bf 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -13,23 +13,23 @@ on: schedule: - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - pull_request: - branches: [main] - paths: - - ".github/workflows/llm_performance_tests.yml" - - "python/llm/test/benchmark/**" - - "python/llm/dev/benchmark/all-in-one/**" + # pull_request: + # branches: [main] + # paths: + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - llm-cpp-build: # please uncomment it for PR tests - uses: ./.github/workflows/llm-binary-build.yml + # llm-cpp-build: # please uncomment it for PR tests + # uses: ./.github/workflows/llm-binary-build.yml llm-performance-test-on-arc: - # if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - needs: llm-cpp-build # please uncomment it for PR tests + if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -63,23 +63,23 @@ jobs: python -m pip install --upgrade tiktoken # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary - - name: Run LLM install (all) test - uses: ./.github/actions/llm/setup-llm-env - with: - extra-dependency: "xpu_2.1" + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + # with: + # extra-dependency: "xpu_2.1" - # - name: Install IPEX-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # 
test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! pip show ipex-llm | grep $test_version_date; then - # echo "Did not install ipex-llm with excepted version $test_version_date" - # exit 1 - # fi + - name: Install IPEX-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + if ! pip show ipex-llm | grep $test_version_date; then + echo "Did not install ipex-llm with excepted version $test_version_date" + exit 1 + fi - name: Test installed xpu version shell: bash @@ -173,9 +173,9 @@ jobs: python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test.yaml python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437.yaml find . -name "*test*.csv" -delete - # if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ - # fi + if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + fi cd ../ rm -r test_batch1 # batch_size 2 @@ -183,9 +183,9 @@ jobs: python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch2.yaml python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch2.yaml find . -name "*test*.csv" -delete - # if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ - # fi + if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + fi cd ../ rm -r test_batch2 diff --git a/python/llm/test/benchmark/arc-perf-test-batch2.yaml b/python/llm/test/benchmark/arc-perf-test-batch2.yaml index a12dbfa44ff..00b2e4c1a53 100644 --- a/python/llm/test/benchmark/arc-perf-test-batch2.yaml +++ b/python/llm/test/benchmark/arc-perf-test-batch2.yaml @@ -1,23 +1,23 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - # - 'meta-llama/Llama-2-13b-chat-hf' - # - 'THUDM/chatglm2-6b' - # - 'THUDM/chatglm3-6b-4bit' - # - 'tiiuae/falcon-7b-instruct-with-patch' - # - 'mosaicml/mpt-7b-chat' - # - 'redpajama/gptneox-7b-redpajama-bf16' - # - 'bigcode/starcoder-15.5b-4bit' - # - 'databricks/dolly-v1-6b' - # - 'databricks/dolly-v2-7b' - # - 'databricks/dolly-v2-12b' - # - 'internlm/internlm-chat-7b' - # - 'Qwen/Qwen-7B-Chat' - # - 'BAAI/AquilaChat-7B' - # - 'baichuan-inc/Baichuan2-7B-Chat' - # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - # - 'bigscience/bloomz-7b1' + - 'meta-llama/Llama-2-13b-chat-hf' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b-4bit' + - 'tiiuae/falcon-7b-instruct-with-patch' + - 'mosaicml/mpt-7b-chat' + - 'redpajama/gptneox-7b-redpajama-bf16' + - 'bigcode/starcoder-15.5b-4bit' + - 'databricks/dolly-v1-6b' + - 'databricks/dolly-v2-7b' + - 'databricks/dolly-v2-12b' + - 'internlm/internlm-chat-7b' + - 'Qwen/Qwen-7B-Chat' + - 'BAAI/AquilaChat-7B' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+ - # - 'mistralai/Mistral-7B-v0.1' + - 'mistralai/Mistral-7B-v0.1' local_model_hub: 
'/mnt/disk1/models' warm_up: 1 num_trials: 3 @@ -32,7 +32,7 @@ test_api: - "transformer_int4_gpu" # on Intel GPU cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: - # - 'bigcode/starcoder-15.5b-4bit:2048' - # - 'databricks/dolly-v2-12b:2048' - # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - # - 'bigscience/bloomz-7b1:2048' \ No newline at end of file + - 'bigcode/starcoder-15.5b-4bit:2048' + - 'databricks/dolly-v2-12b:2048' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' + - 'bigscience/bloomz-7b1:2048' \ No newline at end of file diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 0117e1d7deb..895588ce4e4 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -1,23 +1,23 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - # - 'meta-llama/Llama-2-13b-chat-hf' - # - 'THUDM/chatglm2-6b' - # - 'THUDM/chatglm3-6b-4bit' - # - 'tiiuae/falcon-7b-instruct-with-patch' - # - 'mosaicml/mpt-7b-chat' - # - 'redpajama/gptneox-7b-redpajama-bf16' - # - 'bigcode/starcoder-15.5b-4bit' - # - 'databricks/dolly-v1-6b' - # - 'databricks/dolly-v2-7b' - # - 'databricks/dolly-v2-12b' - # - 'internlm/internlm-chat-7b' - # - 'Qwen/Qwen-7B-Chat' - # - 'BAAI/AquilaChat-7B' - # - 'baichuan-inc/Baichuan2-7B-Chat' - # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - # - 'bigscience/bloomz-7b1' + - 'meta-llama/Llama-2-13b-chat-hf' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b-4bit' + - 'tiiuae/falcon-7b-instruct-with-patch' + - 'mosaicml/mpt-7b-chat' + - 'redpajama/gptneox-7b-redpajama-bf16' + - 'bigcode/starcoder-15.5b-4bit' + - 'databricks/dolly-v1-6b' + - 'databricks/dolly-v2-7b' + - 'databricks/dolly-v2-12b' + - 'internlm/internlm-chat-7b' + - 'Qwen/Qwen-7B-Chat' + - 'BAAI/AquilaChat-7B' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+ - # - 'mistralai/Mistral-7B-v0.1' + - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 @@ -34,5 +34,5 @@ cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu w exclude: # - 'fnlp/moss-moon-003-sft-4bit:1024' # - 'fnlp/moss-moon-003-sft-4bit:2048' - # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - # - 'bigscience/bloomz-7b1:2048' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' + - 'bigscience/bloomz-7b1:2048' diff --git a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml index f26969b4ec4..c9644dc905c 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml @@ -1,9 +1,9 @@ # For the models that require transformers 4.37.0 repo_id: - 'Qwen/Qwen1.5-7B-Chat' - # - 'microsoft/phi-2' - # - 'microsoft/Phi-3-mini-4k-instruct' - # - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'microsoft/phi-2' + - 'microsoft/Phi-3-mini-4k-instruct' + - 'meta-llama/Meta-Llama-3-8B-Instruct' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/arc-perf-transformers-437.yaml b/python/llm/test/benchmark/arc-perf-transformers-437.yaml index 77dc132263b..c9cc5ce82a5 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-437.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-437.yaml @@ -1,9 +1,9 @@ # For the models 
that require transformers 4.37.0 repo_id: - 'Qwen/Qwen1.5-7B-Chat' - # - 'microsoft/phi-2' - # - 'microsoft/Phi-3-mini-4k-instruct' - # - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'microsoft/phi-2' + - 'microsoft/Phi-3-mini-4k-instruct' + - 'meta-llama/Meta-Llama-3-8B-Instruct' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3
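
For reference, the upload logic these patches toggle follows one pattern: push the benchmark CSVs to the FTP server only for nightly (schedule) or manually dispatched runs, and skip the upload for pull-request test runs. Below is a minimal sketch of that logic as a standalone step; the step name is illustrative (in the actual workflow the commands are inlined in a larger run block), and it assumes LLM_FTP_URL is made available to the job as an environment variable or secret, as referenced in the workflow above.

    - name: Upload benchmark CSVs (nightly and manual runs only)   # illustrative step name
      shell: bash
      run: |
        # Gate the FTP upload on the triggering event: scheduled nightly runs and
        # manual workflow_dispatch runs upload results; PR runs do not.
        if [ "${{ github.event_name }}" == "schedule" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
          curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
        fi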