LLM Performance Test #35
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: LLM Performance Test

# One concurrency group per pull request (or per run when there is no PR
# number); starting a new run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-llm-performance-tests-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

# Controls when the action will run.
on:
  schedule:
    - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China
  # pull_request:
  #   branches: [main]
  #   paths:
  #     - ".github/workflows/llm_performance_tests.yml"
  #     - "python/llm/test/benchmark/**"
  #     - "python/llm/dev/benchmark/all-in-one/**"
  workflow_dispatch:
    # Every job below gates on `github.event.inputs.artifact`. Without a
    # declared input that value is always empty on a manual dispatch, so all
    # jobs would be skipped. Declaring it with a default of "all" makes a
    # plain manual dispatch run the whole suite.
    inputs:
      artifact:
        description: >-
          Job to run: "all", "llm-cpp-build", "llm-performance-test-on-arc",
          "llm-performance-test-on-spr", "llm-performance-test-on-core" or
          "llm-performance-test-on-igpu"
        required: false
        type: string
        default: "all"
  workflow_call:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel | |
jobs: | |
llm-cpp-build:
  # Delegates to the reusable binary-build workflow in this repository.
  # Runs on scheduled events, or when the `artifact` input names this job
  # or is "all".
  uses: ./.github/workflows/llm-binary-build.yml
  if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-cpp-build' || github.event.inputs.artifact == 'all' }}
llm-performance-test-on-arc:
  # Runs the all-in-one benchmark on the runner labelled
  # [self-hosted, llm, perf] with the "xpu" extra installed: once with the
  # transformers version left by the install step and once with
  # transformers 4.34.0. The resulting CSVs are concatenated, archived under
  # CSV_SAVE_PATH and rendered to HTML.
  if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }}
  needs: llm-cpp-build
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.9"]
  runs-on: [self-hosted, llm, perf]
  env:
    OMP_NUM_THREADS: 16
    THREAD_NUM: 16
    ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    # Scheduled runs archive to the nightly folder, every other trigger to
    # the PR folder.
    CSV_SAVE_PATH: ${{ github.event.schedule && '/mnt/disk1/nightly_perf_gpu/' || '/mnt/disk1/pr_perf_gpu/' }}
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      shell: bash
      # pip install transformers_stream_generator for model internlm-chat-7b-8k
      # pip install tiktoken for model Qwen-7B-Chat-10-12
      run: |
        python -m pip install --upgrade pip
        python -m pip install --upgrade wheel
        python -m pip install --upgrade omegaconf
        python -m pip install --upgrade pandas
        python -m pip install --upgrade einops
        python -m pip install --upgrade transformers_stream_generator
        python -m pip install --upgrade tiktoken

    - name: Download llm binary
      uses: ./.github/actions/llm/download-llm-binary

    - name: Run LLM install (all) test
      uses: ./.github/actions/llm/setup-llm-env
      with:
        extra-dependency: "xpu"

    - name: Test installed xpu version
      shell: bash
      run: |
        source /opt/intel/oneapi/setvars.sh
        bash python/llm/test/run-llm-install-tests.sh

    - name: Test on xpu
      shell: bash
      run: |
        source /opt/intel/oneapi/setvars.sh
        export USE_XETLA=OFF
        export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
        mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
        cd python/llm/dev/benchmark/all-in-one
        # hide time info
        sed -i 's/str(end - st)/"xxxxxx"/g' run.py
        # change csv name
        sed -i 's/{today}/{today}_test1/g' run.py
        python run.py
        # upgrade transformers for model Mistral-7B-v0.1
        python -m pip install transformers==4.34.0
        mv ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
        # change csv name
        sed -i 's/test1/test2/g' run.py
        python run.py
        python ../../../test/benchmark/concat_csv.py
        cp ./*.csv $CSV_SAVE_PATH
        cd ../../../test/benchmark
        python csv_to_html.py -f $CSV_SAVE_PATH
        cd ../../dev/benchmark/all-in-one/
        # Compare the event name rather than testing github.event.schedule:
        # the latter expands to the raw cron string ("00 13 * * *"), which
        # word-splits and globs inside `[ ]`, makes the test error out and
        # silently skips the upload on nightly runs.
        if [ "${{ github.event_name }}" == "schedule" ]; then
          # curl -T takes a single file; upload each CSV separately so extra
          # matches of the glob are not interpreted as URLs.
          # LLM_FTP_URL is not defined in this workflow - presumably provided
          # by the runner environment.
          for csv_file in ./*.csv; do
            curl -T "$csv_file" "${LLM_FTP_URL}/llm/nightly_perf/gpu/"
          done
        fi
llm-performance-test-on-spr:
  # CPU benchmark: runs the all-in-one script with cpu-perf-test.yaml on the
  # runner labelled spr-perf, then archives the CSVs and renders them to HTML
  # under /mnt/disk1/nightly_perf_cpu/.
  needs: llm-cpp-build
  if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }}
  runs-on:
    - self-hosted
    - llm
    - spr-perf
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.9"]
  env:
    ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    OMP_NUM_THREADS: "16"
    THREAD_NUM: "16"
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        python -m pip install --upgrade wheel
        python -m pip install --upgrade omegaconf
        python -m pip install --upgrade pandas
        python -m pip install --upgrade einops

    - name: Download llm binary
      uses: ./.github/actions/llm/download-llm-binary

    - name: Run LLM install (all) test
      uses: ./.github/actions/llm/setup-llm-env

    - name: Test on cpu
      shell: bash
      run: |
        # Use the CPU benchmark configuration as the all-in-one config.
        mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
        cd python/llm/dev/benchmark/all-in-one
        # Mirror the upper-case proxy variables into the lower-case names.
        export http_proxy=${HTTP_PROXY}
        export https_proxy=${HTTPS_PROXY}
        # Replace the elapsed-time expression in run.py with a placeholder.
        sed -i 's/str(end - st)/"xxxxxx"/g' run.py
        python run.py
        # Archive the CSVs, then render the archive folder to HTML.
        cp ./*.csv /mnt/disk1/nightly_perf_cpu/
        cd ../../../test/benchmark
        python csv_to_html.py -f /mnt/disk1/nightly_perf_cpu/
llm-performance-test-on-core:
  # Runs the all-in-one benchmark with core-perf-test.yaml on the Windows
  # runner selected by the matrix (labels: os, llm, perf-core, platform),
  # archives the CSVs under CSV_SAVE_PATH and renders them to HTML.
  if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }}
  needs: llm-cpp-build
  strategy:
    fail-fast: false
    matrix:
      include:
        - os: windows
          platform: dp
          python-version: "3.9"
        # - os: windows
        #   platform: lp
        #   python-version: "3.9"
  runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"]
  env:
    ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    # Nightly vs. non-nightly prefix, followed by the matrix platform name.
    CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        python -m pip install --upgrade wheel
        python -m pip install --upgrade omegaconf pandas
        python -m pip install --upgrade tiktoken einops transformers_stream_generator

    - name: Download llm binary
      uses: ./.github/actions/llm/download-llm-binary

    - name: Run LLM install (all) test
      uses: ./.github/actions/llm/setup-llm-env

    - name: Test on core ${{ matrix.platform }}
      shell: bash
      run: |
        mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
        cd python/llm/dev/benchmark/all-in-one
        export http_proxy=${HTTP_PROXY}
        export https_proxy=${HTTPS_PROXY}
        # hide time info
        sed -i 's/str(end - st)/"xxxxxx"/g' run.py
        python run.py
        cp ./*.csv $CSV_SAVE_PATH
        cd ../../../test/benchmark
        python csv_to_html.py -f $CSV_SAVE_PATH
        cd ../../dev/benchmark/all-in-one/
        # Compare the event name rather than testing github.event.schedule:
        # the latter expands to the raw cron string ("00 13 * * *"), which
        # word-splits and globs inside `[ ]`, makes the test error out and
        # silently skips the upload on nightly runs.
        if [ "${{ github.event_name }}" == "schedule" ]; then
          # curl -T takes a single file; upload each CSV separately so extra
          # matches of the glob are not interpreted as URLs.
          # LLM_FTP_URL is not defined in this workflow - presumably provided
          # by the runner environment.
          for csv_file in ./*.csv; do
            curl -T "$csv_file" "${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/"
          done
        fi
llm-performance-test-on-igpu:
  # Builds bigdl-llm from source into a conda env named igpu-perf on the
  # Windows runner labelled perf-igpu, then runs the all-in-one benchmark
  # three times: the "32" config, the "32" config for transformers 4.34.0,
  # and the "512" config with transformers 4.31.0. Each run appends its
  # output to a dated log file under CSV_SAVE_PATH.
  if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }}
  needs: llm-cpp-build
  strategy:
    fail-fast: false
    matrix:
      include:
        - os: windows
          python-version: "3.9"
  runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu]
  env:
    ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
  steps:
    - uses: actions/checkout@v3

    # TODO: Put the bigdl-llm related install process for win gpu into a action function
    - name: Download llm binary
      uses: ./.github/actions/llm/download-llm-binary

    - name: Prepare for install bigdl-llm from source
      shell: bash
      run: |
        # Drop the "==VERSION" pin on bigdl-core-xe in setup.py.
        sed -i 's/"bigdl-core-xe==" + VERSION + "/"bigdl-core-xe/g' python/llm/setup.py

    - name: Install bigdl-llm and other related packages
      shell: cmd
      run: |
        call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
        call conda activate igpu-perf
        pip install --upgrade pip
        pip install --upgrade wheel
        pip install --upgrade omegaconf pandas
        pip install --upgrade tiktoken einops transformers_stream_generator
        cd python\llm
        python setup.py clean --all bdist_wheel --win
        if not exist dist\bigdl_llm*.whl (exit /b 1)
        for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i
        pip install %whl_name%[xpu] -i %INTERNAL_PYPI_URL% --trusted-host %INTERNAL_PYPI_TRUSTED_HOST% -q
        if %ERRORLEVEL% neq 0 (exit /b 1)
        pip list
        call conda deactivate

    - name: Set directory envs
      shell: bash
      run: |
        # CSV_NIGHTLY_PATH / CSV_PR_PATH are not defined in this workflow -
        # presumably provided by the runner environment.
        if [ "${{ github.event_name }}" == "schedule" ]; then
          echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV"
        else
          echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV"
        fi
        cur_date=$(date +%Y-%m-%d)
        echo "LOG_FILE=${cur_date}_output.txt" >> "$GITHUB_ENV"

    - name: Prepare igpu perf test (32)
      shell: bash
      run: |
        # hide time info
        sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py
        sed -i 's/{today}/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
        # '|' is the sed delimiter here so that a MODEL_HUB_DIR containing
        # '/' cannot terminate the expression early. MODEL_HUB_DIR is not
        # defined in this workflow - presumably provided by the runner.
        sed -i "s|path to your local model hub|$MODEL_HUB_DIR|g" python/llm/test/benchmark/32-igpu-perf-test.yaml

    - name: Test on igpu (32)
      shell: cmd
      run: |
        call conda activate igpu-perf
        call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
        set SYCL_ENABLE_DEFAULT_CONTEXTS=1
        set SYCL_CACHE_PERSISTENT=1
        REM for llava
        set TRANSFORMERS_OFFLINE=1
        cd python\llm\dev\benchmark\all-in-one
        move ..\..\..\test\benchmark\32-igpu-perf-test.yaml config.yaml
        python run.py >> %CSV_SAVE_PATH%\32\log\%LOG_FILE% 2>&1
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call conda deactivate

    - name: Prepare igpu perf test for Mistral (32)
      shell: bash
      run: |
        sed -i 's/test1/test2/g' python/llm/dev/benchmark/all-in-one/run.py
        # '|' delimiter: see the note on MODEL_HUB_DIR containing '/'.
        sed -i "s|path to your local model hub|$MODEL_HUB_DIR|g" python/llm/test/benchmark/32-igpu-perf-test-434.yaml

    - name: Test on igpu for Mistral (32)
      shell: cmd
      run: |
        call conda activate igpu-perf
        pip install transformers==4.34.0
        REM cmd does not stop on a failed command; abort so the benchmark
        REM never runs against an unintended transformers version.
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
        set SYCL_ENABLE_DEFAULT_CONTEXTS=1
        set SYCL_CACHE_PERSISTENT=1
        cd python\llm\dev\benchmark\all-in-one
        move ..\..\..\test\benchmark\32-igpu-perf-test-434.yaml config.yaml
        python run.py >> %CSV_SAVE_PATH%\32\log\%LOG_FILE% 2>&1
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call conda deactivate

    - name: Concat csv and generate html (32)
      shell: cmd
      run: |
        call conda activate igpu-perf
        cd python\llm\dev\benchmark\all-in-one
        python ..\..\..\test\benchmark\concat_csv.py
        copy *.csv %CSV_SAVE_PATH%\32\
        REM Do not delete the local CSVs unless they were archived.
        if %ERRORLEVEL% neq 0 (exit /b 1)
        del /q *.csv
        cd ..\..\..\test\benchmark
        python csv_to_html.py -f %CSV_SAVE_PATH%\32\
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call conda deactivate

    # TODO: create a action function here for different input
    - name: Prepare igpu perf test (512)
      shell: bash
      run: |
        # Undo the "_test2" suffix that the 32-token runs added to the CSV name.
        sed -i 's/{today}_test2/{today}/g' python/llm/dev/benchmark/all-in-one/run.py
        # '|' delimiter: see the note on MODEL_HUB_DIR containing '/'.
        sed -i "s|path to your local model hub|$MODEL_HUB_DIR|g" python/llm/test/benchmark/512-igpu-perf-test.yaml

    - name: Test on igpu (512)
      shell: cmd
      run: |
        call conda activate igpu-perf
        pip install transformers==4.31.0
        REM cmd does not stop on a failed command; abort so the benchmark
        REM never runs against an unintended transformers version.
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
        set SYCL_ENABLE_DEFAULT_CONTEXTS=1
        set SYCL_CACHE_PERSISTENT=1
        REM for llava
        set TRANSFORMERS_OFFLINE=1
        cd python\llm\dev\benchmark\all-in-one
        move ..\..\..\test\benchmark\512-igpu-perf-test.yaml config.yaml
        python run.py >> %CSV_SAVE_PATH%\512\log\%LOG_FILE% 2>&1
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call conda deactivate

    - name: Generate html (512)
      shell: cmd
      run: |
        call conda activate igpu-perf
        cd python\llm\dev\benchmark\all-in-one
        copy *.csv %CSV_SAVE_PATH%\512\
        REM Do not delete the local CSVs unless they were archived.
        if %ERRORLEVEL% neq 0 (exit /b 1)
        del /q *.csv
        cd ..\..\..\test\benchmark
        python csv_to_html.py -f %CSV_SAVE_PATH%\512\
        if %ERRORLEVEL% neq 0 (exit /b 1)
        call conda deactivate

    # for test on machine when encountering error
    # - name: Remove conda env
    #   if: ${{ always() }}
    #   shell: cmd
    #   run: |
    #     call conda env remove -n igpu-perf -y