# daily_run_test #254
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Regression workflow, started either by the daily cron entry below or by a
# manual dispatch with the inputs declared here.
name: daily_run_test

on:
  workflow_dispatch:
    inputs:
      # Repository to check out and test, in "owner/name" form.
      repo_org:
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        required: false
        default: 'open-compass/opencompass'
      # Git ref (branch, tag or commit id) of that repository.
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
      # List literal of the suites to run; the jobs parse it with fromJSON()
      # and check for 'chat', 'base' and 'cmd'.
      regression_func:
        description: 'regression functions'
        type: string
        required: true
        default: "['chat','base','cmd']"
  schedule:
    - cron: '56 16 * * *'

# One active run per workflow and ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Workflow-level environment, visible to every job and step.
env:
  # Base name of the conda environment; the test job appends the matrix
  # CUDA label to it.
  CONDA_ENV: opencompass_regression

  # Cache directories on the runner's filesystem.
  PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
  HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  # NOTE(review): the key is spelled "DATEASET" (sic). The jobs reference it
  # under exactly this name, so any rename must be done in both places.
  DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets

  # Offline / ModelScope switches. Presumably these keep the Hugging Face,
  # vLLM and LMDeploy tooling from reaching the network - TODO confirm.
  # Quoted because environment variables are always delivered as strings.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'

  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
jobs:
  # Builds the sdist/wheel of the requested repository and ref on a hosted
  # runner and publishes it as a short-lived artifact for the test job.
  build-pypi:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      # The resulting wheel is installed later as opencompass*.whl.
      - name: Build opencompass
        run: |
          pip install wheel setuptools
          python setup.py sdist bdist_wheel
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: dist/*
          retention-days: 1
          # The run id keeps the artifact name unique per workflow run.
          name: my-artifact-${{ github.run_id }}

  # Runs the regression suites once per CUDA environment. The matrix value is
  # also the runner label, so each leg lands on its matching self-hosted host.
  daily_run_test:
    if: ${{ !cancelled() }}
    needs: build-pypi
    strategy:
      # Let the other CUDA leg finish even when one of them fails.
      fail-fast: false
      matrix:
        cuda_env: [dsw_cu11, dsw_cu12]
    runs-on: ${{ matrix.cuda_env }}
    environment: 'prod'
    timeout-minutes: 420  # 7 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}

      # CUDA 11 leg: lmdeploy, vllm, flash-attn and xformers come from local
      # cu118 wheels; torch is reinstalled from the cu118 index afterwards.
      - name: Prepare - create conda env and install torch - cu11
        if: ${{ matrix.cuda_env == 'dsw_cu11' }}
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu11.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.1+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list

      # CUDA 12 leg: lmdeploy and vllm come in through the opencompass extras;
      # torch is reinstalled from the default index afterwards.
      - name: Prepare - create conda env and install torch - cu12
        if: ${{ matrix.cuda_env == 'dsw_cu12' }}
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu12.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list

      # Link the shared dataset directory into the workspace as ./data and
      # point ~/.cache/huggingface/hub at the shared model cache.
      - name: Prepare - prepare data and hf model
        run: |
          ln -s ${{env.DATEASET_CACHE_PATH}} data
          rm -rf ~/.cache/huggingface/hub && mkdir -p ~/.cache/huggingface
          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub

      # Each suite below follows the same pattern: run opencompass into a
      # per-run work dir, re-point regression_result_daily at its summary
      # directory, then run the pytest marker that asserts on the scores.
      - name: Run command testcase
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'cmd')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          export from_tf=TRUE
          python tools/list_configs.py internlm2_5 mmlu
          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py

      - name: Run chat model test
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'chat')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
          opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py

      - name: Run base model test
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'base')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py

      # always(): clean up the per-leg conda env and the result symlink even
      # when an earlier step failed or the run was cancelled.
      - name: Remove Conda Env
        if: always()
        run: |
          rm -rf regression_result_daily
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs

  # Posts a failure message to the webhook stored in secrets. Only fires when
  # a needed job failed and the run is on the develop or main ref.
  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [daily_run_test]
    environment: 'prod'
    timeout-minutes: 5
    runs-on: self-hosted
    steps:
      - name: notify
        # Secrets are handed over as environment variables and expanded by the
        # shell inside double quotes, instead of being templated into the
        # script text where their content would be parsed as shell syntax.
        env:
          WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
          USER_ID: ${{ secrets.USER_ID }}
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'"${USER_ID}"'"}]]}}}}' "${WEBHOOK_URL}"