Skip to content

periodic

periodic #8

Workflow file for this run

# Periodic workflow: runs on a schedule, on ciflow/periodic/* tag pushes,
# and on manual dispatch.
name: periodic
on:
  schedule:
    # Minute 0 of hours 0, 6, 12 and 18 UTC: midnight and every 6 hours after.
    # The cron fields are "minute hour day-of-month month day-of-week"; the
    # hour list must sit in the second field, otherwise the job fires at
    # 00:00, 00:06, 00:12 and 00:18 only.
    - cron: '0 0,6,12,18 * * *'
  push:
    tags:
      - ciflow/periodic/*
  # Allows the workflow to be started by hand.
  workflow_dispatch:
jobs:
# Publishes the `models` value that the matrix jobs expand with fromJSON().
gather-models:
  runs-on: ubuntu-22.04
  steps:
    # Check out the repository without its submodules.
    - uses: actions/checkout@v3
      with:
        submodules: "false"
    - uses: actions/setup-python@v4
      with:
        python-version: "3.11"
    - id: gather-models
      name: Extract the list of models to test
      # The current working directory is put on PYTHONPATH for the script run.
      # NOTE(review): the script is presumably what sets the `models` step
      # output read below; confirm in gather_test_models.py.
      run: |
        set -eux
        PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic"
  outputs:
    # Re-exported at job level so dependent jobs can read
    # needs.gather-models.outputs.models.
    models: ${{ steps.gather-models.outputs.models }}
# Downloads, converts and validates each matrix model on the runner named by
# the matrix entry.
test-cpu:
  name: test-cpu (${{ matrix.platform }}, ${{ matrix.repo_name }})
  needs: gather-models
  strategy:
    # The matrix is the JSON value published by the gather-models job.
    matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
  runs-on: ${{ matrix.runner }}
  env:
    TORCHCHAT_ROOT: ${{ github.workspace }}
    REPO_NAME: ${{ matrix.repo_name }}
    # 'false' on the macos-14 runner, 'true' everywhere else; handed to
    # install_et.sh as its first argument.
    # NOTE(review): assumes no script reads the old misspelled name
    # (ENABKE_ET_PYBIND) from the environment; confirm before merging.
    ENABLE_ET_PYBIND: ${{ matrix.runner == 'macos-14' && 'false' || 'true' }}
  steps:
    - name: Checkout repo
      uses: actions/checkout@v3
    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'
    - name: Print machine info
      run: |
        uname -a
    - name: Install dependencies
      run: |
        bash "${TORCHCHAT_ROOT}/scripts/install_et.sh" "${ENABLE_ET_PYBIND}"
    - name: Download checkpoints
      env:
        # Matrix values reach the shell through the environment instead of
        # being pasted into the script text, so their content cannot change
        # the command that is executed.
        RESOURCES: ${{ matrix.resources }}
      run: |
        bash "${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh" "${REPO_NAME}" "${RESOURCES}"
    - name: Run validation
      run: |
        pushd "${TORCHCHAT_ROOT}"
        export CHECKPOINT_PATH="./checkpoints/${REPO_NAME}/model.pth"
        bash .ci/scripts/convert_checkpoint.sh "${REPO_NAME}"
        bash .ci/scripts/validate.sh "${CHECKPOINT_PATH}"
# Runs the download / convert / validate sequence for each matrix model
# through the shared pytorch/test-infra Linux job on a CUDA runner.
# NOTE(review): this job expands the same matrix as test-cpu but pins a single
# runner; if the matrix lists a model once per CPU platform, the CUDA run is
# repeated for each entry. Confirm against gather_test_models.py.
test-cuda:
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  name: test-cuda (linux, ${{ matrix.repo_name }})
  needs: gather-models
  strategy:
    matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    # Quoted so the version stays a string rather than a float.
    gpu-arch-version: "12.1"
    script: |
      echo "::group::Print machine info"
      nvidia-smi
      echo "::endgroup::"
      echo "::group::Install required packages"
      pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
      pip install -r ./requirements.txt
      pip list
      echo "::endgroup::"
      echo "::group::Download checkpoint"
      export REPO_NAME="${{ matrix.repo_name }}"
      # The resources value is quoted, as in test-cpu, so it reaches the
      # script as a single argument with no word splitting or globbing.
      bash .ci/scripts/wget_checkpoint.sh "${REPO_NAME}" "${{ matrix.resources }}"
      echo "::endgroup::"
      echo "::group::Convert checkpoint"
      export CHECKPOINT_PATH="./checkpoints/${REPO_NAME}/model.pth"
      bash .ci/scripts/convert_checkpoint.sh "${REPO_NAME}"
      echo "::endgroup::"
      echo "::group::Run inference"
      bash .ci/scripts/validate.sh "${CHECKPOINT_PATH}" cuda
      echo "::endgroup::"