CI: enable testing with coming PT 2.2 #1097
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that builds the project's Docker images.
name: Docker builds

on:
  push:
    branches:
      - master
      - "release/*"
  pull_request:
    branches:
      - master
      - "release/*"
    # `ready_for_review` is listed because the jobs are skipped while a PR is a draft
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # run only for changes that can affect the images; markdown and docs requirements are excluded
    paths:
      - ".actions/*"
      - ".github/workflows/docker-build.yml"
      - "dockers/**"
      - "requirements/*.txt"
      - "requirements/pytorch/**"
      - "requirements/fabric/**"
      - "setup.py"
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"
  schedule:
    # nightly, at midnight (GitHub evaluates cron schedules in UTC)
    - cron: "0 0 * * *"
  release:
    types:
      - published
  workflow_dispatch: {}
# Runs are grouped per workflow, ref, PR head branch and triggering event.
# Only pull-request runs cancel an older run that is still in progress in the same group.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }}"
  cancel-in-progress: "${{ github.event_name == 'pull_request' }}"
# Workflow-wide switches; the steps below compare them against the string 'true'.
env:
  # scheduled (nightly) and manually dispatched runs
  PUSH_NIGHTLY: "${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}"
  # runs on a tag ref or triggered by a release event
  PUSH_RELEASE: "${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}"
jobs: | |
build-pl:
  # Builds the release images from dockers/release/Dockerfile.
  # The images generated by this job are not used anywhere in this repository;
  # they are just meant to be available for users.
  if: github.event.pull_request.draft == false
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        # We release one docker image per PyTorch/CUDA combination listed here.
        # The matrix here is the same as the one in release-docker.yml.
        - { python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.7.1" }
        - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
        - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
        - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
        - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    - uses: docker/setup-buildx-action@v3
    # log in only when an image will actually be pushed from the upstream repository
    - uses: docker/login-action@v3
      if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - name: Get release version
      if: github.event_name == 'release'
      # For workflows triggered by release, `GITHUB_REF` is the release tag created.
      # `${GITHUB_REF##*/}` keeps only the part after the last `/`.
      run: echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"
    - name: Set tags
      # Exports DOCKER_TAGS, a comma-separated list of fully qualified image tags.
      run: |
        import os

        repo = "pytorchlightning/pytorch_lightning"
        # RELEASE_VERSION is only set by the previous step on release events
        ver = os.getenv('RELEASE_VERSION')
        py_ver = "${{ matrix.python_version }}"
        pt_ver = "${{ matrix.pytorch_version }}"
        cuda_ver = "${{ matrix.cuda_version }}"
        tags = [f"latest-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
        if ver:
            tags.append(f"{ver}-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}")
        # exactly one matrix entry also carries the bare `latest` tag
        if py_ver == '3.10' and pt_ver == '2.1' and cuda_ver == '12.1.0':
            tags.append("latest")

        tags = [f"{repo}:{tag}" for tag in tags]
        with open(os.getenv('GITHUB_ENV'), "a") as gh_env:
            # terminate the record with a newline so the env-file entry is well-formed
            gh_env.write("DOCKER_TAGS=" + ",".join(tags) + "\n")
      shell: python
    - uses: docker/build-push-action@v5
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
          CUDA_VERSION=${{ matrix.cuda_version }}
          LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
        file: dockers/release/Dockerfile
        # push only on release runs, and only where the login step above has run
        push: ${{ env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' }}
        tags: ${{ env.DOCKER_TAGS }}
      timeout-minutes: 35
build-cuda:
  # Builds the CUDA base images from dockers/base-cuda/Dockerfile and pushes them
  # on nightly/manual runs.
  if: github.event.pull_request.draft == false
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        # These are the base images for PL release docker images,
        # so include at least all the combinations in release-docker.yml.
        - { python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.7.1" }
        - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
        - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
        - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
        - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
        - { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
        - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
        - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
        # - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
  steps:
    - uses: actions/checkout@v4
    - uses: docker/setup-buildx-action@v3
    # log in only when an image will actually be pushed from the upstream repository
    - uses: docker/login-action@v3
      if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - uses: docker/build-push-action@v5
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
          CUDA_VERSION=${{ matrix.cuda_version }}
        file: dockers/base-cuda/Dockerfile
        # push only on nightly/manual runs, and only where the login step above has run
        push: ${{ env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' }}
        tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}"
      timeout-minutes: 95
    # report failures of nightly/manual runs to Slack; other events are not reported
    - uses: ravsamhq/notify-slack-action@v2
      if: failure() && env.PUSH_NIGHTLY == 'true'
      with:
        status: ${{ job.status }}
        token: ${{ secrets.GITHUB_TOKEN }}
        notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
        message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
build-NGC:
  # Checks that the image defined in dockers/nvidia/Dockerfile still builds; nothing is pushed.
  if: github.event.pull_request.draft == false
  # fixme: use larger machine or optimize image size
  #   runs-on: ubuntu-latest-4-cores
  #  then drop continue-on-error
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build NGC Docker
      # build-only check (`push: false`); a failing build does not fail the job, see the fixme above
      continue-on-error: true
      uses: docker/build-push-action@v5
      with:
        file: dockers/nvidia/Dockerfile
        push: false
      timeout-minutes: 55