# .github/workflows/docker-build.yml
name: Docker builds

# Triggers: pushes and pull requests targeting master or a release branch,
# a daily schedule, published releases, and manual dispatch.
on:
  push:
    branches:
      - master
      - "release/*"
  pull_request:
    branches:
      - master
      - "release/*"
    # `ready_for_review` is listed because draft pull requests are skipped
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # only run for changes to docker-related inputs; docs requirements and markdown are excluded
    paths:
      - ".actions/*"
      - ".github/workflows/docker-build.yml"
      - "dockers/**"
      - "requirements/*.txt"
      - "requirements/pytorch/**"
      - "requirements/fabric/**"
      - "setup.py"
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"
  schedule:
    # at the end of every day
    - cron: "0 0 * * *"
  release:
    types:
      - published
  workflow_dispatch: {}
# One concurrency group per workflow / ref / PR head branch / event type.
# Only pull-request runs cancel an in-progress run of the same group.
concurrency:
  group: ${{ format('{0}-{1}-{2}-{3}', github.workflow, github.ref, github.head_ref, github.event_name) }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
# Workflow-wide flags; the jobs compare them against the string 'true'
# to decide whether to log in to the registry and push images.
env:
  # set for scheduled and manually dispatched runs
  PUSH_NIGHTLY: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }}
  # set for release events and for runs on a tag ref
  PUSH_RELEASE: ${{ github.event_name == 'release' || startsWith(github.ref, 'refs/tags/') }}
jobs:
  # Builds the release image (dockers/release/Dockerfile) for each matrix row and
  # pushes it only when PUSH_RELEASE is set and the run belongs to Lightning-AI.
  build-pl:
    # the images generated by this job are not used anywhere in this repository. they are just meant to be available
    # for users
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # We only release one docker image per PyTorch version.
          # NOTE(review): 1.13 is listed with two CUDA versions below - confirm the statement above still holds.
          # Make sure the matrix here matches the one below.
          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Get release version
        if: github.event_name == 'release'
        # For workflows triggered by release, `GITHUB_REF` is the release tag created.
        # Keeps only the part of the ref after the last `/`.
        run: echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"
      - name: Set tags
        # Exports DOCKER_TAGS: a comma-separated list of fully qualified image tags for this matrix row.
        run: |
          import os

          repo = "pytorchlightning/pytorch_lightning"
          # RELEASE_VERSION is only exported by the previous step on release events
          ver = os.getenv('RELEASE_VERSION')
          py_ver = "${{ matrix.python_version }}"
          pt_ver = "${{ matrix.pytorch_version }}"
          cuda_ver = "${{ matrix.cuda_version }}"
          tags = [f"latest-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          if ver:
              tags += [f"{ver}-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          # exactly one matrix row also receives the plain `latest` tag
          if py_ver == '3.10' and pt_ver == '2.1' and cuda_ver == '12.1.0':
              tags += ["latest"]

          tags = [f"{repo}:{tag}" for tag in tags]
          with open(os.getenv('GITHUB_ENV'), "a") as gh_env:
              # terminate the entry so anything appended later starts on its own line
              gh_env.write("DOCKER_TAGS=" + ",".join(tags) + "\n")
        shell: python
      - uses: docker/build-push-action@v5
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
            LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
          file: dockers/release/Dockerfile
          # pushed in release-docker.yml only when PL is released
          # same condition as the login step above, so runs that skipped login never attempt a push
          push: ${{ env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: ${{ env.DOCKER_TAGS }}
        timeout-minutes: 35

  # Builds the base CUDA image (dockers/base-cuda/Dockerfile) for each matrix row,
  # pushes it when PUSH_NIGHTLY is set and the run belongs to Lightning-AI,
  # and posts to Slack when a nightly run fails.
  build-cuda:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # These are the base images for PL release docker images.
          # Make sure the matrix here matches the one above.
          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
          # - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - uses: docker/build-push-action@v5
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
          file: dockers/base-cuda/Dockerfile
          # same condition as the login step above, so runs that skipped login never attempt a push
          push: ${{ env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}"
        timeout-minutes: 95
      - uses: ravsamhq/notify-slack-action@v2
        # only nightly (scheduled / manually dispatched) failures are reported
        if: failure() && env.PUSH_NIGHTLY == 'true'
        with:
          status: ${{ job.status }}
          token: ${{ secrets.GITHUB_TOKEN }}
          notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # Build-only check of dockers/nvidia/Dockerfile; the image is never pushed
  # and a failed build does not fail the job.
  build-NGC:
    if: github.event.pull_request.draft == false
    # fixme: use larger machine or optimize image size
    # runs-on: ubuntu-latest-4-cores
    # then drop continue-on-error
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # NOTE(review): the step name says "Conda" but the Dockerfile built is dockers/nvidia - confirm the name.
      - name: Build Conda Docker
        # publish master/release
        continue-on-error: true
        uses: docker/build-push-action@v5
        with:
          file: dockers/nvidia/Dockerfile
          push: false
        timeout-minutes: 55