Skip to content

Commit

Permalink
Add PyTorch 2.3 to CI matrix (#19708)
Browse files Browse the repository at this point in the history
  • Loading branch information
awaelchli authored Apr 29, 2024
1 parent 2913633 commit 49ed2b1
Show file tree
Hide file tree
Showing 19 changed files with 46 additions and 26 deletions.
3 changes: 1 addition & 2 deletions .azure/gpu-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
container:
# TODO: Upgrade to Python 3.11
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
options: "--gpus=all --shm-size=32g"
strategy:
matrix:
Expand Down
5 changes: 2 additions & 3 deletions .azure/gpu-tests-fabric.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,11 @@ jobs:
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
strategy:
matrix:
# TODO: Upgrade to Python 3.11
"Fabric | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "fabric"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "lightning"
workspace:
clean: all
Expand Down
5 changes: 2 additions & 3 deletions .azure/gpu-tests-pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,11 @@ jobs:
cancelTimeoutInMinutes: "2"
strategy:
matrix:
# TODO: Upgrade to Python 3.11
"PyTorch | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "pytorch"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "lightning"
pool: lit-rtx-3090
variables:
Expand Down
6 changes: 6 additions & 0 deletions .github/checkgroup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ subprojects:
- "pl-cpu (macOS-11, lightning, 3.10, 2.0)"
- "pl-cpu (macOS-11, lightning, 3.10, 2.1)"
- "pl-cpu (macOS-11, lightning, 3.10, 2.2)"
- "pl-cpu (macOS-14, lightning, 3.10, 2.3)"
- "pl-cpu (ubuntu-20.04, lightning, 3.8, 2.0, oldest)"
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.0)"
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)"
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.2)"
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.3)"
- "pl-cpu (windows-2022, lightning, 3.8, 2.0, oldest)"
- "pl-cpu (windows-2022, lightning, 3.10, 2.0)"
- "pl-cpu (windows-2022, lightning, 3.10, 2.1)"
- "pl-cpu (windows-2022, lightning, 3.10, 2.2)"
- "pl-cpu (windows-2022, lightning, 3.10, 2.3)"
- "pl-cpu (macOS-11, pytorch, 3.8, 2.0)"
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 2.0)"
- "pl-cpu (windows-2022, pytorch, 3.8, 2.0)"
Expand Down Expand Up @@ -171,14 +174,17 @@ subprojects:
- "fabric-cpu (macOS-11, lightning, 3.10, 2.0)"
- "fabric-cpu (macOS-11, lightning, 3.11, 2.1)"
- "fabric-cpu (macOS-11, lightning, 3.11, 2.2)"
- "fabric-cpu (macOS-14, lightning, 3.10, 2.3)"
- "fabric-cpu (ubuntu-20.04, lightning, 3.8, 2.0, oldest)"
- "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.0)"
- "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.1)"
- "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2)"
- "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.3)"
- "fabric-cpu (windows-2022, lightning, 3.8, 2.0, oldest)"
- "fabric-cpu (windows-2022, lightning, 3.10, 2.0)"
- "fabric-cpu (windows-2022, lightning, 3.11, 2.1)"
- "fabric-cpu (windows-2022, lightning, 3.11, 2.2)"
- "fabric-cpu (windows-2022, lightning, 3.11, 2.3)"
- "fabric-cpu (macOS-11, fabric, 3.8, 2.0)"
- "fabric-cpu (ubuntu-20.04, fabric, 3.8, 2.0)"
- "fabric-cpu (windows-2022, fabric, 3.8, 2.0)"
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/ci-tests-fabric.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ jobs:
- { os: "macOS-11", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
- { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.3" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" }
# only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues
- { os: "macOS-12", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.0" }
- { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.0" }
Expand Down Expand Up @@ -83,6 +86,8 @@ jobs:
PYPI_CACHE_DIR: "_pip-wheels"
TORCH_URL_STABLE: "https://download.pytorch.org/whl/cpu/torch_stable.html"
TORCH_URL_TEST: "https://download.pytorch.org/whl/test/cpu/torch_test.html"
# TODO: Remove this - Enable running MPS tests on this platform
DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
steps:
- uses: actions/checkout@v4

Expand Down Expand Up @@ -119,7 +124,7 @@ jobs:
- name: Env. variables
run: |
# Switch PyTorch URL
python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.pytorch-version }}' == '2.2' else '${{env.TORCH_URL_STABLE}}'))" >> $GITHUB_ENV
python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.pytorch-version }}' == '2.3' else '${{env.TORCH_URL_STABLE}}'))" >> $GITHUB_ENV
# Switch coverage scope
python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_fabric'))" >> $GITHUB_ENV
# if you install mono-package set dependency only for this subpackage
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/ci-tests-pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ jobs:
- { os: "macOS-11", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.2" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.2" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.2" }
- { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.3" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.3" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.3" }
# only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues
- { os: "macOS-12", pkg-name: "pytorch", python-version: "3.11", pytorch-version: "2.0" }
- { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.11", pytorch-version: "2.0" }
Expand Down Expand Up @@ -88,6 +91,8 @@ jobs:
TORCH_URL_TEST: "https://download.pytorch.org/whl/test/cpu/torch_test.html"
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
PYPI_CACHE_DIR: "_pip-wheels"
# TODO: Remove this - Enable running MPS tests on this platform
DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
steps:
- uses: actions/checkout@v4

Expand Down Expand Up @@ -125,7 +130,7 @@ jobs:
- name: Env. variables
run: |
# Switch PyTorch URL
python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.pytorch-version }}' == '2.2' else '${{env.TORCH_URL_STABLE}}'))" >> $GITHUB_ENV
python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.pytorch-version }}' == '2.3' else '${{env.TORCH_URL_STABLE}}'))" >> $GITHUB_ENV
# Switch coverage scope
python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'pytorch_lightning'))" >> $GITHUB_ENV
# if you install mono-package set dependency only for this subpackage
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ jobs:
- { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" }
# - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
steps:
- uses: actions/checkout@v4
Expand Down
6 changes: 0 additions & 6 deletions dockers/base-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,7 @@ ENV \
MAKEFLAGS="-j2"

RUN \
# TODO: Remove the manual key installation once the base image is updated.
# https://github.com/NVIDIA/nvidia-docker/issues/1631
# https://github.com/NVIDIA/nvidia-docker/issues/1631#issuecomment-1264715214
apt-get update && apt-get install -y wget && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
mkdir -p /etc/apt/keyrings/ && mv 3bf863cc.pub /etc/apt/keyrings/ && \
echo "deb [signed-by=/etc/apt/keyrings/3bf863cc.pub] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" /etc/apt/sources.list.d/cuda.list && \
apt-get update -qq --fix-missing && \
NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
CUDA_VERSION_MM=${CUDA_VERSION%.*} && \
Expand Down
2 changes: 1 addition & 1 deletion requirements/fabric/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

numpy >=1.17.2, <1.27.0
torch >=2.0.0, <2.3.0
torch >=2.0.0, <2.4.0
fsspec[http] >=2022.5.0, <2023.11.0
packaging >=20.0, <=23.1
typing-extensions >=4.4.0, <4.10.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/fabric/examples.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

torchvision >=0.15.0, <0.18.0
torchvision >=0.15.0, <0.19.0
torchmetrics >=0.10.0, <1.3.0
lightning-utilities >=0.8.0, <0.12.0
2 changes: 1 addition & 1 deletion requirements/pytorch/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

numpy >=1.17.2, <1.27.0
torch >=2.0.0, <2.3.0
torch >=2.0.0, <2.4.0
tqdm >=4.57.0, <4.67.0
PyYAML >=5.4, <6.1.0
fsspec[http] >=2022.5.0, <2023.11.0
Expand Down
3 changes: 1 addition & 2 deletions requirements/pytorch/examples.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

requests <2.32.0
torchvision >=0.15.0, <0.18.0
gym[classic_control] >=0.17.0, <0.27.0
torchvision >=0.15.0, <0.19.0
ipython[all] <8.15.0
torchmetrics >=0.10.0, <1.3.0
lightning-utilities >=0.8.0, <0.12.0
2 changes: 1 addition & 1 deletion src/lightning/fabric/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Enabled consolidating distributed checkpoints through `fabric consolidate` in the new CLI ([#19560](https://github.com/Lightning-AI/pytorch-lightning/pull/19560))

-
- Added support for PyTorch 2.3 ([#19708](https://github.com/Lightning-AI/pytorch-lightning/pull/19708))

-

Expand Down
4 changes: 3 additions & 1 deletion src/lightning/fabric/accelerators/mps.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import platform
from functools import lru_cache
from typing import List, Optional, Union
Expand Down Expand Up @@ -70,7 +71,8 @@ def auto_device_count() -> int:
@lru_cache(1)
def is_available() -> bool:
"""MPS is only available on a machine with the ARM-based Apple Silicon processors."""
return torch.backends.mps.is_available() and platform.processor() in ("arm", "arm64")
mps_disabled = os.getenv("DISABLE_MPS", "0") == "1"
return not mps_disabled and torch.backends.mps.is_available() and platform.processor() in ("arm", "arm64")

@classmethod
@override
Expand Down
3 changes: 2 additions & 1 deletion src/lightning/pytorch/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `on_exception` hook to `LightningDataModule` ([#19601](https://github.com/Lightning-AI/pytorch-lightning/pull/19601))

-
- Added support for PyTorch 2.3 ([#19708](https://github.com/Lightning-AI/pytorch-lightning/pull/19708))


### Changed

Expand Down
4 changes: 3 additions & 1 deletion tests/tests_fabric/plugins/precision/test_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ def test_fsdp_precision_scaler_with_bf16():
@RunIf(min_cuda_gpus=1)
def test_fsdp_precision_forward_context():
"""Test to ensure that the context manager is correctly set to bfloat16."""
from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler

precision = FSDPPrecision(precision="16-mixed")
assert isinstance(precision.scaler, torch.cuda.amp.GradScaler)
assert isinstance(precision.scaler, ShardedGradScaler)
assert torch.get_default_dtype() == torch.float32
with precision.forward_context():
assert torch.get_autocast_gpu_dtype() == torch.float16
Expand Down
5 changes: 5 additions & 0 deletions tests/tests_pytorch/callbacks/test_finetuning_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pytest
import torch
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_3
from lightning.pytorch import LightningModule, Trainer, seed_everything
from lightning.pytorch.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint
from lightning.pytorch.demos.boring_classes import BoringModel, RandomDataset
Expand Down Expand Up @@ -359,6 +360,8 @@ def test_callbacks_restore(tmp_path):
"foreach": None,
"differentiable": False,
}
if _TORCH_GREATER_EQUAL_2_3:
expected["fused"] = None

assert callback._internal_optimizer_metadata[0][0] == expected

Expand All @@ -374,6 +377,8 @@ def test_callbacks_restore(tmp_path):
"foreach": None,
"differentiable": False,
}
if _TORCH_GREATER_EQUAL_2_3:
expected["fused"] = None

assert callback._internal_optimizer_metadata[0][1] == expected

Expand Down
1 change: 1 addition & 0 deletions tests/tests_pytorch/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def restore_env_variables():
"KMP_DUPLICATE_LIB_OK", # leaked by PyTorch
"CRC32C_SW_MODE", # leaked by tensorboardX
"TRITON_CACHE_DIR", # leaked by torch.compile
"_TORCHINDUCTOR_PYOBJECT_TENSOR_DATA_PTR", # leaked by torch.compile
"OMP_NUM_THREADS", # set by our launchers
# leaked by XLA
"ALLOW_MULTIPLE_LIBTPU_LOAD",
Expand Down
4 changes: 3 additions & 1 deletion tests/tests_pytorch/plugins/precision/test_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ def test_fsdp_precision_scaler_with_bf16():
@RunIf(min_cuda_gpus=1)
def test_fsdp_precision_forward_context():
"""Test to ensure that the context manager is correctly set to bfloat16."""
from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler

precision = FSDPPrecision(precision="16-mixed")
assert isinstance(precision.scaler, torch.cuda.amp.GradScaler)
assert isinstance(precision.scaler, ShardedGradScaler)
assert torch.get_default_dtype() == torch.float32
with precision.forward_context():
assert torch.get_autocast_gpu_dtype() == torch.float16
Expand Down

0 comments on commit 49ed2b1

Please sign in to comment.