Skip to content

Commit

Permalink
Merge branch 'PyTorchLightning:master' into bug/12768_datamodule_hpar…
Browse files Browse the repository at this point in the history
…am_update
  • Loading branch information
tanmoyio authored May 5, 2022
2 parents fd65af8 + 1a502c0 commit 0cbc78c
Show file tree
Hide file tree
Showing 303 changed files with 15,899 additions and 9,564 deletions.
2 changes: 1 addition & 1 deletion .actions/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"requirements.txt",
"requirements/extra.txt",
"requirements/loggers.txt",
# "requirements/test.txt",
"requirements/strategies.txt",
"requirements/examples.txt",
)

Expand Down
6 changes: 2 additions & 4 deletions .azure-pipelines/gpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pr:
jobs:
- job: pytest
# how long to run the job before automatically cancelling
timeoutInMinutes: "45"
timeoutInMinutes: "55"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: "2"

Expand Down Expand Up @@ -51,9 +51,7 @@ jobs:
displayName: 'Image info & NVIDIA'
- bash: |
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
pip install fairscale>=0.4.5
pip install deepspeed>=0.6.0
python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install . --requirement requirements/devel.txt
Expand Down
5 changes: 3 additions & 2 deletions .azure-pipelines/hpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ pr:
- "release/*"

jobs:
- job: hpu
- job: tests

# how long to run the job before automatically cancelling
timeoutInMinutes: "5"
timeoutInMinutes: "10"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: "2"

Expand All @@ -33,6 +33,7 @@ jobs:
displayName: 'Instance HW info'
- bash: |
pip install . --requirement requirements/extra.txt
pip install . --requirement requirements/test.txt
displayName: 'Install dependencies'
Expand Down
8 changes: 4 additions & 4 deletions .azure-pipelines/ipu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ variables:
value: "poplar_sdk-ubuntu_20_04-2.3.1+793-89796d462d"

jobs:
- job: ipu
- job: tests

# how long to run the job before automatically cancelling
timeoutInMinutes: "15"
pool: graphcore-ipus

workspace:
Expand Down Expand Up @@ -51,11 +53,9 @@ jobs:
- bash: |
export GIT_TERMINAL_PROMPT=1
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)"
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
python ./requirements/adjust-versions.py requirements/extra.txt
python ./requirements/adjust-versions.py requirements/examples.txt
pip install . --requirement requirements/devel.txt
pip install . --requirement ./requirements/devel-base.txt
pip list
displayName: 'Install dependencies'
Expand Down
12 changes: 7 additions & 5 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
*.yml @borda @tchaton @carmocca

# Docs
/docs/ @edenlightning @tchaton @borda @awaelchli
/.github/*.md @edenlightning @williamfalcon @borda
/.github/ISSUE_TEMPLATE/ @edenlightning @borda @tchaton
/docs/source/conf.py @borda @awaelchli @carmocca
/docs/source/index.rst @williamfalcon
/docs/ @edenlightning @tchaton @borda @awaelchli
/.github/*.md @edenlightning @williamfalcon @borda
/.github/ISSUE_TEMPLATE/ @edenlightning @borda @tchaton
/docs/source/conf.py @borda @awaelchli @carmocca
/docs/source/index.rst @williamfalcon
/docs/source/levels @williamfalcon
/docs/source/expertise_levels @williamfalcon

# Packages
/pytorch_lightning/accelerators @williamfalcon @tchaton @SeanNaren @awaelchli @justusschock @kaushikb11
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/ci_dockers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,15 @@ jobs:
strategy:
fail-fast: false
matrix:
python_version: ["3.7", "3.9"]
pytorch_version: ["1.8", "1.11"]
include:
# the config used in '.azure-pipelines/gpu-tests.yml'
- {python_version: "3.9", pytorch_version: "1.10"}
- {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1"}
- {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"}
# latest (used in Tutorials)
- {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"}
- {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"}
- {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1"}
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -88,6 +92,7 @@ jobs:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
CUDA_VERSION=${{ matrix.cuda_version }}
file: dockers/base-cuda/Dockerfile
push: false
timeout-minutes: 75
Expand Down
6 changes: 1 addition & 5 deletions .github/workflows/ci_test-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ jobs:
shell: bash

- name: Install extra dependencies
env:
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
HOROVOD_WITHOUT_TENSORFLOW: 1
run: |
# adjust versions according to the installed Torch version
python ./requirements/adjust-versions.py requirements/extra.txt
Expand All @@ -119,7 +115,7 @@ jobs:
HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
pip uninstall -y horovod
echo $(grep "horovod" requirements/extra.txt) > requirements/horovod.txt
grep "horovod" requirements/strategies.txt > requirements/horovod.txt
pip install --no-cache-dir -r requirements/horovod.txt
fi
horovodrun --check-build
Expand Down
16 changes: 2 additions & 14 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,10 @@ concurrency:
jobs:
mypy:
runs-on: ubuntu-20.04
#strategy:
# fail-fast: false
# matrix:
# include:
# - {python-version: "3.8", pytorch-version: "1.8"}
# - {python-version: "3.9", pytorch-version: "1.10"}
steps:
- uses: actions/checkout@master
- uses: actions/setup-python@v2
with:
# python-version: ${{ matrix.python-version }}
python-version: 3.9

# Note: This uses an internal pip API and may not always work
Expand All @@ -37,15 +30,10 @@ jobs:
${{ runner.os }}-pip-
- name: Install dependencies
env:
# TORCH_VERSION: ${{ matrix.pytorch-version }}
TORCH_VERSION: "1.10"
run: |
pip install "torch==$TORCH_VERSION" --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
# adjust versions according to the installed Torch version
pip install torch==1.11 --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
python ./requirements/adjust-versions.py requirements/extra.txt
python ./requirements/adjust-versions.py requirements/examples.txt
pip install '.[dev]' --upgrade-strategy only-if-needed --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install '.[dev]'
pip list
- name: Type check
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/docs-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,9 @@ jobs:
sudo apt-get install -y cmake pandoc
pip --version
pip install -q fire
# remove Horovod from requirements
python .actions/assistant.py requirements_prune_pkgs horovod
# python -m pip install --upgrade --user pip
pip install --requirement requirements.txt --upgrade-strategy only-if-needed --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet
pip install --requirement requirements/extra.txt
pip install --requirement requirements/loggers.txt
pip install --requirement requirements/devel-base.txt
pip install --requirement requirements/docs.txt
pip list
shell: bash
Expand Down
12 changes: 7 additions & 5 deletions .github/workflows/events-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,13 @@ jobs:
matrix:
include:
# the config used in '.azure-pipelines/gpu-tests.yml'
- {python_version: "3.7", pytorch_version: "1.8"}
- {python_version: "3.7", pytorch_version: "1.10"}
- {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1"}
- {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"}
# latest (used in Tutorials)
- {python_version: "3.8", pytorch_version: "1.8"}
- {python_version: "3.9", pytorch_version: "1.10"}
- {python_version: "3.9", pytorch_version: "1.11"}
- {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"}
- {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"}
- {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1"}
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}

steps:
- name: Checkout
Expand All @@ -140,6 +141,7 @@ jobs:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
CUDA_VERSION=${{ matrix.cuda_version }}
file: dockers/base-cuda/Dockerfile
push: ${{ env.PUSH_TO_HUB }}
tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
Expand Down
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ pip-wheel-metadata/
lightning_logs/
.vscode/

# Test-tube
test_tube_*/

# Documentations
docs/source/api
docs/source/*.md
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ repos:
args: ['--maxkb=350', '--enforce-all']
exclude: |
(?x)^(
CHANGELOG.md|
docs/source/_static/images/general/fast_2.gif|
docs/source/_static/images/mnist_imgs/pt_to_pl.jpg|
docs/source/_static/images/lightning_module/pt_to_pl.png|
Expand Down
99 changes: 92 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,47 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

-
- Added support for reloading the last checkpoint saved by passing `ckpt_path="last"` ([#12816](https://github.com/PyTorchLightning/pytorch-lightning/pull/12816))


- Added `LightningDataModule.load_from_checkpoint` to support loading datamodules directly from checkpoint ([#12550](https://github.com/PyTorchLightning/pytorch-lightning/pull/12550))


- Added a friendly error message when attempting to call `Trainer.save_checkpoint()` without a model attached ([#12772](https://github.com/PyTorchLightning/pytorch-lightning/pull/12772))


- Added a friendly error message when attempting to use `DeepSpeedStrategy` on unsupported accelerators ([#12699](https://github.com/PyTorchLightning/pytorch-lightning/pull/12699))


- Enabled `torch.inference_mode` for evaluation and prediction ([#12715](https://github.com/PyTorchLightning/pytorch-lightning/pull/12715))


- Added support for setting `val_check_interval` to a value higher than the number of training batches when `check_val_every_n_epoch=None` ([#11993](https://github.com/PyTorchLightning/pytorch-lightning/pull/11993))


- Include the `pytorch_lightning` version as a header in the CLI config files ([#12532](https://github.com/PyTorchLightning/pytorch-lightning/pull/12532))


-
- Added support for `Callback` registration through entry points ([#12739](https://github.com/PyTorchLightning/pytorch-lightning/pull/12739))


-
- Added support for `Trainer(deterministic="warn")` to warn instead of fail when a non-deterministic operation is encountered ([#12588](https://github.com/PyTorchLightning/pytorch-lightning/pull/12588))


-
- Added `CollaborativeStrategy` ([#12842](https://github.com/PyTorchLightning/pytorch-lightning/pull/12842))


- Include a version suffix for new "last" checkpoints of later runs in the same directory ([#12902](https://github.com/PyTorchLightning/pytorch-lightning/pull/12902))


- Added missing `predict_dataset` argument in `LightningDataModule.from_datasets` to create predict dataloaders ([#12942](https://github.com/PyTorchLightning/pytorch-lightning/pull/12942))


- Added class name prefix to metrics logged by `DeviceStatsMonitor` ([#12228](https://github.com/PyTorchLightning/pytorch-lightning/pull/12228))


- Added Native FSDP Strategy ([#12447](https://github.com/PyTorchLightning/pytorch-lightning/pull/12447))


### Changed

Expand All @@ -40,10 +68,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Marked `swa_lrs` argument in `StochasticWeightAveraging` callback as required ([#12556](https://github.com/PyTorchLightning/pytorch-lightning/pull/12556))


-
- `LightningCLI`'s shorthand notation changed to use jsonargparse native feature ([#12614](https://github.com/PyTorchLightning/pytorch-lightning/pull/12614))


-
- Changed `seed_everything_default` argument in the `LightningCLI` to type `Union[bool, int]`. If set to `True` a seed is automatically generated for the parser argument `--seed_everything`. ([#12822](https://github.com/PyTorchLightning/pytorch-lightning/pull/12822))


- Make positional arguments required for classes passed into the `add_argparse_args` function. ([#12504](https://github.com/PyTorchLightning/pytorch-lightning/pull/12504))


- Raise an error if there are insufficient training batches when using a float value of `limit_train_batches` ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885))


-
Expand All @@ -57,7 +91,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated `num_processes`, `gpus`, `tpu_cores`, and `ipus` from the `Trainer` constructor in favor of using the `accelerator` and `devices` arguments ([#11040](https://github.com/PyTorchLightning/pytorch-lightning/pull/11040))


-
- Deprecated setting `LightningCLI(seed_everything_default=None)` in favor of `False` ([#12804](https://github.com/PyTorchLightning/pytorch-lightning/issues/12804)).


-
Expand All @@ -67,6 +101,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Removed

- Removed the deprecated `TestTubeLogger` ([#12859](https://github.com/PyTorchLightning/pytorch-lightning/pull/12859))


- Removed the deprecated `pytorch_lightning.core.memory.LayerSummary` and `pytorch_lightning.core.memory.ModelSummary` ([#12593](https://github.com/PyTorchLightning/pytorch-lightning/pull/12593))


Expand Down Expand Up @@ -118,16 +155,64 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed support for passing strategy names or strategy instances to the plugins Trainer argument ([#12700](https://github.com/PyTorchLightning/pytorch-lightning/pull/12700))


- Removed the deprecated `val_transforms` argument from the `LightningDataModule` constructor ([#12763](https://github.com/PyTorchLightning/pytorch-lightning/pull/12763))


- Removed the deprecated `test_transforms` argument from the `LightningDataModule` constructor ([#12773](https://github.com/PyTorchLightning/pytorch-lightning/pull/12773))


- Removed deprecated `dataloader_idx` argument from `on_train_batch_start/end` hooks `Callback` and `LightningModule` ([#12769](https://github.com/PyTorchLightning/pytorch-lightning/pull/12769))


- Removed deprecated `get_progress_bar_dict` property from `LightningModule` ([#12839](https://github.com/PyTorchLightning/pytorch-lightning/pull/12839))

### Fixed

- Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885))


-


-


-


-


## [1.6.3] - 2022-05-03

### Fixed

- Use only a single instance of `rich.console.Console` throughout codebase ([#12886](https://github.com/PyTorchLightning/pytorch-lightning/pull/12886))
- Fixed an issue to ensure all the checkpoint states are saved in a common filepath with `DeepSpeedStrategy` ([#12887](https://github.com/PyTorchLightning/pytorch-lightning/pull/12887))
- Fixed `trainer.logger` deprecation message ([#12671](https://github.com/PyTorchLightning/pytorch-lightning/pull/12671))
- Fixed an issue where sharded grad scaler is passed in when using BF16 with the `ShardedStrategy` ([#12915](https://github.com/PyTorchLightning/pytorch-lightning/pull/12915))
- Fixed an issue with recursive invocation of DDP configuration in the HPU parallel plugin ([#12912](https://github.com/PyTorchLightning/pytorch-lightning/pull/12912))
- Fixed printing of ragged dictionaries in `Trainer.validate` and `Trainer.test` ([#12857](https://github.com/PyTorchLightning/pytorch-lightning/pull/12857))
- Fixed threading support for legacy loading of checkpoints ([#12814](https://github.com/PyTorchLightning/pytorch-lightning/pull/12814))
- Fixed pickling of `KFoldLoop` ([#12441](https://github.com/PyTorchLightning/pytorch-lightning/pull/12441))
- Stopped `optimizer_zero_grad` from being called after IPU execution ([#12913](https://github.com/PyTorchLightning/pytorch-lightning/pull/12913))
- Fixed `fuse_modules` to be qat-aware for `torch>=1.11` ([#12891](https://github.com/PyTorchLightning/pytorch-lightning/pull/12891))
- Enforced eval shuffle warning only for default samplers in DataLoader ([#12653](https://github.com/PyTorchLightning/pytorch-lightning/pull/12653))
- Enable mixed precision in `DDPFullyShardedStrategy` when `precision=16` ([#12965](https://github.com/PyTorchLightning/pytorch-lightning/pull/12965))
- Fixed `TQDMProgressBar` reset and update to show correct time estimation ([#12889](https://github.com/PyTorchLightning/pytorch-lightning/pull/12889))
- Fixed fit loop restart logic to enable resume using the checkpoint ([#12821](https://github.com/PyTorchLightning/pytorch-lightning/pull/12821))


## [1.6.2] - 2022-04-27

### Fixed

- Fixed `ImportError` when `torch.distributed` is not available. ([#12794](https://github.com/PyTorchLightning/pytorch-lightning/pull/12794))
- When using custom DataLoaders in LightningDataModule, multiple inheritance is resolved properly ([#12716](https://github.com/PyTorchLightning/pytorch-lightning/pull/12716))
- Fixed encoding issues on terminals that do not support unicode characters ([#12828](https://github.com/PyTorchLightning/pytorch-lightning/pull/12828))
- Fixed support for `ModelCheckpoint` monitors with dots ([#12783](https://github.com/PyTorchLightning/pytorch-lightning/pull/12783))


## [1.6.1] - 2022-04-13

### Changed
Expand Down
Loading

0 comments on commit 0cbc78c

Please sign in to comment.