Commit
Merge branch 'master' into fix/overfit_sampler
rohitgr7 authored Nov 15, 2021
2 parents 0b9bd64 + 1de3539 commit 00294e5
Showing 38 changed files with 360 additions and 255 deletions.
6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -1,14 +1,14 @@
---
name: Bug report
-about: Create a report to help us improve
+about: Create a bug report to help us improve
title: ''
-labels: bug / fix, help wanted
+labels: bug
assignees: ''
---

## 🐛 Bug

-<!-- A clear and concise description of what the bug is. -->
+<!-- A clear and concise description of the bug. -->

### To Reproduce

4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/documentation.md
@@ -1,8 +1,8 @@
---
name: Typos and doc fixes
-about: Typos and doc fixes
+about: Tell us about how we can improve our documentation
title: ''
-labels: documentation
+labels: docs
assignees: ''
---

6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -1,8 +1,8 @@
---
name: Feature request
-about: Suggest an idea for this project
+about: Propose a feature for this project
title: ''
-labels: enhancement
+labels: feature
assignees: ''
---

@@ -12,7 +12,7 @@ assignees: ''

### Motivation

-<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
+<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link it here -->

### Pitch

@@ -1,18 +1,18 @@
---
-name: Code improvement
-about: Suggest a code improvement, i.e. refactoring, deprecation, etc.
+name: Refactor
+about: Suggest a code refactor or deprecation
title: ''
-labels: refactors / code health
+labels: refactor
assignees: ''
---

-## Proposed refactoring or deprecation
+## Proposed refactor

-<!-- A clear and concise description of the code improvement -->
+<!-- A clear and concise description of the refactor -->

### Motivation

-<!-- Please outline the motivation for the proposal. If this is related to another GitHub issue, please link here too -->
+<!-- Please outline the motivation for the proposal. If this is related to another GitHub issue, please link it here -->

### Pitch

1 change: 1 addition & 0 deletions .github/lightning-probot.yml
@@ -0,0 +1 @@
tracking_issue: 10530
6 changes: 3 additions & 3 deletions .github/mergify.yml
@@ -45,7 +45,7 @@ pull_request_rules:
- "#changes-requested-reviews-by=0" # no requested changes
actions:
label:
add: [ "0:] Ready-To-Go" ]
add: [ "ready" ]

- name: Not ready yet
conditions:
@@ -54,13 +54,13 @@
- "#changes-requested-reviews-by>=1" # no requested changes
actions:
label:
remove: [ "0:] Ready-To-Go" ]
remove: [ "ready" ]

- name: add core reviewer
conditions:
- -conflict # skip if conflict
- -draft # filter-out GH draft PRs
- label="0:] Ready-To-Go"
- label="ready"
- "#approved-reviews-by<3" # number of review approvals
- "#review-requested<3" # number of requested reviews
actions:
4 changes: 2 additions & 2 deletions .github/stale.yml
@@ -8,8 +8,8 @@ issues:
daysUntilClose: 7
# Issues with these labels will never be considered stale
exemptLabels:
-- Important
-- Priority
+- p0
+- p1
# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it hasn't had any recent activity.
18 changes: 18 additions & 0 deletions .github/workflows/probot-auto-cc.yml
@@ -0,0 +1,18 @@
name: Probot

on:
issues:
types:
- labeled
pull_request:
types:
- labeled

jobs:
auto-cc:
if: ${{ github.repository_owner == 'PyTorchLightning' }}
runs-on: ubuntu-latest
steps:
- uses: carmocca/probot@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
16 changes: 15 additions & 1 deletion CHANGELOG.md
@@ -44,7 +44,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103))


--
+- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505))


-
@@ -57,6 +57,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Removed

+- Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507))


- Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/PyTorchLightning/pytorch-lightning/issues/10339))


@@ -123,6 +126,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482))


+- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450))


+- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525))



### Fixed

- Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702))
@@ -134,6 +145,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed sampler replacement logic with `overfit_batches` ([#10486](https://github.com/PyTorchLightning/pytorch-lightning/issues/10486))


+- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/metrics/pull/10493))


- Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure ([#10463](https://github.com/PyTorchLightning/pytorch-lightning/issues/10463))


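For readers unfamiliar with the flag named in the `overfit_batches` entry above, a minimal usage sketch follows (not part of this commit; the model and datamodule names are placeholders):

from pytorch_lightning import Trainer

# `overfit_batches` trains on a small, fixed subset of the data; the Trainer then swaps in a
# non-shuffling sampler, which is the replacement logic the fix above addresses.
trainer = Trainer(overfit_batches=10)  # reuse only 10 training batches each epoch
# trainer.fit(model, datamodule=dm)    # `model` and `dm` stand in for your own objects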
5 changes: 1 addition & 4 deletions docs/source/advanced/training_tricks.rst
@@ -64,10 +64,7 @@ read `this post <https://pytorch.org/blog/pytorch-1.6-now-includes-stochastic-we

.. testcode::

-# Enable Stochastic Weight Averaging - uses the class defaults
-trainer = Trainer(stochastic_weight_avg=True)
-
-# alternatively, if you need to pass custom arguments
+# Enable Stochastic Weight Averaging using the callback
trainer = Trainer(callbacks=[StochasticWeightAveraging(...)])

----------
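For context, a fuller version of the snippet above could look like the sketch below; the argument values are illustrative and assume the 1.5-era `StochasticWeightAveraging` signature:

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import StochasticWeightAveraging

# Start averaging weights at 80% of training and anneal toward the given SWA learning rate.
swa = StochasticWeightAveraging(swa_epoch_start=0.8, swa_lrs=0.05)
trainer = Trainer(max_epochs=20, callbacks=[swa])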
6 changes: 5 additions & 1 deletion pytorch_lightning/core/mixins/device_dtype_mixin.py
@@ -17,6 +17,8 @@
import torch
from torch.nn import Module

+import pytorch_lightning as pl


class DeviceDtypeModuleMixin(Module):
__jit_unused_properties__ = ["device", "dtype"]
@@ -177,7 +179,9 @@ def __update_properties(
self, device: Optional[torch.device] = None, dtype: Optional[Union[str, torch.dtype]] = None
) -> None:
def apply_fn(module: Union["DeviceDtypeModuleMixin", Module]) -> None:
-if not isinstance(module, DeviceDtypeModuleMixin):
+# TODO: Find why `isinstance(module, DeviceDtypeModuleMixin)` doesn't
+# work when using `init_meta_context`.
+if not isinstance(module, (DeviceDtypeModuleMixin, pl.LightningModule)):
return
if device is not None:
module._device = device
16 changes: 8 additions & 8 deletions pytorch_lightning/lite/lite.py
@@ -41,7 +41,7 @@
)
from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector
from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
-from pytorch_lightning.utilities import DeviceType, DistributedType, move_data_to_device
+from pytorch_lightning.utilities import _StrategyType, DeviceType, move_data_to_device
from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors
from pytorch_lightning.utilities.data import has_iterable_dataset
from pytorch_lightning.utilities.device_parser import _parse_devices
@@ -477,14 +477,14 @@ def _supported_device_types() -> Sequence[DeviceType]:
)

@staticmethod
-def _supported_strategy_types() -> Sequence[DistributedType]:
+def _supported_strategy_types() -> Sequence[_StrategyType]:
return (
-DistributedType.DP,
-DistributedType.DDP,
-DistributedType.DDP_SPAWN,
-DistributedType.DEEPSPEED,
-DistributedType.DDP_SHARDED,
-DistributedType.DDP_SHARDED_SPAWN,
+_StrategyType.DP,
+_StrategyType.DDP,
+_StrategyType.DDP_SPAWN,
+_StrategyType.DEEPSPEED,
+_StrategyType.DDP_SHARDED,
+_StrategyType.DDP_SHARDED_SPAWN,
)

@staticmethod
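For context on where `_supported_strategy_types` matters: `LightningLite` accepts one of the strategies listed above through its constructor. A minimal sketch, assuming the 1.5-era Lite API (the class name and tensor shapes are illustrative):

import torch
from pytorch_lightning.lite import LightningLite

class LitePrototype(LightningLite):
    def run(self):
        model = torch.nn.Linear(32, 2)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        # setup() moves the model to the right device and wraps it for the chosen strategy
        model, optimizer = self.setup(model, optimizer)
        loss = model(torch.randn(4, 32, device=self.device)).sum()
        self.backward(loss)  # used instead of loss.backward() so the strategy can hook in
        optimizer.step()

# `strategy` may be any of the supported types listed above, e.g. "dp" or "ddp".
LitePrototype(accelerator="cpu").run()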
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -55,7 +55,7 @@
ReduceOp,
sync_ddp_if_available,
)
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException
from pytorch_lightning.utilities.seed import reset_seed
from pytorch_lightning.utilities.types import STEP_OUTPUT
@@ -79,7 +79,7 @@ class DDPPlugin(ParallelPlugin):
devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes.
"""

-distributed_backend = DistributedType.DDP
+distributed_backend = _StrategyType.DDP

def __init__(
self,
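As a usage note (not part of this commit): the plugin is normally selected through the Trainer. A hedged sketch, assuming a 2-GPU machine and the 1.5-era `strategy` argument:

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPPlugin

# Passing a plugin instance lets you forward kwargs to torch's DistributedDataParallel.
trainer = Trainer(gpus=2, strategy=DDPPlugin(find_unused_parameters=False))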
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/ddp2.py
@@ -15,14 +15,14 @@

from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
from pytorch_lightning.utilities.apply_func import apply_to_collection
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.types import _METRIC_COLLECTION


class DDP2Plugin(DDPPlugin):
"""DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP."""

-distributed_backend = DistributedType.DDP2
+distributed_backend = _StrategyType.DDP2

@property
def global_rank(self) -> int:
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -43,7 +43,7 @@
ReduceOp,
sync_ddp_if_available,
)
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.seed import reset_seed
from pytorch_lightning.utilities.types import STEP_OUTPUT
@@ -58,7 +58,7 @@ class DDPSpawnPlugin(ParallelPlugin):
"""Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training
finishes."""

-distributed_backend = DistributedType.DDP_SPAWN
+distributed_backend = _StrategyType.DDP_SPAWN

def __init__(
self,
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/deepspeed.py
@@ -36,7 +36,7 @@
from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType
from pytorch_lightning.utilities.apply_func import apply_to_collection
from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
from pytorch_lightning.utilities.model_helpers import is_overridden
@@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any):


class DeepSpeedPlugin(DDPPlugin):
-distributed_backend = DistributedType.DEEPSPEED
+distributed_backend = _StrategyType.DEEPSPEED
DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH"

def __init__(
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/dp.py
@@ -20,7 +20,7 @@
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
from pytorch_lightning.utilities.apply_func import apply_to_collection
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.types import _METRIC_COLLECTION

@@ -29,7 +29,7 @@ class DataParallelPlugin(ParallelPlugin):
"""Implements data-parallel training in a single process, i.e., the model gets replicated to each device and
each gets a split of the data."""

-distributed_backend = DistributedType.DP
+distributed_backend = _StrategyType.DP

def __init__(
self,
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/fully_sharded.py
@@ -20,7 +20,7 @@
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException

if _FAIRSCALE_FULLY_SHARDED_AVAILABLE:
@@ -30,7 +30,7 @@

class DDPFullyShardedPlugin(DDPPlugin):

-distributed_backend = DistributedType.DDP_FULLY_SHARDED
+distributed_backend = _StrategyType.DDP_FULLY_SHARDED

def __init__(
self,
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/horovod.py
@@ -26,7 +26,7 @@
from pytorch_lightning.utilities.distributed import distributed_available
from pytorch_lightning.utilities.distributed import group as dist_group
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType

if _HOROVOD_AVAILABLE:
import horovod.torch as hvd
@@ -35,7 +35,7 @@
class HorovodPlugin(ParallelPlugin):
"""Plugin for Horovod distributed training integration."""

-distributed_backend = DistributedType.HOROVOD
+distributed_backend = _StrategyType.HOROVOD

def __init__(
self,
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/sharded.py
@@ -23,7 +23,7 @@
from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
from pytorch_lightning.trainer.states import TrainerFn
from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE, rank_zero_only
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException

if _FAIRSCALE_AVAILABLE:
@@ -36,7 +36,7 @@
class DDPShardedPlugin(DDPPlugin):
"""Optimizer and gradient sharded training provided by FairScale."""

-distributed_backend = DistributedType.DDP_SHARDED
+distributed_backend = _StrategyType.DDP_SHARDED
_REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M

def __init__(self, *args, **kwargs):
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/sharded_spawn.py
@@ -24,7 +24,7 @@
from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin
from pytorch_lightning.trainer.states import TrainerFn
from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only
-from pytorch_lightning.utilities.enums import DistributedType
+from pytorch_lightning.utilities.enums import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException

if _FAIRSCALE_AVAILABLE:
@@ -38,7 +38,7 @@
class DDPSpawnShardedPlugin(DDPSpawnPlugin):
"""Optimizer sharded training provided by FairScale."""

-distributed_backend = DistributedType.DDP_SHARDED_SPAWN
+distributed_backend = _StrategyType.DDP_SHARDED_SPAWN

def configure_ddp(self) -> None:
trainer = self.lightning_module.trainer