Commit eb69b9e

merge master for the 100th time

awaelchli committed Sep 29, 2022
2 parents 058c897 + a45c047

Showing 10 changed files with 27 additions and 44 deletions.

.azure/hpu-tests.yml (1 addition, 1 deletion)

@@ -39,7 +39,7 @@ jobs:
cancelTimeoutInMinutes: "2"
pool: intel-hpus
container:
image: "vault.habana.ai/gaudi-docker/1.6.0/ubuntu20.04/habanalabs/pytorch-installer-1.12.0:latest"
image: "vault.habana.ai/gaudi-docker/1.6.1/ubuntu20.04/habanalabs/pytorch-installer-1.12.0:latest"
options: "--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host --shm-size=4g --name cd-container -v /usr/bin/docker:/tmp/docker:ro"
workspace:
clean: all

dockers/base-ipu/Dockerfile (4 additions, 4 deletions)

@@ -18,6 +18,7 @@ LABEL maintainer="Lightning-AI <https://github.com/Lightning-AI>"

ARG PYTHON_VERSION=3.9
ARG CONDA_VERSION=4.9.2
+ ARG PYTORCH_VERSION=1.9

SHELL ["/bin/bash", "-c"]

@@ -56,13 +57,12 @@ ENV \

COPY environment.yml environment.yml

- RUN conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} cudatoolkit=${CUDA_VERSION} -c pytorch && \
-     conda init bash && \
+ RUN conda init bash && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '- pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'horovod'])])" && \
cat environment.yml && \
-     conda env update --file environment.yml && \
+     conda env create -f environment.yml && \
conda clean -ya && \
rm environment.yml

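For context, the chained python -c one-liners above rewrite environment.yml before the environment is created: pin the Python version, pin the PyTorch version (instead of commenting it out, as the old line did), and drop any leftover pytorch>/horovod entries. A standalone sketch of that net effect on a hypothetical environment.yml excerpt (the input below is illustrative, not from this repository):

import re

req = "dependencies:\n  - python>=3.8\n  - pytorch>=1.11\n  - horovod\n"
req = re.sub(r"python>=[\d\.]+", "python=3.9", req)           # PYTHON_VERSION pin
req = re.sub(r"- pytorch[>=]+[\d\.]+", "- pytorch=1.9", req)  # PYTORCH_VERSION pin
req = "".join(
    ln for ln in req.splitlines(keepends=True)
    if not any(n in ln for n in ("pytorch>", "horovod"))      # drop leftovers
)
print(req)  # dependencies: - python=3.9, - pytorch=1.9; horovod removed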

dockers/ci-runner-hpu/Dockerfile (1 addition, 1 deletion)

@@ -16,7 +16,7 @@
# gaudi-docker-agent:latest

ARG DIST="latest"
ARG GAUDI_VERSION="1.6.0"
ARG GAUDI_VERSION="1.6.1"
ARG PYTORCH_INSTALLER_VERSION="1.12.0"
FROM vault.habana.ai/gaudi-docker/${GAUDI_VERSION}/ubuntu20.04/habanalabs/pytorch-installer-${PYTORCH_INSTALLER_VERSION}:${DIST}


pyproject.toml (0 additions, 1 deletion)

@@ -59,7 +59,6 @@ module = [
"pytorch_lightning.callbacks.progress.rich_progress",
"pytorch_lightning.trainer.trainer",
"pytorch_lightning.trainer.connectors.checkpoint_connector",
"pytorch_lightning.tuner.batch_size_scaling",
"lightning_app.api.http_methods",
"lightning_app.api.request_types",
"lightning_app.cli.app-template.app",

src/pytorch_lightning/CHANGELOG.md (3 additions, 0 deletions)

@@ -224,6 +224,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed the deprecated device attributes `Trainer.{devices,gpus,num_gpus,ipus,tpu_cores}` in favor of the accelerator-agnostic `Trainer.num_devices` ([#14829](https://github.com/Lightning-AI/lightning/pull/14829))


+ - Removed the deprecated `LightningIPUModule` ([#14830](https://github.com/Lightning-AI/lightning/pull/14830))


- Removed the deprecated `Logger.agg_and_log_metrics` hook in favour of `Logger.log_metrics` and the `agg_key_funcs` and `agg_default_func` arguments. ([#14840](https://github.com/Lightning-AI/lightning/pull/14840))


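The last entry above removes the pre-aggregation hook, so custom loggers implement `log_metrics` directly. A minimal sketch of a logger under the surviving API (a hypothetical PrintLogger; the import paths are assumptions matching this release, not part of the diff):

from pytorch_lightning.loggers import Logger
from pytorch_lightning.utilities.rank_zero import rank_zero_only

class PrintLogger(Logger):
    @property
    def name(self):
        return "print"

    @property
    def version(self):
        return "0"

    def log_hyperparams(self, params):
        pass

    @rank_zero_only
    def log_metrics(self, metrics, step=None):
        # any cross-step aggregation previously done via agg_and_log_metrics
        # with agg_key_funcs/agg_default_func now has to happen here
        print(step, metrics)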

src/pytorch_lightning/callbacks/batch_size_finder.py (2 additions, 0 deletions)

@@ -31,6 +31,8 @@
class BatchSizeFinder(Callback):
SUPPORTED_MODES = ("power", "binsearch")

+ optimal_batch_size: Optional[int]

def __init__(
self,
mode: str = "power",
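
The new class-level annotation documents the attribute callers read back after tuning. A minimal usage sketch (assuming the callback is exported from pytorch_lightning.callbacks on master at this point; model and dm are placeholders for your LightningModule and DataModule):

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import BatchSizeFinder

finder = BatchSizeFinder(mode="binsearch")  # SUPPORTED_MODES: "power" or "binsearch"
trainer = Trainer(callbacks=[finder], max_epochs=1)
trainer.fit(model, datamodule=dm)
print(finder.optimal_batch_size)  # largest batch size that fit, or None if skipped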

src/pytorch_lightning/strategies/ipu.py (1 addition, 24 deletions)

@@ -24,7 +24,7 @@
from lightning_lite.plugins import CheckpointIO, ClusterEnvironment
from lightning_lite.plugins.precision.utils import _fp_to_half
from lightning_lite.utilities.cloud_io import get_filesystem
- from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase
+ from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
from pytorch_lightning.plugins.precision import PrecisionPlugin
from pytorch_lightning.strategies.parallel import ParallelStrategy
from pytorch_lightning.strategies.strategy import TBroadcast
@@ -33,7 +33,6 @@
from pytorch_lightning.utilities.data import _get_dataloader_init_args_and_kwargs, _reinstantiate_wrapped_cls
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.model_helpers import is_overridden
- from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation
from pytorch_lightning.utilities.types import STEP_OUTPUT

if _POPTORCH_AVAILABLE:
@@ -42,28 +41,6 @@
poptorch = None


- class LightningIPUModule(_LightningModuleWrapperBase):
- """
- .. deprecated:: v1.7.0
- ``LightningIPUModule`` has been deprecated in v1.7.0 and will be removed in v1.9.0.
- """
-
- def __init__(
- self,
- forward_module: Optional[Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase]] = None,
- precision: Union[str, int] = 32,
- pl_module: Optional[Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase]] = None,
- ) -> None:
- rank_zero_deprecation("`LightningIPUModule` has been deprecated in v1.7.0 and will be removed in v1.8.0")
- self._validate_init_arguments(pl_module, forward_module)
- super().__init__(forward_module=(pl_module or forward_module))
- self.precision = precision
-
- def forward(self, *inputs: Any, **kwargs: Any) -> Any:
- inputs = apply_to_collection(inputs, Tensor, function=_fp_to_half, precision=self.precision)
- return super().forward(*inputs, **kwargs)


class IPUStrategy(ParallelStrategy):
"""Plugin for training on IPU devices."""

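For readers tracking behavior: beyond the base wrapper, the deleted class only cast floating-point tensor inputs to half precision before forward. A self-contained sketch of that pattern, mirroring the deleted lines (the import paths are assumptions, not part of this diff):

from torch import Tensor
from lightning_lite.plugins.precision.utils import _fp_to_half
from lightning_utilities.core.apply_func import apply_to_collection

def cast_inputs_to_half(inputs: tuple, precision: int = 16) -> tuple:
    # walk the (possibly nested) inputs and convert float tensors to half
    return apply_to_collection(inputs, Tensor, function=_fp_to_half, precision=precision)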

src/pytorch_lightning/tuner/batch_size_scaling.py (15 additions, 5 deletions)

@@ -35,10 +35,10 @@ def scale_batch_size(
init_val: int = 2,
max_trials: int = 25,
batch_arg_name: str = "batch_size",
- ):
+ ) -> Optional[int]:
if trainer.fast_dev_run:
rank_zero_warn("Skipping batch size scaler since `fast_dev_run` is enabled.")
- return
+ return None

# Save initial model, that is loaded after batch size is found
ckpt_path = os.path.join(trainer.default_root_dir, f".scale_batch_size_{uuid.uuid4()}.ckpt")
@@ -141,7 +141,12 @@ def __scale_batch_restore_params(trainer: "pl.Trainer", params: Dict[str, Any])


def _run_power_scaling(
trainer: "pl.Trainer", pl_module: "pl.LightningModule", new_size: int, batch_arg_name: str, max_trials: int, params
trainer: "pl.Trainer",
pl_module: "pl.LightningModule",
new_size: int,
batch_arg_name: str,
max_trials: int,
params: Dict[str, Any],
) -> int:
"""Batch scaling mode where the size is doubled at each iteration until an OOM error is encountered."""
# this flag is used to determine whether the previously scaled batch size, right before OOM, was a success or not
@@ -179,7 +184,12 @@ def _run_power_scaling(


def _run_binary_scaling(
trainer: "pl.Trainer", pl_module: "pl.LightningModule", new_size: int, batch_arg_name: str, max_trials: int, params
trainer: "pl.Trainer",
pl_module: "pl.LightningModule",
new_size: int,
batch_arg_name: str,
max_trials: int,
params: Dict[str, Any],
) -> int:
"""Batch scaling mode where the size is initially is doubled at each iteration until an OOM error is
encountered.
@@ -309,7 +319,7 @@ def _reset_dataloaders(trainer: "pl.Trainer", pl_module: "pl.LightningModule") -
reset_fn(pl_module)


- def _try_loop_run(trainer: "pl.Trainer", params) -> None:
+ def _try_loop_run(trainer: "pl.Trainer", params: Dict[str, Any]) -> None:
if trainer.state.fn == "fit":
loop = trainer.fit_loop
else:
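
The new Dict[str, Any] annotations line up with the pyproject.toml change above, which stops exempting this module from mypy. As for the two modes themselves, a toy sketch of their search logic (illustrative only; fits is a hypothetical probe, and the real functions also checkpoint state and reset dataloaders between trials):

def toy_power_scaling(fits, size: int, max_trials: int) -> int:
    # keep doubling while the trial batch size still fits in memory
    last_good = 0
    for _ in range(max_trials):
        if not fits(size):
            break
        last_good = size
        size *= 2
    return last_good  # 0 means even the initial size did not fit

def toy_binary_scaling(fits, size: int, max_trials: int) -> int:
    # double until the first failure, then binary-search between the
    # last success (low) and the first failure (high)
    low, high = 0, None
    for _ in range(max_trials):
        if fits(size):
            low = size
            size = size * 2 if high is None else (low + high) // 2
        else:
            high = size
            size = max((low + high) // 2, 1)
        if high is not None and high - low <= 1:
            break
    return low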

tests/tests_pytorch/deprecated_api/test_remove_1-10.py (0 additions, 2 deletions)

@@ -33,7 +33,6 @@
from pytorch_lightning.plugins.environments import LightningEnvironment
from pytorch_lightning.strategies.bagua import LightningBaguaModule
from pytorch_lightning.strategies.deepspeed import LightningDeepSpeedModule
- from pytorch_lightning.strategies.ipu import LightningIPUModule
from pytorch_lightning.strategies.utils import on_colab_kaggle
from pytorch_lightning.utilities.apply_func import (
apply_to_collection,
@@ -85,7 +84,6 @@ def test_deprecated_amp_level():
LightningBaguaModule,
LightningDeepSpeedModule,
pytest.param(LightningShardedDataParallel, marks=RunIf(fairscale=True)),
- LightningIPUModule,
],
)
def test_v1_10_deprecated_pl_module_init_parameter(wrapper_class):

tests/tests_pytorch/deprecated_api/test_remove_1-8.py (0 additions, 6 deletions)

@@ -20,7 +20,6 @@
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel
- from pytorch_lightning.strategies.ipu import LightningIPUModule
from pytorch_lightning.trainer.configuration_validator import _check_datamodule_checkpoint_hooks


@@ -257,11 +256,6 @@ def on_load_checkpoint(self, checkpoint):
_check_datamodule_checkpoint_hooks(trainer)


- def test_v1_8_0_deprecated_lightning_ipu_module():
-     with pytest.deprecated_call(match=r"has been deprecated in v1.7.0 and will be removed in v1.8."):
-         _ = LightningIPUModule(BoringModel(), 32)


def test_deprecated_mc_save_checkpoint():
mc = ModelCheckpoint()
trainer = Trainer()