Merge branch 'master' into ci/ext-ver
Borda committed Oct 31, 2022
2 parents 67edc9b + 773cb3e commit 2045226
Showing 11 changed files with 47 additions and 21 deletions.
2 changes: 1 addition & 1 deletion requirements/pytorch/extra.txt
@@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

# extended list of package dependencies to reach full functionality
-matplotlib>3.1, <3.5.3
+matplotlib>3.1, <3.6.2
omegaconf>=2.0.5, <2.3.0
hydra-core>=1.0.5, <1.3.0
jsonargparse[signatures]>=4.15.2, <4.16.0
7 changes: 6 additions & 1 deletion src/pytorch_lightning/callbacks/callback.py
@@ -82,7 +82,12 @@ def on_train_batch_start(
def on_train_batch_end(
self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", outputs: STEP_OUTPUT, batch: Any, batch_idx: int
) -> None:
"""Called when the train batch ends."""
"""Called when the train batch ends.
Note:
The value ``outputs["loss"]`` here will be the normalized value w.r.t ``accumulate_grad_batches`` of the
loss returned from ``training_step``.
"""

def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
"""Called when the train epoch begins."""
4 changes: 4 additions & 0 deletions src/pytorch_lightning/core/module.py
@@ -661,6 +661,10 @@ def training_step(self, batch, batch_idx, hiddens):
Note:
The loss value shown in the progress bar is smoothed (averaged) over the last values,
so it differs from the actual loss returned in train/validation step.
+
+Note:
+    When ``accumulate_grad_batches`` > 1, the loss returned here will be automatically
+    normalized by ``accumulate_grad_batches`` internally.
"""
rank_zero_warn("`training_step` must be implemented to be used with the Lightning Trainer")

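
Taken together, the two docstring notes added above describe the same behavior from both sides: when gradient accumulation is enabled, the loss that reaches callbacks has already been divided by ``accumulate_grad_batches``. Below is a minimal sketch of how one could observe this; the ``LossInspector`` callback and the accumulation value of 4 are illustrative and not part of this commit.

```python
import pytorch_lightning as pl
from pytorch_lightning.demos.boring_classes import BoringModel


class LossInspector(pl.Callback):
    """Hypothetical callback that prints what on_train_batch_end receives."""

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        # With accumulate_grad_batches=4, outputs["loss"] should be roughly the value
        # returned by training_step divided by 4.
        print(batch_idx, outputs["loss"].item())


trainer = pl.Trainer(
    accumulate_grad_batches=4,  # illustrative value
    callbacks=[LossInspector()],
    limit_train_batches=8,
    max_epochs=1,
    enable_progress_bar=False,
)
trainer.fit(BoringModel())
```
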
6 changes: 3 additions & 3 deletions tests/README.md
@@ -65,9 +65,9 @@ You can rely on our CI to make sure all these tests pass.
There are certain standalone tests, which you can run using:

```bash
-PL_RUN_STANDALONE_TESTS=1 python -m pytest -v tests/tests_pytorch/trainer/
-# or
-./tests/run_standalone_tests.sh tests/tests_pytorch/trainer/
+./tests/tests_pytorch/run_standalone_tests.sh tests/tests_pytorch/trainer/
+# or run a specific test
+./tests/tests_pytorch/run_standalone_tests.sh -k test_multi_gpu_model_ddp
```

## Running Coverage
2 changes: 2 additions & 0 deletions tests/tests_pytorch/accelerators/test_hpu.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
+from unittest import mock

import pytest
import torch
@@ -76,6 +77,7 @@ def test_all_stages(tmpdir, hpus):


@RunIf(hpu=True)
+@mock.patch.dict(os.environ, os.environ.copy())
def test_optimization(tmpdir):
seed_everything(42)

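
The new ``@mock.patch.dict(os.environ, os.environ.copy())`` decorator snapshots the environment and restores it once the test returns, so variables set while the optimization test runs cannot leak into later tests. A self-contained sketch of the pattern (``MY_FLAG`` is a made-up variable, unrelated to HPUs):

```python
import os
from unittest import mock


@mock.patch.dict(os.environ, os.environ.copy())
def run_with_scratch_env():
    # Anything written to os.environ while the patch is active...
    os.environ["MY_FLAG"] = "1"  # hypothetical variable, not used by Lightning
    assert os.environ["MY_FLAG"] == "1"


run_with_scratch_env()
# ...is rolled back as soon as the decorated function returns.
assert "MY_FLAG" not in os.environ
```
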
2 changes: 1 addition & 1 deletion tests/tests_pytorch/callbacks/test_quantization.py
@@ -109,7 +109,7 @@ def test_quantize_torchscript(tmpdir):
trainer = Trainer(callbacks=[qcb], default_root_dir=tmpdir, max_epochs=1)
trainer.fit(qmodel, datamodule=dm)

-batch = iter(dm.test_dataloader()).next()
+batch = next(iter(dm.test_dataloader()))
qmodel(qmodel.quant(batch[0]))

tsmodel = qmodel.to_torchscript()
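
The quantization test previously called the Python 2-only ``iterator.next()`` method, which does not exist on Python 3 iterators; the builtin ``next()`` is the portable way to pull a single batch out of a dataloader. A tiny illustration with a plain list standing in for ``dm.test_dataloader()``:

```python
batches = iter([{"x": 1}, {"x": 2}])

first = next(batches)     # works on any iterator in Python 3
# first = batches.next()  # AttributeError on Python 3: the method was removed
print(first)              # {'x': 1}
```
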
16 changes: 11 additions & 5 deletions tests/tests_pytorch/core/test_datamodules.py
@@ -21,7 +21,7 @@
import pytest
import torch

-from pytorch_lightning import LightningDataModule, Trainer
+from pytorch_lightning import LightningDataModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel
from pytorch_lightning.profilers.simple import SimpleProfiler
@@ -149,6 +149,8 @@ def test_dm_pickle_after_init():


def test_train_loop_only(tmpdir):
+seed_everything(7)
+
dm = ClassifDataModule()
model = ClassificationModel()

@@ -164,10 +166,12 @@
# fit model
trainer.fit(model, datamodule=dm)
assert trainer.state.finished, f"Training failed with {trainer.state}"
-assert trainer.callback_metrics["train_loss"] < 1.0
+assert trainer.callback_metrics["train_loss"] < 1.1


def test_train_val_loop_only(tmpdir):
+seed_everything(7)
+
dm = ClassifDataModule()
model = ClassificationModel()

@@ -180,7 +184,7 @@
# fit model
trainer.fit(model, datamodule=dm)
assert trainer.state.finished, f"Training failed with {trainer.state}"
-assert trainer.callback_metrics["train_loss"] < 1.0
+assert trainer.callback_metrics["train_loss"] < 1.1


def test_dm_checkpoint_save_and_load(tmpdir):
@@ -223,6 +227,8 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None:


def test_full_loop(tmpdir):
+seed_everything(7)
+
dm = ClassifDataModule()
model = ClassificationModel()

@@ -236,12 +242,12 @@
# validate
result = trainer.validate(model, dm)
assert dm.trainer is not None
assert result[0]["val_acc"] > 0.7
assert result[0]["val_acc"] > 0.6

# test
result = trainer.test(model, dm)
assert dm.trainer is not None
assert result[0]["test_acc"] > 0.6
assert result[0]["test_acc"] > 0.57


def test_dm_reload_dataloaders_every_n_epochs(tmpdir):
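
The datamodule tests now call ``seed_everything(7)`` before building data and model, and the loss/accuracy thresholds are loosened slightly; with a fixed seed the asserted metrics no longer depend on whatever RNG state earlier tests left behind. A minimal sketch of the seeding pattern, using ``BoringModel`` only as a stand-in for the classification model used in these tests:

```python
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.demos.boring_classes import BoringModel


def test_reproducible_run(tmpdir):
    seed_everything(7)  # seeds Python, NumPy and torch RNGs so repeated runs match
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=4,
        max_epochs=1,
        enable_progress_bar=False,
    )
    trainer.fit(BoringModel())
    # With the seed fixed, bounds such as `train_loss < 1.1` become deterministic
    # instead of varying with the order in which the test suite was run.
```
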
8 changes: 4 additions & 4 deletions tests/tests_pytorch/helpers/datamodules.py
@@ -20,7 +20,7 @@
from pytorch_lightning.core.datamodule import LightningDataModule
from tests_pytorch.helpers.datasets import MNIST, SklearnDataset, TrialMNIST

-_SKLEARN_AVAILABLE = RequirementCache("sklearn")
+_SKLEARN_AVAILABLE = RequirementCache("scikit-learn")


class MNISTDataModule(LightningDataModule):
@@ -54,7 +54,7 @@ def test_dataloader(self):
class SklearnDataModule(LightningDataModule):
def __init__(self, sklearn_dataset, x_type, y_type, batch_size: int = 10):
if not _SKLEARN_AVAILABLE:
-pytest.skip("`sklearn` is not available.")
+pytest.skip(str(_SKLEARN_AVAILABLE))
super().__init__()
self.batch_size = batch_size
self._x, self._y = sklearn_dataset
@@ -100,7 +100,7 @@ def sample(self):
class ClassifDataModule(SklearnDataModule):
def __init__(self, num_features=32, length=800, num_classes=3, batch_size=10):
if not _SKLEARN_AVAILABLE:
-pytest.skip("`sklearn` is not available.")
+pytest.skip(str(_SKLEARN_AVAILABLE))
from sklearn.datasets import make_classification

data = make_classification(
@@ -112,7 +112,7 @@ def __init__(self, num_features=32, length=800, num_classes=3, batch_size=10):
class RegressDataModule(SklearnDataModule):
def __init__(self, num_features=16, length=800, batch_size=10):
if not _SKLEARN_AVAILABLE:
-pytest.skip("`sklearn` is not available.")
+pytest.skip(str(_SKLEARN_AVAILABLE))
from sklearn.datasets import make_regression

x, y = make_regression(n_samples=length, n_features=num_features, random_state=42)
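
``RequirementCache`` resolves its argument against installed *distribution* metadata, and the distribution is published on PyPI as ``scikit-learn``, so checking for the import name ``sklearn`` can wrongly report the package as missing; passing ``str(_SKLEARN_AVAILABLE)`` to ``pytest.skip`` then reuses the cache's own explanation as the skip message. A small sketch, assuming ``RequirementCache`` is the one from ``lightning_utilities`` (the import path is not shown in this diff):

```python
from lightning_utilities.core.imports import RequirementCache  # assumed import path

_SKLEARN_AVAILABLE = RequirementCache("scikit-learn")  # PyPI distribution name, not "sklearn"

if not _SKLEARN_AVAILABLE:
    # str() gives a human-readable reason, suitable as a pytest.skip message.
    print(str(_SKLEARN_AVAILABLE))
else:
    # The import name still differs from the distribution name.
    from sklearn.datasets import make_classification

    x, y = make_classification(n_samples=10, n_features=4, random_state=42)
    print(x.shape, y.shape)
```
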
3 changes: 3 additions & 0 deletions tests/tests_pytorch/models/test_restore.py
@@ -26,6 +26,7 @@

import tests_pytorch.helpers.pipelines as tpipes
import tests_pytorch.helpers.utils as tutils
+from lightning_lite import seed_everything
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.demos.boring_classes import BoringModel, ManualOptimBoringModel
@@ -483,6 +484,8 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):

def test_running_test_pretrained_model_cpu(tmpdir):
"""Verify test() on pretrained model."""
+seed_everything(1)
+
dm = ClassifDataModule()
model = ClassificationModel()

17 changes: 11 additions & 6 deletions tests/tests_pytorch/strategies/test_colossalai.py
@@ -20,7 +20,7 @@
from torch.optim import Optimizer
from torchmetrics import Accuracy

-from pytorch_lightning import LightningModule, Trainer
+from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.demos.boring_classes import BoringModel
from pytorch_lightning.plugins.precision import ColossalAIPrecisionPlugin
@@ -269,6 +269,8 @@ def test_multi_gpu_checkpointing(tmpdir):

@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True)
def test_multi_gpu_model_colossalai_fit_test(tmpdir):
+seed_everything(7)
+
dm = ClassifDataModule()
model = ModelParallelClassificationModel()
trainer = Trainer(
@@ -280,10 +282,13 @@ def test_multi_gpu_model_colossalai_fit_test(tmpdir):
max_epochs=1,
)
trainer.fit(model, datamodule=dm)
-out_metrics = trainer.callback_metrics
-assert out_metrics["train_acc"] > 0.7
-assert out_metrics["val_acc"] > 0.7
+
+if trainer.is_global_zero:
+    out_metrics = trainer.callback_metrics
+    assert out_metrics["train_acc"].item() > 0.7
+    assert out_metrics["val_acc"].item() > 0.7

result = trainer.test(model, datamodule=dm)
-for out in result:
-    assert out["test_acc"] > 0.7
+if trainer.is_global_zero:
+    for out in result:
+        assert out["test_acc"] > 0.7
1 change: 1 addition & 0 deletions tests/tests_pytorch/strategies/test_deepspeed_strategy.py
@@ -688,6 +688,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config
_assert_save_model_is_equal(model, tmpdir, trainer)


+@pytest.mark.skip(reason="skipped due to deepspeed/#2449, keep track @rohitgr7")
@pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
