diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py index e69de29bb2d1d..d7cec9fc1bc3a 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -0,0 +1,10 @@ +import os + +from pytorch_lightning.utilities import _module_available + +EXAMPLES_ROOT = os.path.dirname(__file__) +PACKAGE_ROOT = os.path.dirname(EXAMPLES_ROOT) +DATASETS_PATH = os.path.join(PACKAGE_ROOT, 'Datasets') + +TORCHVISION_AVAILABLE = _module_available("torchvision") +DALI_AVAILABLE = _module_available("nvidia.dali") diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index eb6403aaa2364..58a117a648458 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -13,17 +13,20 @@ # limitations under the License. from argparse import ArgumentParser + import torch -from torch import nn import torch.nn.functional as F +from torch import nn from torch.utils.data import DataLoader -import pytorch_lightning as pl from torch.utils.data import random_split -try: +import pytorch_lightning as pl +from pl_examples import TORCHVISION_AVAILABLE + +if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms -except ModuleNotFoundError: +else: from tests.base.datasets import MNIST diff --git a/pl_examples/basic_examples/image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py similarity index 92% rename from pl_examples/basic_examples/image_classifier.py rename to pl_examples/basic_examples/backbone_image_classifier.py index 0968525e41197..91a8481de7fd9 100644 --- a/pl_examples/basic_examples/image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -15,14 +15,16 @@ from argparse import ArgumentParser import torch -import pytorch_lightning as pl from torch.nn import functional as F from torch.utils.data import DataLoader, random_split -try: +import pytorch_lightning as pl +from pl_examples import 
DATASETS_PATH, TORCHVISION_AVAILABLE + +if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms -except Exception as e: +else: from tests.base.datasets import MNIST @@ -96,8 +98,8 @@ def cli_main(): # ------------ # data # ------------ - dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor()) - mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) + dataset = MNIST(DATASETS_PATH, train=True, download=True, transform=transforms.ToTensor()) + mnist_test = MNIST(DATASETS_PATH, train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) train_loader = DataLoader(mnist_train, batch_size=args.batch_size) diff --git a/pl_examples/basic_examples/mnist_classifier_dali.py b/pl_examples/basic_examples/dali_image_classifier.py similarity index 96% rename from pl_examples/basic_examples/mnist_classifier_dali.py rename to pl_examples/basic_examples/dali_image_classifier.py index 649198053a01b..0a39f1cb9a9ae 100644 --- a/pl_examples/basic_examples/mnist_classifier_dali.py +++ b/pl_examples/basic_examples/dali_image_classifier.py @@ -22,21 +22,21 @@ from torch.utils.data import random_split import pytorch_lightning as pl +from pl_examples import TORCHVISION_AVAILABLE, DALI_AVAILABLE -try: +if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms -except Exception: +else: from tests.base.datasets import MNIST -try: +if DALI_AVAILABLE: import nvidia.dali.ops as ops - import nvidia.dali.types as types from nvidia.dali.pipeline import Pipeline from nvidia.dali.plugin.pytorch import DALIClassificationIterator -except (ImportError, ModuleNotFoundError): +else: warn('NVIDIA DALI is not available') - ops, types, Pipeline, DALIClassificationIterator = ..., ..., ABC, ABC + ops, Pipeline, DALIClassificationIterator = ..., ABC, ABC class ExternalMNISTInputIterator(object): @@ 
class MNISTDataModule(LightningDataModule):
    """
    Standard MNIST, train, val, test splits and transforms
    """

    name = "mnist"

    def __init__(
        self,
        data_dir: str = DATASETS_PATH,
        val_split: int = 5000,
        num_workers: int = 16,
        normalize: bool = False,
        seed: int = 42,
        batch_size: int = 32,
        *args,
        **kwargs,
    ):
        """
        Args:
            data_dir: where to save/load the data
            val_split: how many of the training images to use for the validation split
            num_workers: how many workers to use for loading data
            normalize: If true applies image normalize
            seed: seed of the random generator used to draw the train/val split
            batch_size: number of samples per batch in every dataloader
        """
        super().__init__(*args, **kwargs)

        self.dims = (1, 28, 28)
        self.data_dir = data_dir
        self.val_split = val_split
        self.num_workers = num_workers
        self.normalize = normalize
        self.seed = seed
        self.batch_size = batch_size
        # placeholders until ``setup`` assigns the real train/val subsets
        self.dataset_train = ...
        self.dataset_val = ...
        # must come after ``self.normalize`` is set: the property reads it
        self.test_transforms = self.default_transforms

    @property
    def num_classes(self):
        """Number of target classes."""
        return 10

    def prepare_data(self):
        """Saves MNIST files to `data_dir`"""
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage: Optional[str] = None):
        """Split the train and valid dataset, reproducibly with respect to ``self.seed``"""
        # imported here because the module itself only pulls names from ``torch.utils.data``
        import torch

        transform = self.default_transforms
        # no ``transform`` keyword at all when transforms are unavailable (``None``)
        extra = dict(transform=transform) if transform else {}
        dataset = MNIST(self.data_dir, train=True, download=False, **extra)
        train_length = len(dataset)
        # A dedicated generator makes the split depend on ``seed`` only, not on whatever
        # state the global RNG happens to be in when ``setup`` is called.
        self.dataset_train, self.dataset_val = random_split(
            dataset,
            [train_length - self.val_split, self.val_split],
            generator=torch.Generator().manual_seed(self.seed),
        )

    def train_dataloader(self):
        """MNIST train set removes a subset to use for validation"""
        loader = DataLoader(
            self.dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            drop_last=True,
            pin_memory=True,
        )
        return loader

    def val_dataloader(self):
        """MNIST val set uses a subset of the training set for validation"""
        # NOTE(review): ``drop_last=True`` discards the final incomplete batch, so not every
        # validation sample is evaluated unless the subset size divides by ``batch_size``.
        loader = DataLoader(
            self.dataset_val,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            drop_last=True,
            pin_memory=True,
        )
        return loader

    def test_dataloader(self):
        """MNIST test set uses the test split"""
        extra = dict(transform=self.test_transforms) if self.test_transforms else {}
        dataset = MNIST(self.data_dir, train=False, download=False, **extra)
        # NOTE(review): same ``drop_last=True`` caveat as the validation loader.
        loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            drop_last=True,
            pin_memory=True,
        )
        return loader

    @property
    def default_transforms(self):
        """Transform applied to the images: ``None`` without torchvision, otherwise
        ``ToTensor``, followed by ``Normalize(mean=0.5, std=0.5)`` when ``normalize`` is set."""
        if not TORCHVISION_AVAILABLE:
            return None
        if self.normalize:
            mnist_transforms = transform_lib.Compose(
                [transform_lib.ToTensor(), transform_lib.Normalize(mean=(0.5,), std=(0.5,))]
            )
        else:
            mnist_transforms = transform_lib.ToTensor()

        return mnist_transforms
DataLoader(mnist_val, batch_size=args.batch_size) - test_loader = DataLoader(mnist_test, batch_size=args.batch_size) + dm = MNISTDataModule.from_argparse_args(args) # ------------ # model @@ -101,12 +91,13 @@ def cli_main(): # training # ------------ trainer = pl.Trainer.from_argparse_args(args) - trainer.fit(model, train_loader, val_loader) + trainer.fit(model, datamodule=dm) # ------------ # testing # ------------ - result = trainer.test(test_dataloaders=test_loader) + result = trainer.test(datamodule=dm) + pprint(result) if __name__ == '__main__': diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 013d49b8c9baa..3b9613a132030 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -5,12 +5,7 @@ import pytest import torch -try: - from nvidia.dali import ops, types, pipeline, plugin -except (ImportError, ModuleNotFoundError): - DALI_AVAILABLE = False -else: - DALI_AVAILABLE = True +from pl_examples import DALI_AVAILABLE ARGS_DEFAULT = """ --max_epochs 1 \ @@ -38,8 +33,8 @@ # ToDo: fix this failing example # @pytest.mark.parametrize('import_cli', [ -# 'pl_examples.basic_examples.mnist_classifier', -# 'pl_examples.basic_examples.image_classifier', +# 'pl_examples.basic_examples.simple_image_classifier', +# 'pl_examples.basic_examples.backbone_image_classifier', # 'pl_examples.basic_examples.autoencoder', # ]) # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -54,8 +49,8 @@ # ToDo: fix this failing example # @pytest.mark.parametrize('import_cli', [ -# 'pl_examples.basic_examples.mnist_classifier', -# 'pl_examples.basic_examples.image_classifier', +# 'pl_examples.basic_examples.simple_image_classifier', +# 'pl_examples.basic_examples.backbone_image_classifier', # 'pl_examples.basic_examples.autoencoder', # ]) # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -69,8 +64,8 @@ @pytest.mark.parametrize('import_cli', [ - 
'pl_examples.basic_examples.mnist_classifier', - 'pl_examples.basic_examples.image_classifier', + 'pl_examples.basic_examples.simple_image_classifier', + 'pl_examples.basic_examples.backbone_image_classifier', 'pl_examples.basic_examples.autoencoder', ]) @pytest.mark.parametrize('cli_args', [ARGS_DEFAULT]) @@ -87,7 +82,7 @@ def test_examples_cpu(import_cli, cli_args): @pytest.mark.skipif(platform.system() != 'Linux', reason='Only applies to Linux platform.') @pytest.mark.parametrize('cli_args', [ARGS_GPU]) def test_examples_mnist_dali(cli_args): - from pl_examples.basic_examples.mnist_classifier_dali import cli_main + from pl_examples.basic_examples.dali_image_classifier import cli_main with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): cli_main() diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 7075c790c60d9..46dde20aa906e 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""General utilities""" +import importlib from enum import Enum import numpy @@ -21,13 +22,26 @@ from pytorch_lightning.utilities.distributed import rank_zero_only, rank_zero_warn, rank_zero_info from pytorch_lightning.utilities.parsing import AttributeDict, flatten_dict, is_picklable -try: - from apex import amp -except ImportError: - APEX_AVAILABLE = False -else: - APEX_AVAILABLE = True +def _module_available(module_path: str) -> bool: + """Testing if given module is avalaible in your env + + >>> _module_available('system') + True + >>> _module_available('bla.bla') + False + """ + mods = module_path.split('.') + assert mods, 'nothing given to test' + # it has to be tested as per partets + for i in range(1, len(mods)): + module_path = '.'.join(mods[:i]) + if importlib.util.find_spec(module_path) is None: + return False + return True + + +APEX_AVAILABLE = _module_available("apex.amp") NATIVE_AMP_AVALAIBLE = hasattr(torch.cuda, "amp") and hasattr(torch.cuda.amp, "autocast") FLOAT16_EPSILON = numpy.finfo(numpy.float16).eps