From 0abd53a41025fa2d97af2038cdc6b2c591cfda5e Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Fri, 20 Nov 2020 14:02:55 +0900 Subject: [PATCH 01/20] Adding flags to datamodules --- .../datamodules/binary_mnist_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/cifar10_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/cityscapes_datamodule.py | 29 +++++++++++++------ .../datamodules/fashion_mnist_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/imagenet_datamodule.py | 27 ++++++++++++----- pl_bolts/datamodules/kitti_datamodule.py | 9 ++++-- pl_bolts/datamodules/mnist_datamodule.py | 20 ++++++++++--- pl_bolts/datamodules/stl10_datamodule.py | 23 +++++++++++---- .../datamodules/vocdetection_datamodule.py | 10 +++++-- 9 files changed, 146 insertions(+), 59 deletions(-) diff --git a/pl_bolts/datamodules/binary_mnist_datamodule.py b/pl_bolts/datamodules/binary_mnist_datamodule.py index e4dba120bf..bdb71090b3 100644 --- a/pl_bolts/datamodules/binary_mnist_datamodule.py +++ b/pl_bolts/datamodules/binary_mnist_datamodule.py @@ -53,6 +53,9 @@ def __init__( num_workers: int = 16, normalize: bool = False, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -62,6 +65,11 @@ def __init__( val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data normalize: If true applies image normalize + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -76,6 +84,9 @@ def __init__( self.num_workers = num_workers self.normalize = normalize self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -112,10 
+123,10 @@ def train_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset_train, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -138,10 +149,10 @@ def val_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset_val, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -159,10 +170,10 @@ def test_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/cifar10_datamodule.py b/pl_bolts/datamodules/cifar10_datamodule.py index 19535b2932..52a0572da8 100644 --- a/pl_bolts/datamodules/cifar10_datamodule.py +++ b/pl_bolts/datamodules/cifar10_datamodule.py @@ -71,6 +71,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -80,6 +83,11 @@ def __init__( val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data batch_size: number of examples per training/eval step + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -94,6 +102,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + 
self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.data_dir = data_dir if data_dir is not None else os.getcwd() self.num_samples = 60000 - val_split @@ -128,10 +139,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -151,10 +162,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, - drop_last=True + pin_memory=self.pin_memory, + drop_last=self.drop_last ) return loader @@ -168,10 +179,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/cityscapes_datamodule.py b/pl_bolts/datamodules/cityscapes_datamodule.py index a0d623253e..116c64f1e4 100644 --- a/pl_bolts/datamodules/cityscapes_datamodule.py +++ b/pl_bolts/datamodules/cityscapes_datamodule.py @@ -68,6 +68,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -79,6 +82,11 @@ def __init__( target_type: targets to use, either 'instance' or 'semantic' num_workers: how many workers to use for loading data batch_size: number of examples per training/eval step + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) 
@@ -97,6 +105,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.target_transforms = None @property @@ -125,10 +136,10 @@ def train_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -150,10 +161,10 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, - drop_last=True + pin_memory=self.pin_memory, + drop_last=self.drop_last ) return loader @@ -174,10 +185,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/fashion_mnist_datamodule.py b/pl_bolts/datamodules/fashion_mnist_datamodule.py index 32ccb2ce81..0556ac22cf 100644 --- a/pl_bolts/datamodules/fashion_mnist_datamodule.py +++ b/pl_bolts/datamodules/fashion_mnist_datamodule.py @@ -51,6 +51,9 @@ def __init__( val_split: int = 5000, num_workers: int = 16, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -59,6 +62,11 @@ def __init__( data_dir: where to save/load the data val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last 
incomplete batch """ super().__init__(*args, **kwargs) @@ -72,6 +80,9 @@ def __init__( self.val_split = val_split self.num_workers = num_workers self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -108,10 +119,10 @@ def train_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset_train, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -135,10 +146,10 @@ def val_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset_val, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -156,10 +167,10 @@ def test_dataloader(self, batch_size=32, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/imagenet_datamodule.py b/pl_bolts/datamodules/imagenet_datamodule.py index 3dcd43e279..3c0a61c139 100644 --- a/pl_bolts/datamodules/imagenet_datamodule.py +++ b/pl_bolts/datamodules/imagenet_datamodule.py @@ -64,6 +64,9 @@ def __init__( image_size: int = 224, num_workers: int = 16, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -74,6 +77,10 @@ def __init__( num_imgs_per_val_class: how many images per class for the validation set image_size: final image size num_workers: how many data workers + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + 
drop_last: If true drops the last incomplete batch batch_size: batch_size """ super().__init__(*args, **kwargs) @@ -90,6 +97,9 @@ def __init__( self.meta_dir = meta_dir self.num_imgs_per_val_class = num_imgs_per_val_class self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.num_samples = 1281167 - self.num_imgs_per_val_class * self.num_classes @property @@ -152,10 +162,10 @@ def train_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -177,9 +187,10 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -197,10 +208,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/kitti_datamodule.py b/pl_bolts/datamodules/kitti_datamodule.py index 5b39228742..db4b8d34b4 100644 --- a/pl_bolts/datamodules/kitti_datamodule.py +++ b/pl_bolts/datamodules/kitti_datamodule.py @@ -21,6 +21,7 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, *args, **kwargs, ): @@ -55,12 +56,14 @@ def __init__( num_workers: how many workers to use for loading data batch_size: the batch size seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch """ super().__init__(*args, **kwargs) self.data_dir = data_dir if data_dir is not None else os.getcwd() self.batch_size = batch_size self.num_workers = 
num_workers self.seed = seed + self.shuffle = shuffle self.default_transforms = transforms.Compose([ transforms.ToTensor(), @@ -82,20 +85,20 @@ def __init__( def train_dataloader(self): loader = DataLoader(self.trainset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers) return loader def val_dataloader(self): loader = DataLoader(self.valset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers) return loader def test_dataloader(self): loader = DataLoader(self.testset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers) return loader diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index 38d3d001bb..84f5ebc7a3 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -52,6 +52,9 @@ def __init__( normalize: bool = False, seed: int = 42, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -61,6 +64,12 @@ def __init__( val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data normalize: If true applies image normalize + batch_size: size of batch + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -74,8 +83,11 @@ def __init__( self.val_split = val_split self.num_workers = num_workers self.normalize = normalize - self.seed = seed self.batch_size = batch_size + self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -109,10 +121,10 @@ def train_dataloader(self): loader = 
DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True, + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index b1ee3058a8..2cb774733b 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -62,6 +62,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -72,6 +75,11 @@ def __init__( train_val_split: how many images from the labeled training split to use for validation num_workers: how many workers to use for loading data batch_size: the batch size + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -87,6 +95,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.num_unlabeled_samples = 100000 - unlabeled_val_split @property @@ -115,10 +126,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -153,10 +164,10 @@ def train_dataloader_mixed(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return 
loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index 008d859c35..78e02594b9 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -114,6 +114,9 @@ def __init__( year: str = "2012", num_workers: int = 16, normalize: bool = False, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -128,6 +131,9 @@ def __init__( self.data_dir = data_dir self.num_workers = num_workers self.normalize = normalize + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -164,9 +170,9 @@ def train_dataloader(self, batch_size=1, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, + pin_memory=self.pin_memory, collate_fn=_collate_fn, ) return loader From 0cc2677fe20eb07566bc2b0f0996e9b014ed9769 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:46:32 +0900 Subject: [PATCH 02/20] Finishing up changes --- pl_bolts/datamodules/mnist_datamodule.py | 14 ++++++---- pl_bolts/datamodules/sklearn_datamodule.py | 23 +++++++++------ .../datamodules/ssl_imagenet_datamodule.py | 23 +++++++++------ pl_bolts/datamodules/stl10_datamodule.py | 28 ++++++++++--------- .../datamodules/vocdetection_datamodule.py | 6 ++-- 5 files changed, 57 insertions(+), 37 deletions(-) diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index 84f5ebc7a3..e91d2e79bb 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -144,10 +144,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True, + drop_last=self.drop_last, + 
pin_memory=self.pin_memory, ) return loader @@ -162,8 +162,12 @@ def test_dataloader(self): dataset = MNIST(self.data_dir, train=False, download=False, transform=transforms) loader = DataLoader( - dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, drop_last=True, - pin_memory=True + dataset, + batch_size=self.batch_size, + shuffle=self.shuffle, + num_workers=self.num_workers, + drop_last=self.drop_last, + pin_memory=self.pin_memory, ) return loader diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index dd66a40678..07d9f91e9c 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -149,12 +149,17 @@ def __init__( num_workers=2, random_state=1234, shuffle=True, + pin_memory=False, + drop_last=False, *args, **kwargs, ): super().__init__(*args, **kwargs) self.num_workers = num_workers + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last # shuffle x and y if shuffle and _SKLEARN_AVAILABLE: @@ -196,10 +201,10 @@ def train_dataloader(self, batch_size: int = 16): loader = DataLoader( self.train_dataset, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -207,10 +212,10 @@ def val_dataloader(self, batch_size: int = 16): loader = DataLoader( self.val_dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -218,10 +223,10 @@ def test_dataloader(self, batch_size: int = 16): loader = DataLoader( self.test_dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return 
loader diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index 06bcf77ce1..f4333dc551 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -26,6 +26,9 @@ def __init__( data_dir, meta_dir=None, num_workers=16, + shuffle=False, + pin_memory=False, + drop_last=False, *args, **kwargs, ): @@ -39,6 +42,9 @@ def __init__( self.data_dir = data_dir self.num_workers = num_workers self.meta_dir = meta_dir + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -85,10 +91,10 @@ def train_dataloader(self, batch_size, num_images_per_class=-1, add_normalize=Fa loader = DataLoader( dataset, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -103,9 +109,10 @@ def val_dataloader(self, batch_size, num_images_per_class=50, add_normalize=Fals loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -120,10 +127,10 @@ def test_dataloader(self, batch_size, num_images_per_class, add_normalize=False) loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 2cb774733b..73b67ae584 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -191,9 +191,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + 
shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drpo_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -233,10 +234,10 @@ def val_dataloader_mixed(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -254,10 +255,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -272,9 +273,10 @@ def train_dataloader_labeled(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -292,10 +294,10 @@ def val_dataloader_labeled(self): loader = DataLoader( labeled_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index 78e02594b9..8db9d8d9a9 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -172,6 +172,7 @@ def train_dataloader(self, batch_size=1, transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, + drop_last=self.drop_last pin_memory=self.pin_memory, collate_fn=_collate_fn, ) @@ -196,9 +197,10 @@ def val_dataloader(self, batch_size=1, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - 
pin_memory=True, + drop_last=self.drop_last + pin_memory=self.pin_memory, collate_fn=_collate_fn, ) return loader From e25dc6447750109567cea8431b48f7378c3a5d1b Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:51:08 +0900 Subject: [PATCH 03/20] Fixing syntax error --- pl_bolts/datamodules/ssl_imagenet_datamodule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index f4333dc551..791ef76e28 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -111,7 +111,7 @@ def val_dataloader(self, batch_size, num_images_per_class=50, add_normalize=Fals batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader From 90ba5985f7ce106ddf465bc3c1097f5f5398492f Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:53:30 +0900 Subject: [PATCH 04/20] More syntax errors --- pl_bolts/datamodules/stl10_datamodule.py | 2 +- pl_bolts/datamodules/vocdetection_datamodule.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 73b67ae584..5fb95a6c5c 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -275,7 +275,7 @@ def train_dataloader_labeled(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index 8db9d8d9a9..eb3a6c1a30 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -172,7 +172,7 @@ def train_dataloader(self, 
batch_size=1, transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory, collate_fn=_collate_fn, ) @@ -199,7 +199,7 @@ def val_dataloader(self, batch_size=1, transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory, collate_fn=_collate_fn, ) From 89c1d604cb558e53e5d6279d62c2fe7e44bd7042 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:55:16 +0900 Subject: [PATCH 05/20] More --- pl_bolts/datamodules/stl10_datamodule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 5fb95a6c5c..b535993646 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -193,7 +193,7 @@ def val_dataloader(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drpo_last=self.drop_last + drpo_last=self.drop_last, pin_memory=self.pin_memory ) return loader @@ -275,7 +275,7 @@ def train_dataloader_labeled(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last, + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader From 2d4f306bb42a230dbce6efc907d559c7da339d69 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 15:24:57 +0900 Subject: [PATCH 06/20] Adding drop_last flag to sklearn test --- tests/datamodules/test_sklearn_dataloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index 7e35e20c66..d763b45ccb 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -27,7 +27,7 @@ def test_dataloader(): # 
----------------------------- # train # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, val_split=0.2, test_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, val_split=0.2, test_split=0.2, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() From 6124ec774b0815e405707fc25fcfec1962630835 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 15:42:24 +0900 Subject: [PATCH 07/20] Adding drop_last flag to sklearn test --- tests/datamodules/test_sklearn_dataloaders.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index d763b45ccb..249f46d652 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -39,7 +39,7 @@ def test_dataloader(): # ----------------------------- # train + val # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_val=x_val, y_val=y_val, test_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, x_val=x_val, y_val=y_val, test_split=0.2, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() @@ -50,7 +50,7 @@ def test_dataloader(): # ----------------------------- # train + test # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() @@ -61,7 +61,7 @@ def test_dataloader(): # ----------------------------- # train + val + test # ----------------------------- - loaders = 
SklearnDataModule(X, y, x_val, y_val, x_test, y_test, random_state=1234) + loaders = SklearnDataModule(X, y, x_val, y_val, x_test, y_test, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() From f909ad2ff6566bd8afdfe91883d52f8513099505 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 16:47:44 +0900 Subject: [PATCH 08/20] Updating doc to reflect drop_last=False --- pl_bolts/datamodules/sklearn_datamodule.py | 2 +- tests/datamodules/test_sklearn_dataloaders.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index 07d9f91e9c..ed1f1669c1 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -124,7 +124,7 @@ class SklearnDataModule(LightningDataModule): >>> len(train_loader.dataset) 355 >>> len(train_loader) - 11 + 12 >>> # validation set >>> val_loader = loaders.val_dataloader(batch_size=32) >>> len(val_loader.dataset) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index 249f46d652..68931719e6 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -50,7 +50,9 @@ def test_dataloader(): # ----------------------------- # train + test # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True) + loaders = SklearnDataModule( + X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True + ) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() From 5d3565b21edcb361ead0e5ab59907cb6fee55d8d Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Fri, 20 Nov 2020 14:02:55 +0900 Subject: [PATCH 09/20] Adding
flags to datamodules --- .../datamodules/binary_mnist_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/cifar10_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/cityscapes_datamodule.py | 29 +++++++++++++------ .../datamodules/fashion_mnist_datamodule.py | 29 +++++++++++++------ pl_bolts/datamodules/imagenet_datamodule.py | 27 ++++++++++++----- pl_bolts/datamodules/kitti_datamodule.py | 9 ++++-- pl_bolts/datamodules/mnist_datamodule.py | 19 +++++++++--- pl_bolts/datamodules/stl10_datamodule.py | 23 +++++++++++---- .../datamodules/vocdetection_datamodule.py | 10 +++++-- 9 files changed, 145 insertions(+), 59 deletions(-) diff --git a/pl_bolts/datamodules/binary_mnist_datamodule.py b/pl_bolts/datamodules/binary_mnist_datamodule.py index db480c4f41..40dab900fb 100644 --- a/pl_bolts/datamodules/binary_mnist_datamodule.py +++ b/pl_bolts/datamodules/binary_mnist_datamodule.py @@ -54,6 +54,9 @@ def __init__( normalize: bool = False, seed: int = 42, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -64,6 +67,11 @@ def __init__( num_workers: how many workers to use for loading data normalize: If true applies image normalize batch_size: size of batch + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -79,6 +87,9 @@ def __init__( self.normalize = normalize self.seed = seed self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -111,10 +122,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True 
+ drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -133,10 +144,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -150,10 +161,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/cifar10_datamodule.py b/pl_bolts/datamodules/cifar10_datamodule.py index d8822591dd..cc81e51924 100644 --- a/pl_bolts/datamodules/cifar10_datamodule.py +++ b/pl_bolts/datamodules/cifar10_datamodule.py @@ -71,6 +71,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -80,6 +83,11 @@ def __init__( val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data batch_size: number of examples per training/eval step + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -94,6 +102,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.data_dir = data_dir if data_dir is not None else os.getcwd() self.num_samples = 50000 - val_split @@ -128,10 +139,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, 
batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -151,10 +162,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, - drop_last=True + pin_memory=self.pin_memory, + drop_last=self.drop_last ) return loader @@ -168,10 +179,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/cityscapes_datamodule.py b/pl_bolts/datamodules/cityscapes_datamodule.py index 4789268888..33236500d6 100644 --- a/pl_bolts/datamodules/cityscapes_datamodule.py +++ b/pl_bolts/datamodules/cityscapes_datamodule.py @@ -68,6 +68,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -79,6 +82,11 @@ def __init__( target_type: targets to use, either 'instance' or 'semantic' num_workers: how many workers to use for loading data batch_size: number of examples per training/eval step + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -97,6 +105,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.target_transforms = None @property @@ -125,10 +136,10 @@ def 
train_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -150,10 +161,10 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, - drop_last=True + pin_memory=self.pin_memory, + drop_last=self.drop_last ) return loader @@ -174,10 +185,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/fashion_mnist_datamodule.py b/pl_bolts/datamodules/fashion_mnist_datamodule.py index c50cf59572..274abb8c3c 100644 --- a/pl_bolts/datamodules/fashion_mnist_datamodule.py +++ b/pl_bolts/datamodules/fashion_mnist_datamodule.py @@ -52,6 +52,9 @@ def __init__( num_workers: int = 16, seed: int = 42, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -61,6 +64,11 @@ def __init__( val_split: how many of the training images to use for the validation split num_workers: how many workers to use for loading data batch_size: size of batch + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -75,6 +83,9 @@ def __init__( self.num_workers = num_workers self.seed = seed self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def 
num_classes(self): @@ -107,10 +118,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -130,10 +141,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -147,10 +158,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/imagenet_datamodule.py b/pl_bolts/datamodules/imagenet_datamodule.py index 3dcd43e279..3c0a61c139 100644 --- a/pl_bolts/datamodules/imagenet_datamodule.py +++ b/pl_bolts/datamodules/imagenet_datamodule.py @@ -64,6 +64,9 @@ def __init__( image_size: int = 224, num_workers: int = 16, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -74,6 +77,10 @@ def __init__( num_imgs_per_val_class: how many images per class for the validation set image_size: final image size num_workers: how many data workers + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch batch_size: batch_size """ super().__init__(*args, **kwargs) @@ -90,6 +97,9 @@ def __init__( self.meta_dir = meta_dir self.num_imgs_per_val_class = num_imgs_per_val_class self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last 
self.num_samples = 1281167 - self.num_imgs_per_val_class * self.num_classes @property @@ -152,10 +162,10 @@ def train_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -177,9 +187,10 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -197,10 +208,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/kitti_datamodule.py b/pl_bolts/datamodules/kitti_datamodule.py index c07acbd5eb..099e0ebf89 100644 --- a/pl_bolts/datamodules/kitti_datamodule.py +++ b/pl_bolts/datamodules/kitti_datamodule.py @@ -28,6 +28,7 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, *args, **kwargs, ): @@ -62,6 +63,7 @@ def __init__( num_workers: how many workers to use for loading data batch_size: the batch size seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch """ if not _TORCHVISION_AVAILABLE: raise ModuleNotFoundError( # pragma: no-cover @@ -73,6 +75,7 @@ def __init__( self.batch_size = batch_size self.num_workers = num_workers self.seed = seed + self.shuffle = shuffle # split into train, val, test kitti_dataset = KittiDataset(self.data_dir, transform=self._default_transforms()) @@ -88,21 +91,21 @@ def __init__( def train_dataloader(self): loader = DataLoader(self.trainset, batch_size=self.batch_size, - shuffle=True, + 
shuffle=self.shuffle, num_workers=self.num_workers) return loader def val_dataloader(self): loader = DataLoader(self.valset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers) return loader def test_dataloader(self): loader = DataLoader(self.testset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers) return loader diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index 0764e20725..f6acad4f60 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -52,6 +52,9 @@ def __init__( normalize: bool = False, seed: int = 42, batch_size: int = 32, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -62,6 +65,11 @@ def __init__( num_workers: how many workers to use for loading data normalize: If true applies image normalize batch_size: size of batch + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -75,8 +83,11 @@ def __init__( self.val_split = val_split self.num_workers = num_workers self.normalize = normalize - self.seed = seed self.batch_size = batch_size + self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -107,10 +118,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True, + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 
3842725d30..d3274d82bf 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -62,6 +62,9 @@ def __init__( num_workers: int = 16, batch_size: int = 32, seed: int = 42, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -72,6 +75,11 @@ def __init__( train_val_split: how many images from the labeled training split to use for validation num_workers: how many workers to use for loading data batch_size: the batch size + seed: random seed to be used for train/val/test splits + shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ super().__init__(*args, **kwargs) @@ -87,6 +95,9 @@ def __init__( self.num_workers = num_workers self.batch_size = batch_size self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.num_unlabeled_samples = 100000 - unlabeled_val_split @property @@ -115,10 +126,10 @@ def train_dataloader(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -153,10 +164,10 @@ def train_dataloader_mixed(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index 9e75e71918..d919e1026b 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -113,6 +113,9 @@ def __init__( year: str = "2012", num_workers: int = 16, 
normalize: bool = False, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -127,6 +130,9 @@ def __init__( self.data_dir = data_dir self.num_workers = num_workers self.normalize = normalize + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -163,9 +169,9 @@ def train_dataloader(self, batch_size=1, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, + pin_memory=self.pin_memory, collate_fn=_collate_fn, ) return loader From 685fdaca2d72b984e91fef586e6d5b2d3ad35681 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:46:32 +0900 Subject: [PATCH 10/20] Finishing up changes --- pl_bolts/datamodules/mnist_datamodule.py | 14 ++++++---- pl_bolts/datamodules/sklearn_datamodule.py | 23 +++++++++------ .../datamodules/ssl_imagenet_datamodule.py | 23 +++++++++------ pl_bolts/datamodules/stl10_datamodule.py | 28 ++++++++++--------- .../datamodules/vocdetection_datamodule.py | 6 ++-- 5 files changed, 57 insertions(+), 37 deletions(-) diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index f6acad4f60..d26b023064 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -138,10 +138,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True, + drop_last=self.drop_last, + pin_memory=self.pin_memory, ) return loader @@ -153,8 +153,12 @@ def test_dataloader(self): dataset = MNIST(self.data_dir, train=False, download=False, transform=transforms) loader = DataLoader( - dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, drop_last=True, - pin_memory=True + dataset, + 
batch_size=self.batch_size, + shuffle=self.shuffle, + num_workers=self.num_workers, + drop_last=self.drop_last, + pin_memory=self.pin_memory, ) return loader diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index d64652ecd5..d8e62f5878 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -151,6 +151,8 @@ def __init__( random_state=1234, shuffle=True, batch_size: int = 16, + pin_memory=False, + drop_last=False, *args, **kwargs, ): @@ -158,6 +160,9 @@ def __init__( super().__init__(*args, **kwargs) self.num_workers = num_workers self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last # shuffle x and y if shuffle and _SKLEARN_AVAILABLE: @@ -199,10 +204,10 @@ def train_dataloader(self): loader = DataLoader( self.train_dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -210,10 +215,10 @@ def val_dataloader(self): loader = DataLoader( self.val_dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -221,10 +226,10 @@ def test_dataloader(self): loader = DataLoader( self.test_dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index ee50f4b091..45aa5e149e 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -27,6 +27,9 @@ def __init__( meta_dir=None, 
num_workers=16, batch_size: int = 32, + shuffle=False, + pin_memory=False, + drop_last=False, *args, **kwargs, ): @@ -41,6 +44,9 @@ def __init__( self.num_workers = num_workers self.meta_dir = meta_dir self.batch_size = batch_size + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): @@ -87,10 +93,10 @@ def train_dataloader(self, num_images_per_class=-1, add_normalize=False): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=True, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -105,9 +111,10 @@ def val_dataloader(self, num_images_per_class=50, add_normalize=False): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -122,10 +129,10 @@ def test_dataloader(self, num_images_per_class, add_normalize=False): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index d3274d82bf..0955aae318 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -191,9 +191,10 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drpo_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -233,10 +234,10 @@ def val_dataloader_mixed(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, 
num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -254,10 +255,10 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader @@ -272,9 +273,10 @@ def train_dataloader_labeled(self): loader = DataLoader( dataset_train, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True + drop_last=self.drop_last + pin_memory=self.pin_memory ) return loader @@ -292,10 +294,10 @@ def val_dataloader_labeled(self): loader = DataLoader( labeled_val, batch_size=self.batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=True, - pin_memory=True + drop_last=self.drop_last, + pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index d919e1026b..05cef7a729 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -171,6 +171,7 @@ def train_dataloader(self, batch_size=1, transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, + drop_last=self.drop_last pin_memory=self.pin_memory, collate_fn=_collate_fn, ) @@ -195,9 +196,10 @@ def val_dataloader(self, batch_size=1, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=False, + shuffle=self.shuffle, num_workers=self.num_workers, - pin_memory=True, + drop_last=self.drop_last + pin_memory=self.pin_memory, collate_fn=_collate_fn, ) return loader From e70bb8ec3e7adfd80b2cf9f3ee89faa27eee28da Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:51:08 +0900 Subject: [PATCH 11/20] Fixing syntax error --- 
pl_bolts/datamodules/ssl_imagenet_datamodule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index 45aa5e149e..a4576f82fd 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -113,7 +113,7 @@ def val_dataloader(self, num_images_per_class=50, add_normalize=False): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader From 0f50cb59b33854bdb331f6362aea66efb72542af Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:53:30 +0900 Subject: [PATCH 12/20] More syntax errors --- pl_bolts/datamodules/stl10_datamodule.py | 2 +- pl_bolts/datamodules/vocdetection_datamodule.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 0955aae318..c8d4383cc9 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -275,7 +275,7 @@ def train_dataloader_labeled(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index 05cef7a729..c2115d5289 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -171,7 +171,7 @@ def train_dataloader(self, batch_size=1, transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory, collate_fn=_collate_fn, ) @@ -198,7 +198,7 @@ def val_dataloader(self, batch_size=1, 
transforms=None): batch_size=batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory, collate_fn=_collate_fn, ) From d1b304e3529f91eaf69cd5fad624aa2966682f0d Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 14:55:16 +0900 Subject: [PATCH 13/20] More --- pl_bolts/datamodules/stl10_datamodule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index c8d4383cc9..27047069e7 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -193,7 +193,7 @@ def val_dataloader(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drpo_last=self.drop_last + drpo_last=self.drop_last, pin_memory=self.pin_memory ) return loader @@ -275,7 +275,7 @@ def train_dataloader_labeled(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last, + drop_last=self.drop_last, pin_memory=self.pin_memory ) return loader From d5bfe48fb79f385d9b2a768b3be8bd1c0d5b5786 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 15:24:57 +0900 Subject: [PATCH 14/20] Adding drop_last flag to sklearn test --- tests/datamodules/test_sklearn_dataloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index 7e35e20c66..d763b45ccb 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -27,7 +27,7 @@ def test_dataloader(): # ----------------------------- # train # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, val_split=0.2, test_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, val_split=0.2, test_split=0.2, random_state=1234, drop_last=True) train_loader = 
loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() From 73b9c565b0b23911b21484c20cbc3839888517b4 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 15:42:24 +0900 Subject: [PATCH 15/20] Adding drop_last flag to sklearn test --- tests/datamodules/test_sklearn_dataloaders.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index d763b45ccb..249f46d652 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -39,7 +39,7 @@ def test_dataloader(): # ----------------------------- # train + val # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_val=x_val, y_val=y_val, test_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, x_val=x_val, y_val=y_val, test_split=0.2, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() @@ -50,7 +50,7 @@ def test_dataloader(): # ----------------------------- # train + test # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234) + loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() @@ -61,7 +61,7 @@ def test_dataloader(): # ----------------------------- # train + val + test # ----------------------------- - loaders = SklearnDataModule(X, y, x_val, y_val, x_test, y_test, random_state=1234) + loaders = SklearnDataModule(X, y, x_val, y_val, x_test, y_test, random_state=1234, drop_last=True) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() 
From abc0dfec6aa7da1f5df384994e3b7ee9c9c2e3b8 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 16:47:44 +0900 Subject: [PATCH 16/20] Updating doc for reflect drop_last=False --- pl_bolts/datamodules/sklearn_datamodule.py | 2 +- tests/datamodules/test_sklearn_dataloaders.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index d8e62f5878..6b01ce1ed7 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -125,7 +125,7 @@ class SklearnDataModule(LightningDataModule): >>> len(train_loader.dataset) 355 >>> len(train_loader) - 11 + 12 >>> # validation set >>> val_loader = loaders.val_dataloader() >>> len(val_loader.dataset) diff --git a/tests/datamodules/test_sklearn_dataloaders.py b/tests/datamodules/test_sklearn_dataloaders.py index 249f46d652..68931719e6 100644 --- a/tests/datamodules/test_sklearn_dataloaders.py +++ b/tests/datamodules/test_sklearn_dataloaders.py @@ -50,7 +50,9 @@ def test_dataloader(): # ----------------------------- # train + test # ----------------------------- - loaders = SklearnDataModule(X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True) + loaders = SklearnDataModule( + X=X, y=y, x_test=x_test, y_test=y_test, val_split=0.2, random_state=1234, drop_last=True + ) train_loader = loaders.train_dataloader() val_loader = loaders.val_dataloader() test_loader = loaders.test_dataloader() From e9b7914cabf2d547013e4c7ecb3bcd51a32a4157 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Sun, 13 Dec 2020 18:41:14 +0900 Subject: [PATCH 17/20] Cleaning up parameters and docstring --- .../datamodules/binary_mnist_datamodule.py | 4 +- .../datamodules/fashion_mnist_datamodule.py | 7 ++- pl_bolts/datamodules/imagenet_datamodule.py | 2 +- pl_bolts/datamodules/kitti_datamodule.py | 43 +++++++++++++------ pl_bolts/datamodules/mnist_datamodule.py | 2 +- 
.../datamodules/ssl_imagenet_datamodule.py | 6 +-- 6 files changed, 43 insertions(+), 21 deletions(-) diff --git a/pl_bolts/datamodules/binary_mnist_datamodule.py b/pl_bolts/datamodules/binary_mnist_datamodule.py index 40dab900fb..429fd02516 100644 --- a/pl_bolts/datamodules/binary_mnist_datamodule.py +++ b/pl_bolts/datamodules/binary_mnist_datamodule.py @@ -52,8 +52,8 @@ def __init__( val_split: int = 5000, num_workers: int = 16, normalize: bool = False, - seed: int = 42, batch_size: int = 32, + seed: int = 42, shuffle: bool = False, pin_memory: bool = False, drop_last: bool = False, @@ -85,8 +85,8 @@ def __init__( self.val_split = val_split self.num_workers = num_workers self.normalize = normalize - self.seed = seed self.batch_size = batch_size + self.seed = seed self.shuffle = shuffle self.pin_memory = pin_memory self.drop_last = drop_last diff --git a/pl_bolts/datamodules/fashion_mnist_datamodule.py b/pl_bolts/datamodules/fashion_mnist_datamodule.py index a6758e43d4..54b877ea86 100644 --- a/pl_bolts/datamodules/fashion_mnist_datamodule.py +++ b/pl_bolts/datamodules/fashion_mnist_datamodule.py @@ -50,8 +50,8 @@ def __init__( data_dir: str, val_split: int = 5000, num_workers: int = 16, - seed: int = 42, batch_size: int = 32, + seed: int = 42, shuffle: bool = False, pin_memory: bool = False, drop_last: bool = False, @@ -81,8 +81,11 @@ def __init__( self.data_dir = data_dir self.val_split = val_split self.num_workers = num_workers - self.seed = seed self.batch_size = batch_size + self.seed = seed + self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last @property def num_classes(self): diff --git a/pl_bolts/datamodules/imagenet_datamodule.py b/pl_bolts/datamodules/imagenet_datamodule.py index 3c0a61c139..27e8839253 100644 --- a/pl_bolts/datamodules/imagenet_datamodule.py +++ b/pl_bolts/datamodules/imagenet_datamodule.py @@ -77,11 +77,11 @@ def __init__( num_imgs_per_val_class: how many images per class for the validation set image_size: 
final image size num_workers: how many data workers + batch_size: batch_size shuffle: If true shuffles the data every epoch pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before returning them drop_last: If true drops the last incomplete batch - batch_size: batch_size """ super().__init__(*args, **kwargs) diff --git a/pl_bolts/datamodules/kitti_datamodule.py b/pl_bolts/datamodules/kitti_datamodule.py index 7757635fa0..0017eacdfd 100644 --- a/pl_bolts/datamodules/kitti_datamodule.py +++ b/pl_bolts/datamodules/kitti_datamodule.py @@ -29,6 +29,8 @@ def __init__( batch_size: int = 32, seed: int = 42, shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): @@ -64,6 +66,9 @@ def __init__( batch_size: the batch size seed: random seed to be used for train/val/test splits shuffle: If true shuffles the data every epoch + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch """ if not _TORCHVISION_AVAILABLE: raise ModuleNotFoundError( # pragma: no-cover @@ -76,6 +81,8 @@ def __init__( self.num_workers = num_workers self.seed = seed self.shuffle = shuffle + self.pin_memory = pin_memory + self.drop_last = drop_last self.default_transforms = transforms.Compose([ transforms.ToTensor(), @@ -95,24 +102,36 @@ def __init__( generator=torch.Generator().manual_seed(self.seed)) def train_dataloader(self): - loader = DataLoader(self.trainset, - batch_size=self.batch_size, - shuffle=self.shuffle, - num_workers=self.num_workers) + loader = DataLoader( + self.trainset, + batch_size=self.batch_size, + shuffle=self.shuffle, + num_workers=self.num_workers, + drop_last=self.drop_last, + pin_memory=self.pin_memory, + ) return loader def val_dataloader(self): - loader = DataLoader(self.valset, - batch_size=self.batch_size, - shuffle=self.shuffle, - num_workers=self.num_workers) + loader = DataLoader( + self.valset, + 
batch_size=self.batch_size, + shuffle=self.shuffle, + num_workers=self.num_workers, + drop_last=self.drop_last, + pin_memory=self.pin_memory + ) return loader def test_dataloader(self): - loader = DataLoader(self.testset, - batch_size=self.batch_size, - shuffle=self.shuffle, - num_workers=self.num_workers) + loader = DataLoader( + self.testset, + batch_size=self.batch_size, + shuffle=self.shuffle, + num_workers=self.num_workers, + drop_last=self.drop_last + pin_memory=self.pin_memory, + ) return loader def _default_transforms(self): diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index d26b023064..fa5120cafc 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -50,8 +50,8 @@ def __init__( val_split: int = 5000, num_workers: int = 16, normalize: bool = False, - seed: int = 42, batch_size: int = 32, + seed: int = 42, shuffle: bool = False, pin_memory: bool = False, drop_last: bool = False, diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index a4576f82fd..1cc8482d2d 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -27,9 +27,9 @@ def __init__( meta_dir=None, num_workers=16, batch_size: int = 32, - shuffle=False, - pin_memory=False, - drop_last=False, + shuffle: bool = False, + pin_memory: bool = False, + drop_last: bool = False, *args, **kwargs, ): From 443d669d35434f48c28982378aeebcbb39df9fab Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Sun, 13 Dec 2020 18:44:03 +0900 Subject: [PATCH 18/20] Fixing syntax error --- pl_bolts/datamodules/kitti_datamodule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_bolts/datamodules/kitti_datamodule.py b/pl_bolts/datamodules/kitti_datamodule.py index 0017eacdfd..416cb20d76 100644 --- a/pl_bolts/datamodules/kitti_datamodule.py +++ b/pl_bolts/datamodules/kitti_datamodule.py @@ -129,7 
+129,7 @@ def test_dataloader(self): batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers, - drop_last=self.drop_last + drop_last=self.drop_last, pin_memory=self.pin_memory, ) return loader From dd234e16eb9d9739cecc9d800002bb1d1cbd452c Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Sun, 13 Dec 2020 19:18:03 +0900 Subject: [PATCH 19/20] Fixing documentation --- pl_bolts/datamodules/sklearn_datamodule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index 6b01ce1ed7..60774fbdec 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -131,13 +131,13 @@ class SklearnDataModule(LightningDataModule): >>> len(val_loader.dataset) 100 >>> len(val_loader) - 3 + 4 >>> # test set >>> test_loader = loaders.test_dataloader() >>> len(test_loader.dataset) 51 >>> len(test_loader) - 1 + 2 """ name = 'sklearn' From ebaaf18ce3fa0820bf3c11458684009780dfb259 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 15 Dec 2020 16:40:54 +0900 Subject: [PATCH 20/20] Hardcoding shuffle=False for val and test --- pl_bolts/datamodules/binary_mnist_datamodule.py | 4 ++-- pl_bolts/datamodules/cifar10_datamodule.py | 4 ++-- pl_bolts/datamodules/cityscapes_datamodule.py | 4 ++-- pl_bolts/datamodules/fashion_mnist_datamodule.py | 4 ++-- pl_bolts/datamodules/imagenet_datamodule.py | 4 ++-- pl_bolts/datamodules/kitti_datamodule.py | 4 ++-- pl_bolts/datamodules/mnist_datamodule.py | 4 ++-- pl_bolts/datamodules/sklearn_datamodule.py | 4 ++-- pl_bolts/datamodules/ssl_imagenet_datamodule.py | 4 ++-- pl_bolts/datamodules/stl10_datamodule.py | 8 ++++---- pl_bolts/datamodules/vocdetection_datamodule.py | 2 +- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pl_bolts/datamodules/binary_mnist_datamodule.py b/pl_bolts/datamodules/binary_mnist_datamodule.py index 429fd02516..245c25cee5 100644 --- 
a/pl_bolts/datamodules/binary_mnist_datamodule.py +++ b/pl_bolts/datamodules/binary_mnist_datamodule.py @@ -144,7 +144,7 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -161,7 +161,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/cifar10_datamodule.py b/pl_bolts/datamodules/cifar10_datamodule.py index cc81e51924..4287c6ca97 100644 --- a/pl_bolts/datamodules/cifar10_datamodule.py +++ b/pl_bolts/datamodules/cifar10_datamodule.py @@ -162,7 +162,7 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, pin_memory=self.pin_memory, drop_last=self.drop_last @@ -179,7 +179,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/cityscapes_datamodule.py b/pl_bolts/datamodules/cityscapes_datamodule.py index 33236500d6..435b83168e 100644 --- a/pl_bolts/datamodules/cityscapes_datamodule.py +++ b/pl_bolts/datamodules/cityscapes_datamodule.py @@ -161,7 +161,7 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, pin_memory=self.pin_memory, drop_last=self.drop_last @@ -185,7 +185,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git 
a/pl_bolts/datamodules/fashion_mnist_datamodule.py b/pl_bolts/datamodules/fashion_mnist_datamodule.py index 54b877ea86..f1780245e0 100644 --- a/pl_bolts/datamodules/fashion_mnist_datamodule.py +++ b/pl_bolts/datamodules/fashion_mnist_datamodule.py @@ -141,7 +141,7 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -158,7 +158,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/imagenet_datamodule.py b/pl_bolts/datamodules/imagenet_datamodule.py index 27e8839253..fcc3682531 100644 --- a/pl_bolts/datamodules/imagenet_datamodule.py +++ b/pl_bolts/datamodules/imagenet_datamodule.py @@ -187,7 +187,7 @@ def val_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -208,7 +208,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/kitti_datamodule.py b/pl_bolts/datamodules/kitti_datamodule.py index 416cb20d76..2355079f8a 100644 --- a/pl_bolts/datamodules/kitti_datamodule.py +++ b/pl_bolts/datamodules/kitti_datamodule.py @@ -116,7 +116,7 @@ def val_dataloader(self): loader = DataLoader( self.valset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -127,7 +127,7 @@ def test_dataloader(self): loader = DataLoader( self.testset, batch_size=self.batch_size, - shuffle=self.shuffle, + 
shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory, diff --git a/pl_bolts/datamodules/mnist_datamodule.py b/pl_bolts/datamodules/mnist_datamodule.py index fa5120cafc..d14a480610 100644 --- a/pl_bolts/datamodules/mnist_datamodule.py +++ b/pl_bolts/datamodules/mnist_datamodule.py @@ -138,7 +138,7 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory, @@ -155,7 +155,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory, diff --git a/pl_bolts/datamodules/sklearn_datamodule.py b/pl_bolts/datamodules/sklearn_datamodule.py index 60774fbdec..db6a7903ff 100644 --- a/pl_bolts/datamodules/sklearn_datamodule.py +++ b/pl_bolts/datamodules/sklearn_datamodule.py @@ -215,7 +215,7 @@ def val_dataloader(self): loader = DataLoader( self.val_dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -226,7 +226,7 @@ def test_dataloader(self): loader = DataLoader( self.test_dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/ssl_imagenet_datamodule.py b/pl_bolts/datamodules/ssl_imagenet_datamodule.py index 1cc8482d2d..854f3eaa2d 100644 --- a/pl_bolts/datamodules/ssl_imagenet_datamodule.py +++ b/pl_bolts/datamodules/ssl_imagenet_datamodule.py @@ -111,7 +111,7 @@ def val_dataloader(self, num_images_per_class=50, add_normalize=False): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, 
drop_last=self.drop_last, pin_memory=self.pin_memory @@ -129,7 +129,7 @@ def test_dataloader(self, num_images_per_class, add_normalize=False): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/stl10_datamodule.py b/pl_bolts/datamodules/stl10_datamodule.py index 27047069e7..b319ebefab 100644 --- a/pl_bolts/datamodules/stl10_datamodule.py +++ b/pl_bolts/datamodules/stl10_datamodule.py @@ -191,7 +191,7 @@ def val_dataloader(self): loader = DataLoader( dataset_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drpo_last=self.drop_last, pin_memory=self.pin_memory @@ -234,7 +234,7 @@ def val_dataloader_mixed(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -255,7 +255,7 @@ def test_dataloader(self): loader = DataLoader( dataset, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory @@ -294,7 +294,7 @@ def val_dataloader_labeled(self): loader = DataLoader( labeled_val, batch_size=self.batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory diff --git a/pl_bolts/datamodules/vocdetection_datamodule.py b/pl_bolts/datamodules/vocdetection_datamodule.py index c2115d5289..0919702a33 100644 --- a/pl_bolts/datamodules/vocdetection_datamodule.py +++ b/pl_bolts/datamodules/vocdetection_datamodule.py @@ -196,7 +196,7 @@ def val_dataloader(self, batch_size=1, transforms=None): loader = DataLoader( dataset, batch_size=batch_size, - shuffle=self.shuffle, + shuffle=False, num_workers=self.num_workers, drop_last=self.drop_last, pin_memory=self.pin_memory,