Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: Cache datasets #315

Merged
merged 16 commits into from
Nov 6, 2020
11 changes: 5 additions & 6 deletions .github/workflows/ci_test-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,11 @@ jobs:
pip list
shell: bash

#- name: Cache datasets
# uses: actions/cache@v2
# with:
# path: Datasets # This path is specific to Ubuntu
# # Look to see if there is a cache hit for the corresponding requirements file
# key: pl-datasets
- name: Cache datasets
uses: actions/cache@v2
with:
path: ./datasets
key: pl-datasets-${{ hashFiles('tests/conftest.py') }}

- name: Tests
run: |
Expand Down
3 changes: 1 addition & 2 deletions pl_bolts/models/mnist_module.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from argparse import ArgumentParser
from warnings import warn

Expand Down Expand Up @@ -70,7 +69,7 @@ def val_dataloader(self):
return loader

def test_dataloader(self):
test_dataset = MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor())
test_dataset = MNIST(self.hparams.data_dir, train=False, download=True, transform=transforms.ToTensor())
loader = DataLoader(test_dataset, batch_size=self.hparams.batch_size, num_workers=self.hparams.num_workers)
return loader

Expand Down
3 changes: 2 additions & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from pytorch_lightning import seed_everything

TEST_ROOT = os.path.dirname(__file__)
TEST_ROOT = os.path.realpath(os.path.dirname(__file__))
PACKAGE_ROOT = os.path.dirname(TEST_ROOT)
DATASETS_PATH = os.path.join(PACKAGE_ROOT, 'datasets')
# generate a list of random seeds for each test
ROOT_SEED = 1234

Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_info_callbacks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pl_bolts.callbacks import PrintTableMetricsCallback


def test_printtable_metrics_callback(tmpdir):
def test_printtable_metrics_callback():
callback = PrintTableMetricsCallback()

metrics_a = {'loss': 1.0, 'epoch': 0}
Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_param_update_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pl_bolts.callbacks.byol_updates import BYOLMAWeightUpdate


def test_byol_ma_weight_update_callback(tmpdir):
def test_byol_ma_weight_update_callback():
a = nn.Linear(100, 10)
b = deepcopy(a)
a_original = deepcopy(a)
Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_variational_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pl_bolts.models.gans import GAN


def test_latent_dim_interpolator(tmpdir):
def test_latent_dim_interpolator():

class FakeTrainer(object):
def __init__(self):
Expand Down
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path

import pytest


# GitHub Actions use this path to cache datasets.
# Use `datadir` fixture where possible and use `DATASETS_PATH` in
# `pytest.mark.parametrize()` where you cannot use `datadir`.
# https://github.com/pytest-dev/pytest/issues/349
from tests import DATASETS_PATH


@pytest.fixture(scope="session")
def datadir():
    """Provide the shared datasets directory to tests as a ``pathlib.Path``.

    The fixture is session-scoped, so it is evaluated once per test session.
    The returned path is built from ``DATASETS_PATH``.
    """
    datasets_root = Path(DATASETS_PATH)
    return datasets_root
4 changes: 2 additions & 2 deletions tests/datamodules/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from pl_bolts.datasets.cifar10_dataset import CIFAR10


def test_async_dataloader(tmpdir):
ds = CIFAR10(tmpdir)
def test_async_dataloader(datadir):
ds = CIFAR10(data_dir=datadir)

if torch.cuda.device_count() > 0: # Can only run this test with a GPU
device = torch.device('cuda', 0)
Expand Down
11 changes: 6 additions & 5 deletions tests/datamodules/test_datamodules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from pl_bolts.datamodules import CityscapesDataModule


def test_dev_datasets(tmpdir):
ds = CIFAR10(tmpdir)
def test_dev_datasets(datadir):

ds = CIFAR10(data_dir=datadir)
for b in ds:
pass

Expand Down Expand Up @@ -35,14 +36,14 @@ def _create_synth_Cityscapes_dataset(path_dir):
fine_labels_dir / split / city / semantic_target_name)


def test_cityscapes_datamodule(tmpdir):
def test_cityscapes_datamodule(datadir):

_create_synth_Cityscapes_dataset(tmpdir)
_create_synth_Cityscapes_dataset(datadir)

batch_size = 1
target_types = ['semantic', 'instance']
for target_type in target_types:
dm = CityscapesDataModule(tmpdir,
dm = CityscapesDataModule(datadir,
num_workers=0,
batch_size=batch_size,
target_type=target_type)
Expand Down
2 changes: 1 addition & 1 deletion tests/datamodules/test_sklearn_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
' install it with `pip install sklearn`.')


def test_dataloader(tmpdir):
def test_dataloader():
seed_everything()

X = np.random.rand(5, 2)
Expand Down
8 changes: 4 additions & 4 deletions tests/datasets/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,31 @@
from pl_bolts.datasets import DummyDataset, RandomDataset, RandomDictDataset, RandomDictStringDataset


def test_dummy_ds(tmpdir):
def test_dummy_ds():
ds = DummyDataset((1, 2), num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_ds(tmpdir):
def test_rand_ds():
ds = RandomDataset(32, num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_dict_ds(tmpdir):
def test_rand_dict_ds():
ds = RandomDictDataset(32, num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_str_dict_ds(tmpdir):
def test_rand_str_dict_ds():
ds = RandomDictStringDataset(32, num_samples=100)
dl = DataLoader(ds)

Expand Down
30 changes: 15 additions & 15 deletions tests/models/self_supervised/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@

# TODO: this test is hanging (runs for more than 10 min) so we need to use GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_cpcv2(tmpdir):
def test_cpcv2(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(data_dir=tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = CPCTrainTransformsCIFAR10()
datamodule.val_transforms = CPCEvalTransformsCIFAR10()

model = CPCV2(encoder='resnet18', data_dir=tmpdir, batch_size=2, online_ft=True, datamodule=datamodule)
model = CPCV2(encoder='resnet18', data_dir=datadir, batch_size=2, online_ft=True, datamodule=datamodule)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir)
trainer.fit(model)
loss = trainer.progress_bar_dict['val_nce']
Expand All @@ -32,51 +32,51 @@ def test_cpcv2(tmpdir):

# TODO: this test is hanging (runs for more than 10 min) so we need to use GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_byol(tmpdir):
def test_byol(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(data_dir=tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = CPCTrainTransformsCIFAR10()
datamodule.val_transforms = CPCEvalTransformsCIFAR10()

model = BYOL(data_dir=tmpdir, num_classes=datamodule)
model = BYOL(data_dir=datadir, num_classes=datamodule)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir, max_steps=2)
trainer.fit(model, datamodule)
loss = trainer.progress_bar_dict['loss']

assert float(loss) < 1.0


def test_amdim(tmpdir):
def test_amdim(tmpdir, datadir):
seed_everything()

model = AMDIM(data_dir=tmpdir, batch_size=2, online_ft=True, encoder='resnet18')
model = AMDIM(data_dir=datadir, batch_size=2, online_ft=True, encoder='resnet18')
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir)
trainer.fit(model)
loss = trainer.progress_bar_dict['loss']

assert float(loss) > 0


def test_moco(tmpdir):
def test_moco(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = Moco2TrainCIFAR10Transforms()
datamodule.val_transforms = Moco2EvalCIFAR10Transforms()

model = MocoV2(data_dir=tmpdir, batch_size=2, online_ft=True)
model = MocoV2(data_dir=datadir, batch_size=2, online_ft=True)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir, callbacks=[MocoLRScheduler()])
trainer.fit(model, datamodule=datamodule)
loss = trainer.progress_bar_dict['loss']

assert float(loss) > 0


def test_simclr(tmpdir):
def test_simclr(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = SimCLRTrainDataTransform(32)
datamodule.val_transforms = SimCLREvalDataTransform(32)

Expand All @@ -88,14 +88,14 @@ def test_simclr(tmpdir):
assert float(loss) > 0


def test_swav(tmpdir):
def test_swav(tmpdir, datadir):
seed_everything()

batch_size = 2

# inputs, y = batch (doesn't receive y for some reason)
datamodule = CIFAR10DataModule(
data_dir=tmpdir,
data_dir=datadir,
batch_size=batch_size,
num_workers=0
)
Expand Down
6 changes: 3 additions & 3 deletions tests/models/self_supervised/test_resnets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
)


def test_cpc_resnet(tmpdir):
def test_cpc_resnet():
x = torch.rand(3, 3, 64, 64)
model = cpc_resnet50(x)
model(x)
Expand All @@ -33,7 +33,7 @@ def test_cpc_resnet(tmpdir):
wide_resnet50_2,
wide_resnet101_2
])
def test_torchvision_resnets(tmpdir, model_class):
def test_torchvision_resnets(model_class):
x = torch.rand(3, 3, 64, 64)
model = model_class()
model(x)
Expand All @@ -44,7 +44,7 @@ def test_torchvision_resnets(tmpdir, model_class):
64,
128
])
def test_amdim_encoder(tmpdir, size):
def test_amdim_encoder(size):
dummy_batch = torch.zeros((2, 3, size, size))
model = AMDIMEncoder(dummy_batch, encoder_size=size)
model.init_weights()
Expand Down
26 changes: 19 additions & 7 deletions tests/models/self_supervised/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
import pytest
import torch

from tests import DATASETS_PATH

@pytest.mark.parametrize('cli_args', ["--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2"])

@pytest.mark.parametrize('cli_args', [
f"--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2"
])
def test_cli_run_self_supervised_amdim(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.amdim.amdim_module import cli_main
Expand All @@ -16,7 +20,9 @@ def test_cli_run_self_supervised_amdim(cli_args):

# TODO: this test is hanging (runs for more than 10 min) so we need to use GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --encoder resnet18'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --encoder resnet18'
])
def test_cli_run_self_supervised_cpc(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.cpc.cpc_module import cli_main
Expand All @@ -26,7 +32,9 @@ def test_cli_run_self_supervised_cpc(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'
])
def test_cli_run_self_supervised_moco(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.moco.moco2_module import cli_main
Expand All @@ -36,7 +44,9 @@ def test_cli_run_self_supervised_moco(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'
])
def test_cli_run_self_supervised_simclr(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.simclr.simclr_module import cli_main
Expand All @@ -46,7 +56,9 @@ def test_cli_run_self_supervised_simclr(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'
])
def test_cli_run_self_supervised_byol(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.byol.byol_module import cli_main
Expand All @@ -58,8 +70,8 @@ def test_cli_run_self_supervised_byol(cli_args):

@pytest.mark.parametrize(
'cli_args', [
'--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --gpus 0 --arch resnet18'
' --hidden_mlp 512 --fp32 --sinkhorn_iterations 1 --nmb_prototypes 2 --dataset cifar10'
f'--dataset cifar10 --data_path {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'
' --gpus 0 --arch resnet18 --hidden_mlp 512 --fp32 --sinkhorn_iterations 1 --nmb_prototypes 2'
]
)
def test_cli_run_self_supervised_swav(cli_args):
Expand Down
Loading