diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index f21cfd95c3..266637d038 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -21,26 +21,26 @@ jobs:
       matrix:
         # PyTorch 1.5 is failing on Win and bolts requires torchvision>=0.5
         os: [ubuntu-20.04, macOS-10.15, windows-2019]
-        python-version: [3.6, 3.8]
+        python-version: [3.7, 3.9]
         requires: ['oldest', 'latest']
-        topic: [['devel']]
+        topic: [['core']]
         release: [ 'stable' ]
         exclude:
-          - os: ubuntu-20.04
-            python-version: 3.8
-            requires: 'latest'
+          # Skip if torch<1.8 and py3.9 on Linux: https://github.com/pytorch/pytorch/issues/50014
+          - { os: ubuntu-20.04, python-version: 3.9, requires: 'oldest' }
+          - { os: ubuntu-20.04, python-version: 3.9, requires: 'latest' }
         include:
-          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'pre', topic: [ 'devel' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'image' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'image','image_extras' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'video' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'video','video_extras' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'tabular' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'text' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'pre', topic: [ 'core' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'image' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'image','image_extras' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'video' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'video','video_extras' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'tabular' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'text' ] }
           - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'pointcloud' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'serve' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'graph' ] }
-          - { os: 'ubuntu-20.04', python-version: 3.8, requires: 'latest', release: 'stable', topic: [ 'audio' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'serve' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'graph' ] }
+          - { os: 'ubuntu-20.04', python-version: 3.9, requires: 'latest', release: 'stable', topic: [ 'audio' ] }

     # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 35
@@ -67,15 +67,10 @@ jobs:
     - name: Set min. dependencies
       if: matrix.requires == 'oldest'
       run: |
-        python -c "req = open('requirements.txt').read().replace('>', '=') ; open('requirements.txt', 'w').write(req)"
-
-    - name: Filter requirements
-      run: |
-        import sys
-        if sys.version_info.minor < 7:
-            fname = 'requirements.txt'
-            lines = [line for line in open(fname).readlines() if not line.startswith('pytorchvideo')]
-            open(fname, 'w').writelines(lines)
+        fname = 'requirements.txt'
+        ignore = ['pandas', 'torchmetrics']
+        lines = [line if any([line.startswith(package) for package in ignore]) else line.replace('>', '=') for line in open(fname).readlines()]
+        open(fname, 'w').writelines(lines)
       shell: python

     - run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
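The rewritten "Set min. dependencies" step merges the two old steps into one `shell: python` script that pins every requirement to its oldest supported version by rewriting `>=` to `==`, skipping the packages listed in `ignore`. A minimal standalone sketch of that transformation (the sample requirement strings are illustrative, not the project's real pins):

```python
# Sketch of the "Set min. dependencies" step above; sample inputs are hypothetical.
ignore = ["pandas", "torchmetrics"]


def pin_to_oldest(lines):
    # ">=x.y" becomes "==x.y" for every package not in `ignore`, so the
    # "oldest" CI job installs the minimum versions the project claims to support.
    return [
        line if any(line.startswith(package) for package in ignore) else line.replace(">", "=")
        for line in lines
    ]


print(pin_to_oldest(["torch>=1.7.1\n", "pandas>=1.1.0\n"]))
# -> ['torch==1.7.1\n', 'pandas>=1.1.0\n']
```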
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dab22e35db..3aa4a068e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Fixed

+- Fixed a bug where LR schedulers from HuggingFace could not be used with newer versions of PyTorch Lightning ([#1307](https://github.com/PyTorchLightning/lightning-flash/pull/1307))
 - Fixed a bug where the default Flash zero configurations for `ObjectDetector`, `InstanceSegmentation`, and `KeypointDetector` would error with the latest version of some requirements ([#1306](https://github.com/PyTorchLightning/lightning-flash/pull/1306))
 - Fixed plain `LightningModule` support for Flash data modules. ([#1281](https://github.com/PyTorchLightning/lightning-flash/pull/1281))

diff --git a/flash/core/model.py b/flash/core/model.py
index 3e200b30d5..704ffb6564 100644
--- a/flash/core/model.py
+++ b/flash/core/model.py
@@ -629,25 +629,13 @@ def get_num_training_steps(self) -> int:
         """Total training steps inferred from datamodule and devices."""
         if not getattr(self, "trainer", None):
             raise MisconfigurationException("The LightningModule isn't attached to the trainer yet.")
-        if isinstance(self.trainer.limit_train_batches, int) and self.trainer.limit_train_batches != 0:
-            dataset_size = self.trainer.limit_train_batches
-        elif isinstance(self.trainer.limit_train_batches, float):
-            # limit_train_batches is a percentage of batches
-            dataset_size = len(self.train_dataloader())
-            dataset_size = int(dataset_size * self.trainer.limit_train_batches)
-        else:
-            dataset_size = len(self.train_dataloader())

-        num_devices = max(1, self.trainer.num_gpus, self.trainer.num_processes)
-        if self.trainer.tpu_cores:
-            num_devices = max(num_devices, self.trainer.tpu_cores)
+        if hasattr(self.trainer, "estimated_stepping_batches"):
+            return self.trainer.estimated_stepping_batches

-        effective_batch_size = self.trainer.accumulate_grad_batches * num_devices
-        max_estimated_steps = (dataset_size // effective_batch_size) * self.trainer.max_epochs
+        from flash.core.trainer import Trainer

-        if self.trainer.max_steps and self.trainer.max_steps < max_estimated_steps:
-            return self.trainer.max_steps
-        return max_estimated_steps
+        return Trainer.estimated_stepping_batches.fget(self.trainer)

     @staticmethod
     def _compute_warmup(num_training_steps: int, num_warmup_steps: Union[int, float]) -> int:
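The new fallback in `get_num_training_steps` leans on the fact that a Python `property` object exposes its getter as `fget`, which can be called like a plain function with any object as `self`. That is how Flash's `Trainer.estimated_stepping_batches` (added in the next file) can be evaluated against a stock PyTorch Lightning trainer that predates the property. A toy sketch of the pattern, with made-up class names:

```python
# Toy illustration of the `property.fget` trick; `Modern` and `Legacy` are illustrative.
class Modern:
    @property
    def doubled(self):
        return self.value * 2


class Legacy:
    def __init__(self, value):
        self.value = value


legacy = Legacy(21)
# Call the property's getter directly, passing the foreign instance as `self`.
print(Modern.doubled.fget(legacy))  # -> 42
```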
diff --git a/flash/core/trainer.py b/flash/core/trainer.py
index 5467d418b6..18ff14acae 100644
--- a/flash/core/trainer.py
+++ b/flash/core/trainer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import inspect
+import math
 import warnings
 from argparse import ArgumentParser, Namespace
 from functools import wraps
@@ -21,6 +22,7 @@
 from pytorch_lightning import LightningDataModule, LightningModule
 from pytorch_lightning import Trainer as PlTrainer
 from pytorch_lightning.callbacks import BaseFinetuning
+from pytorch_lightning.utilities import rank_zero_info
 from pytorch_lightning.utilities.argparse import add_argparse_args, get_init_arguments_and_types, parse_env_variables
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from torch.utils.data import DataLoader
@@ -31,6 +33,7 @@
 from flash.core.data.io.transform_predictions import TransformPredictions
 from flash.core.model import Task
 from flash.core.registry import FlashRegistry
+from flash.core.utilities.imports import _PL_GREATER_EQUAL_1_4_0, _PL_GREATER_EQUAL_1_5_0, _PL_GREATER_EQUAL_1_6_0


 def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs):
@@ -239,3 +242,61 @@ def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs) -> "Trainer":
         # the lightning trainer implementation does not support subclasses.
         # context: https://github.com/PyTorchLightning/lightning-flash/issues/342#issuecomment-848892447
         return from_argparse_args(Trainer, args, **kwargs)
+
+    @property
+    def estimated_stepping_batches(self) -> Union[int, float]:
+        """Estimated stepping batches for the complete training inferred from DataLoaders, gradient accumulation
+        factor and distributed setup.
+
+        Examples
+        ________
+
+        .. code-block:: python
+
+            def configure_optimizers(self):
+                optimizer = ...
+                scheduler = torch.optim.lr_scheduler.OneCycleLR(
+                    optimizer, max_lr=1e-3, total_steps=self.trainer.estimated_stepping_batches
+                )
+                return [optimizer], [scheduler]
+        """
+        if _PL_GREATER_EQUAL_1_6_0:
+            return super().estimated_stepping_batches
+        # Copied from PL 1.6
+        accumulation_scheduler = self.accumulation_scheduler
+
+        if accumulation_scheduler.epochs != [0]:
+            raise MisconfigurationException(
+                "Estimated stepping batches cannot be computed with different"
+                " `accumulate_grad_batches` at different epochs."
+            )
+
+        # infinite training
+        if self.max_epochs == -1 and self.max_steps == -1:
+            return float("inf")
+
+        if self.train_dataloader is None:
+            rank_zero_info("Loading `train_dataloader` to estimate number of stepping batches.")
+            if _PL_GREATER_EQUAL_1_5_0:
+                self.reset_train_dataloader()
+            else:
+                self.reset_train_dataloader(self.lightning_module)
+
+        total_batches = self.num_training_batches
+
+        # iterable dataset
+        if total_batches == float("inf"):
+            return self.max_steps
+
+        if _PL_GREATER_EQUAL_1_4_0:
+            self.accumulate_grad_batches = accumulation_scheduler.get_accumulate_grad_batches(self.current_epoch)
+        else:
+            # Call the callback hook manually to guarantee that `self.accumulate_grad_batches` has been set
+            accumulation_scheduler.on_train_epoch_start(self, self.lightning_module)
+        effective_batch_size = self.accumulate_grad_batches
+        max_estimated_steps = math.ceil(total_batches / effective_batch_size) * max(self.max_epochs, 1)
+
+        max_estimated_steps = (
+            min(max_estimated_steps, self.max_steps) if self.max_steps not in [None, -1] else max_estimated_steps
+        )
+        return max_estimated_steps
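To make the backported arithmetic concrete: the stepping-batch estimate is the per-epoch batch count divided by the gradient-accumulation factor, rounded up, multiplied by the number of epochs, and then capped by `max_steps` when one is set. A worked example with hypothetical numbers:

```python
import math

# Hypothetical values, chosen only to trace the formula in the property above.
total_batches = 100          # training batches per epoch
accumulate_grad_batches = 4  # one optimizer step every 4 batches
max_epochs = 3
max_steps = -1               # -1 means "no cap" in PyTorch Lightning

max_estimated_steps = math.ceil(total_batches / accumulate_grad_batches) * max(max_epochs, 1)
if max_steps not in [None, -1]:
    max_estimated_steps = min(max_estimated_steps, max_steps)

print(max_estimated_steps)  # -> 75, i.e. ceil(100 / 4) * 3
```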
diff --git a/flash/text/classification/data.py b/flash/text/classification/data.py
index ecee9ea2e5..7827901b87 100644
--- a/flash/text/classification/data.py
+++ b/flash/text/classification/data.py
@@ -779,14 +779,12 @@ def from_labelstudio(
         Args:
             export_json: path to label studio export file
-            train_export_json: path to label studio export file for train set,
-                overrides export_json if specified
+            train_export_json: path to label studio export file for train set, overrides export_json if specified
             val_export_json: path to label studio export file for validation
             test_export_json: path to label studio export file for test
             predict_export_json: path to label studio export file for predict
             data_folder: path to label studio data folder
-            train_data_folder: path to label studio data folder for train data set,
-                overrides data_folder if specified
+            train_data_folder: path to label studio data folder for train data set, overrides data_folder if specified
             val_data_folder: path to label studio data folder for validation data
             test_data_folder: path to label studio data folder for test data
             predict_data_folder: path to label studio data folder for predict data
diff --git a/setup.py b/setup.py
index 09c76a960b..b260882a02 100644
--- a/setup.py
+++ b/setup.py
@@ -53,10 +53,8 @@ def _expand_reqs(extras: dict, keys: list) -> list:
 base_req = setup_tools._load_requirements(path_dir=_PATH_ROOT, file_name="requirements.txt")
 # find all extra requirements
 _load_req = partial(setup_tools._load_requirements, path_dir=_PATH_REQUIRE)
-SKIP_REQ_FILES = "devel.txt"
 found_req_files = sorted(os.path.basename(p) for p in glob.glob(os.path.join(_PATH_REQUIRE, "*.txt")))
-# filter unwanted files
-found_req_files = [n for n in found_req_files if n not in SKIP_REQ_FILES]
+# remove datatype prefix
 found_req_names = [os.path.splitext(req)[0].replace("datatype_", "") for req in found_req_files]
 # define basic and extra extras
 extras_req = {
@@ -71,6 +69,7 @@ def _expand_reqs(extras: dict, keys: list) -> list:
 )
 # some extra combinations
 extras_req["vision"] = _expand_reqs(extras_req, ["image", "video"])
+extras_req["core"] = _expand_reqs(extras_req, ["image", "tabular", "text"])
 extras_req["all"] = _expand_reqs(extras_req, ["vision", "tabular", "text", "audio"])
 extras_req["dev"] = _expand_reqs(extras_req, ["all", "test", "docs"])
 # filter the uniques
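The new `core` extra in setup.py matches the CI `topic` rename above, bundling the image, tabular, and text requirements under one name. A hedged sketch of the composition, assuming `_expand_reqs` concatenates the requirement lists of the named extras (the real helper is defined earlier in setup.py, and the package names below are placeholders):

```python
from itertools import chain


# Assumed behavior of setup.py's `_expand_reqs`; the real implementation may
# also deduplicate entries.
def _expand_reqs(extras: dict, keys: list) -> list:
    return list(chain.from_iterable(extras[key] for key in keys))


extras_req = {
    "image": ["torchvision"],
    "tabular": ["pytorch-tabular"],
    "text": ["transformers"],
}
# `pip install 'lightning-flash[core]'` would then pull all three topics.
extras_req["core"] = _expand_reqs(extras_req, ["image", "tabular", "text"])
print(extras_req["core"])  # -> ['torchvision', 'pytorch-tabular', 'transformers']
```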
diff --git a/tests/core/test_model.py b/tests/core/test_model.py
index a617e284d4..7b16da25fb 100644
--- a/tests/core/test_model.py
+++ b/tests/core/test_model.py
@@ -23,6 +23,7 @@
 import pytest
 import pytorch_lightning as pl
 import torch
+from pytorch_lightning import LightningDataModule
 from pytorch_lightning.callbacks import Callback
 from torch import nn, Tensor
 from torch.nn import functional as F
@@ -405,13 +406,32 @@ def test_external_optimizers_torch_optimizer(tmpdir, optim):
         ("cosine_with_hard_restarts_schedule_with_warmup", {"num_warmup_steps": 0.1, "num_cycles": 3}),
     ],
 )
-def test_external_schedulers_provider_hf_transformers(tmpdir, optim, sched):
+@pytest.mark.parametrize("use_datamodule", [False, True])
+@pytest.mark.parametrize("limit", [None, 5, 0.1])
+def test_external_schedulers_provider_hf_transformers(tmpdir, optim, sched, use_datamodule, limit):
     model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
     task = ClassificationTask(model, optimizer=deepcopy(optim), lr_scheduler=deepcopy(sched), loss_fn=F.nll_loss)
-    trainer = flash.Trainer(max_epochs=1, limit_train_batches=10, gpus=torch.cuda.device_count())
-    ds = DummyDataset()
-    trainer.fit(task, train_dataloader=DataLoader(ds))
+    if limit is not None:
+        batch_count = limit if isinstance(limit, int) else int(limit * 10)
+        trainer = flash.Trainer(max_epochs=1, limit_train_batches=limit)
+    else:
+        batch_count = 10
+        trainer = flash.Trainer(max_epochs=1)
+
+    ds = DummyDataset(num_samples=10)
+
+    if use_datamodule:
+
+        class TestDataModule(LightningDataModule):
+            def train_dataloader(self):
+                return DataLoader(ds)
+
+        trainer.fit(task, datamodule=TestDataModule())
+    else:
+        trainer.fit(task, train_dataloader=DataLoader(ds))
+
+    assert task.get_num_training_steps() == batch_count
     assert isinstance(trainer.optimizers[0], torch.optim.Adadelta)
     assert isinstance(trainer.lr_schedulers[0]["scheduler"], torch.optim.lr_scheduler.LambdaLR)
diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py
index 1162caa91d..80c7ca91ae 100644
--- a/tests/examples/test_scripts.py
+++ b/tests/examples/test_scripts.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import sys
 from pathlib import Path
 from unittest import mock
@@ -114,7 +115,13 @@
         "tabular_forecasting.py",
         marks=pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed"),
     ),
-    pytest.param("template.py", marks=pytest.mark.skipif(not _SKLEARN_AVAILABLE, reason="sklearn isn't installed")),
+    pytest.param(
+        "template.py",
+        marks=[
+            pytest.mark.skipif(not _SKLEARN_AVAILABLE, reason="sklearn isn't installed"),
+            pytest.mark.skipif(sys.version_info >= (3, 9), reason="Undiagnosed segmentation fault in 3.9"),
+        ],
+    ),
     pytest.param(
         "text_classification.py",
         marks=pytest.mark.skipif(not _TEXT_TESTING, reason="text libraries aren't installed"),