Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Data Pipeline V2: Cleanup (#1018)
Browse files Browse the repository at this point in the history
  • Loading branch information
tchaton authored Dec 2, 2021
1 parent 78c60ed commit 9fac5e2
Show file tree
Hide file tree
Showing 20 changed files with 537 additions and 97 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added support for comma delimited multi-label targets to the `ImageClassifier` ([#997](https://github.com/PyTorchLightning/lightning-flash/pull/997))

- Added `data_pipeline_state` to the datasets created within the `from_*` methods of the `DataModule` ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018))

### Changed

- Changed `DataSource` to `Input` ([#929](https://github.com/PyTorchLightning/lightning-flash/pull/929))
Expand All @@ -34,6 +36,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `Output` suffix to `Preds`, `FiftyOneDetectionLabels`, `SegmentationLabels`, `FiftyOneDetectionLabels`, `DetectionLabels`, `Classes`, `FiftyOneLabels`, `Labels`, `Logits`, `Probabilities` ([#1011](https://github.com/PyTorchLightning/lightning-flash/pull/1011))


- Changed `from_files` and `from_folders` of `ObjectDetectionData`, `InstanceSegmentationData`, and `KeypointDetectionData` to support only the `predicting` stage ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018))

### Deprecated

- Deprecated `flash.core.data.process.Serializer` in favour of `flash.core.data.io.output.Output` ([#927](https://github.com/PyTorchLightning/lightning-flash/pull/927))
Expand Down
2 changes: 1 addition & 1 deletion flash/core/integrations/icevision/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def load_data(
def predict_load_data(
self, paths: Union[str, List[str]], ann_file: Optional[str] = None, parser: Optional[Type["Parser"]] = None
) -> List[Dict[str, Any]]:
if parser is not None:
if parser is not None and parser != Parser:
return self.load_data(paths, ann_file, parser)
paths = list_valid_files(paths, valid_extensions=IMG_EXTENSIONS + NP_EXTENSIONS)
return [{DataKeys.INPUT: path} for path in paths]
Expand Down
4 changes: 3 additions & 1 deletion flash/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ def predict(
# <hack> Temporary fix to support new `Input` object
input = data_pipeline._input_transform_pipeline.input_of_name(input or "default")

if inspect.isclass(input) and issubclass(input, NewInputBase):
if (inspect.isclass(input) and issubclass(input, NewInputBase)) or (
isinstance(input, functools.partial) and issubclass(input.func, NewInputBase)
):
dataset = input(running_stage, x, data_pipeline_state=self._data_pipeline_state)
else:
dataset = input.generate_dataset(x, running_stage)
Expand Down
3 changes: 2 additions & 1 deletion flash/core/utilities/flash_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def add_arguments_to_parser(self, parser) -> None:
)
or (not hasattr(DataModule, function) and not self.legacy)
):
self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function))
if getattr(self.local_datamodule_class, function) is not None:
self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function))

for datamodule_builder in self.additional_datamodule_builders:
self.add_subcommand_from_function(subcommands, datamodule_builder)
Expand Down
20 changes: 16 additions & 4 deletions flash/graph/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from torch.utils.data import Dataset

from flash.core.data.data_module import DataModule
from flash.core.data.data_pipeline import DataPipelineState
from flash.core.data.io.input import InputFormat
from flash.core.data.io.input_transform import InputTransform
from flash.core.utilities.imports import _GRAPH_AVAILABLE
Expand Down Expand Up @@ -74,11 +75,14 @@ def from_datasets(
predict_transform: Optional[Dict[str, Callable]] = None,
**data_module_kwargs,
) -> "GraphClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
GraphDatasetInput(RunningStage.TRAINING, train_dataset),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset),
GraphDatasetInput(RunningStage.TESTING, test_dataset),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset),
GraphDatasetInput(RunningStage.TRAINING, train_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.TESTING, test_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -94,3 +98,11 @@ def num_features(self):
n_cls_val = getattr(self.val_dataset, "num_features", None)
n_cls_test = getattr(self.test_dataset, "num_features", None)
return n_cls_train or n_cls_val or n_cls_test

from_folders = None
from_files = None
from_tensors = None
from_numpy = None
from_json = None
from_csv = None
from_fiftyone = None
107 changes: 59 additions & 48 deletions flash/image/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,14 @@ def from_files(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files),
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -259,11 +262,14 @@ def from_folders(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder),
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -291,11 +297,14 @@ def from_numpy(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data),
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -323,11 +332,14 @@ def from_tensors(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data),
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -362,23 +374,19 @@ def from_data_frame(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_data_frame, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_data_frame, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_data_frame, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_data_frame, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationDataFrameInput(
RunningStage.TRAINING, train_data_frame, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_data_frame, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_data_frame, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING,
predict_data_frame,
input_field,
root=predict_images_root,
resolver=predict_resolver,
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -413,19 +421,19 @@ def from_csv(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_file, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_file, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_file, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_file, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationCSVInput(
RunningStage.TRAINING, train_file, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_file, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_file, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING, predict_file, input_field, root=predict_images_root, resolver=predict_resolver
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -452,11 +460,14 @@ def from_fiftyone(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down
Loading

0 comments on commit 9fac5e2

Please sign in to comment.