Lightning-Universe · Borda · Feb 10, 2021 · Feb 2, 2021 · Feb 3, 2021 · Feb 3, 2021
@@ -47,7 +47,7 @@ def __init__(self, attr_names: Union[str, List[str]] = "backbone", train_bn: boo
         Args:
             attr_names: Name(s) of the module attributes of the model to be frozen.
 
-            train_bn: Wether to train Batch Norm layer
+            train_bn: Whether to train Batch Norm layer
 
         """
 

@@ -0,0 +1,192 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import Any, Callable, Optional, Tuple
+
+import torch
+from PIL import Image
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from torchvision import transforms as T
+
+from flash.core.data import TaskDataPipeline
+from flash.core.data.datamodule import DataModule
+from flash.core.data.utils import _contains_any_tensor
+from flash.vision.classification.data import _pil_loader
+
+try:
+    from pycocotools.coco import COCO
+except ImportError:
+    COCO = None
+
+
+class CustomCOCODataset(torch.utils.data.Dataset):
+
+    def __init__(
+        self,
+        root: str,
+        ann_file: str,
+        transforms: Optional[Callable] = None,
+    ):
+        if COCO is None:
+            raise ImportError("Kindly install the COCO API `pycocotools` to use the Dataset")
+
+        self.root = root
+        self.transforms = transforms
+        self.coco = COCO(ann_file)
+        self.ids = list(sorted(self.coco.imgs.keys()))
+
+    @property
+    def num_classes(self):
+        categories = self.coco.loadCats(self.coco.getCatIds())
+        if not categories:
+            raise ValueError("No Categories found")
+        return categories[-1]["id"] + 1
+
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        coco = self.coco
+        img_idx = self.ids[index]
+
+        ann_ids = coco.getAnnIds(imgIds=img_idx)
+        annotations = coco.loadAnns(ann_ids)
+
+        image_path = coco.loadImgs(img_idx)[0]["file_name"]
+        img = Image.open(os.path.join(self.root, image_path))
+
+        boxes = []
+        labels = []
+        areas = []
+        iscrowd = []
+
+        for obj in annotations:
+            xmin = obj["bbox"][0]
+            ymin = obj["bbox"][1]
+            xmax = xmin + obj["bbox"][2]
+            ymax = ymin + obj["bbox"][3]
+
+            boxes.append([xmin, ymin, xmax, ymax])
+            labels.append(obj["category_id"] - 1)
+            areas.append(obj["area"])
+            iscrowd.append(obj["iscrowd"])
+
+        target = {}
+        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
+        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
+        target["image_id"] = torch.tensor([img_idx])
+        target["area"] = torch.as_tensor(areas, dtype=torch.float32)
+        target["iscrowd"] = torch.as_tensor(iscrowd, dtype=torch.int64)
+
+        if self.transforms is not None:
+            img = self.transforms(img)
+
+        return img, target
+
+    def __len__(self):
+        return len(self.ids)
+
+
+_default_transform = T.Compose([T.ToTensor()])
+
+
+def collate_fn(batch):
+    return tuple(zip(*batch))
+
+
+def _coco_remove_images_without_annotations(dataset):
+    # Ref: https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py
+
+    def _has_only_empty_bbox(anno):
+        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
+
+    def _has_valid_annotation(anno):
+        # if it's empty, there is no annotation
+        if len(anno) == 0:
+            return False
+        # if all boxes have close to zero area, there is no annotation
+        if _has_only_empty_bbox(anno):
+            return False
+        return True
+
+    ids = []
+    for ds_idx, img_id in enumerate(dataset.ids):
+        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
+        anno = dataset.coco.loadAnns(ann_ids)
+        if _has_valid_annotation(anno):
+            ids.append(ds_idx)
+
+    dataset = torch.utils.data.Subset(dataset, ids)
+    return dataset
+
+
+class ImageDetectorDataPipeline(TaskDataPipeline):
+
+    def __init__(self, valid_transform: Optional[Callable] = _default_transform, loader: Callable = _pil_loader):
+        self._valid_transform = valid_transform
+        self._loader = loader
+
+    def before_collate(self, samples: Any) -> Any:
+        if _contains_any_tensor(samples):
+            return samples
+
+        if isinstance(samples, str):
+            samples = [samples]
+
+        if isinstance(samples, (list, tuple)) and all(isinstance(p, str) for p in samples):
+            outputs = []
+            for sample in samples:
+                output = self._loader(sample)
+                outputs.append(self._valid_transform(output))
+            return outputs
+        raise MisconfigurationException("The samples should either be a tensor, a list of paths or a path.")
+
+
+class ImageDetectionData(DataModule):
+
+    @classmethod
+    def from_coco(
+        cls,
+        train_folder: Optional[str] = None,
+        train_ann_file: Optional[str] = None,
+        train_transform: Optional[Callable] = _default_transform,
+        valid_folder: Optional[str] = None,
+        valid_ann_file: Optional[str] = None,
+        valid_transform: Optional[Callable] = _default_transform,
+        test_folder: Optional[str] = None,
+        test_ann_file: Optional[str] = None,
+        test_transform: Optional[Callable] = _default_transform,
+        batch_size: int = 4,
+        num_workers: Optional[int] = None,
+        **kwargs
+    ):
+        train_ds = CustomCOCODataset(train_folder, train_ann_file, train_transform)
+        num_classes = train_ds.num_classes
+        train_ds = _coco_remove_images_without_annotations(train_ds)
+
+        valid_ds = (
+            CustomCOCODataset(valid_folder, valid_ann_file, valid_transform) if valid_folder is not None else None
+        )
+
+        test_ds = (CustomCOCODataset(test_folder, test_ann_file, test_transform) if test_folder is not None else None)
+
+        datamodule = cls(
+            train_ds=train_ds,
+            valid_ds=valid_ds,
+            test_ds=test_ds,
+            batch_size=batch_size,
+            num_workers=num_workers,
+        )
+
+        datamodule.num_classes = num_classes
+        datamodule.data_pipeline = ImageDetectorDataPipeline()
+        datamodule.data_pipeline.collate_fn = collate_fn
+        return datamodule
diff --git a/flash/vision/detection/dataset.py b/flash/vision/detection/dataset.py
@@ -0,0 +1,20 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from flash.core.data import download_data
+
+
+def coco128_data_download(path: str):
+    # Dataset Credit: https://www.kaggle.com/ultralytics/coco128
+    URL = "https://github.com/zhiqwang/yolov5-rt-stack/releases/download/v0.3.0/coco128.zip"
+    download_data(URL, path)
@@ -0,0 +1,29 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytorch_lightning as pl
+
+from flash.core.finetuning import FlashBaseFinetuning
+
+
+class ImageDetectorFineTuning(FlashBaseFinetuning):
+    """
+    Freezes the backbone during Detector training.
+    """
+
+    def __init__(self, train_bn: bool = True):
+        self.train_bn = train_bn
+
+    def freeze_before_training(self, pl_module: pl.LightningModule) -> None:
+        model = pl_module.model
+        self.freeze(module=model.backbone, train_bn=self.train_bn)
@@ -11,21 +11,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, Mapping, Sequence, Type, Union
+from typing import Any, Callable, Mapping, Optional, Sequence, Type, Union
 
 import torch
 import torchvision
 from torch import nn
 from torch.optim import Optimizer
+from torchvision.ops import box_iou
 
-from flash.core.classification import ClassificationTask
+from flash.core import Task
+from flash.core.data import DataPipeline
+from flash.core.model import predict_context
+from flash.vision.detection.data import _default_transform, ImageDetectorDataPipeline
+from flash.vision.detection.finetuning import ImageDetectorFineTuning
 
 _models = {"fasterrcnn_resnet50_fpn": torchvision.models.detection.fasterrcnn_resnet50_fpn}
 
 
-class ImageDetector(ClassificationTask):
+def _evaluate_iou(target, pred):
+    """
+    Evaluate intersection over union (IOU) for target from dataset and output prediction
+    from model
+    """
+    if pred["boxes"].shape[0] == 0:
+        # no box detected, 0 IOU
+        return torch.tensor(0.0, device=pred["boxes"].device)
+    return box_iou(target["boxes"], pred["boxes"]).diag().mean()
+
+
+class ImageDetector(Task):
     """Image detection task
 
+    Ref: Lightning Bolts https://github.com/PyTorchLightning/pytorch-lightning-bolts
     Args:
         num_classes: the number of classes for detection, including background
         model: either a string of :attr`_models` or a custom nn.Module.
@@ -59,10 +76,6 @@ def __init__(
                 head = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
                 model.roi_heads.box_predictor = head
 
-        if loss is None:
-            # TODO: maybe better way of handling no loss,
-            loss = {}
-
         super().__init__(
             model=model,
             loss_fn=loss,
@@ -81,7 +94,51 @@ def training_step(self, batch, batch_idx) -> Any:
         # fasterrcnn takes both images and targets for training, returns loss_dict
         loss_dict = self.model(images, targets)
         loss = sum(loss_dict.values())
-        for k, v in loss_dict.items():
-            self.log("train_k", v)
-
+        self.log_dict({f"train_{k}": v for k, v in loss_dict.items()}, on_step=True, on_epoch=True, prog_bar=True)
         return loss
+
+    def validation_step(self, batch, batch_idx):
+        images, targets = batch
+        # fasterrcnn takes only images for eval() mode
+        outs = self.model(images)
+        iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, outs)]).mean()
+        return {"val_iou": iou}
+
+    def validation_epoch_end(self, outs):
+        avg_iou = torch.stack([o["val_iou"] for o in outs]).mean()
+        logs = {"val_iou": avg_iou}
+        return {"avg_val_iou": avg_iou, "log": logs}
+
+    def test_step(self, batch, batch_idx):
+        images, targets = batch
+        # fasterrcnn takes only images for eval() mode
+        outs = self.model(images)
+        iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, outs)]).mean()
+        return {"test_iou": iou}
+
+    def test_epoch_end(self, outs):
+        avg_iou = torch.stack([o["test_iou"] for o in outs]).mean()
+        logs = {"test_iou": avg_iou}
+        return {"avg_test_iou": avg_iou, "log": logs}
+
+    @predict_context
+    def predict(
+        self,
+        x: Any,
+        batch_idx: Optional[int] = None,
+        skip_collate_fn: bool = False,
+        dataloader_idx: Optional[int] = None,
+        data_pipeline: Optional[DataPipeline] = None,
+    ) -> Any:
+
+        data_pipeline = data_pipeline or self.default_pipeline()
+        batch = x if skip_collate_fn else data_pipeline.collate_fn(x)
+        predictions = self.forward(batch)
+        return data_pipeline.uncollate_fn(predictions)
+
+    @staticmethod
+    def default_pipeline() -> ImageDetectorDataPipeline:
+        return ImageDetectorDataPipeline()
+
+    def configure_finetune_callback(self):
+        return [ImageDetectorFineTuning(train_bn=True)]
@@ -0,0 +1,26 @@
+import flash
+from flash.vision.detection.data import ImageDetectionData
+from flash.vision.detection.dataset import coco128_data_download
+from flash.vision.detection.model import ImageDetector
+
+# 1. Download the data
+coco128_data_download("data/")
+
+# 2. Load the Data
+datamodule = ImageDetectionData.from_coco(
+    train_folder="data/coco128/images/train2017/",
+    train_ann_file="data/coco128/annotations/instances_train2017.json",
+    batch_size=2
+)
+
+# 3. Build the model
+model = ImageDetector(num_classes=datamodule.num_classes)
+
+# 4. Create the trainer. Run twice on data
+trainer = flash.Trainer(max_epochs=2)
+
+# 5. Finetune the model
+trainer.finetune(model, datamodule)
+
+# 6. Save it!
+trainer.save_checkpoint("image_detection_model.pt")
@@ -12,3 +12,5 @@ tqdm  # comes with 3rd-party dependency
 rouge-score>=0.0.4
 sentencepiece>=0.1.95
 pytorch-lightning-bolts==0.3.0
+filelock  # comes with 3rd-party dependency
+pycocotools>=2.0.2 ; python_version >= "3.7"
-Original file line number
+Diff line change
@@ Expand Up @@
             Args:
                 attr_names: Name(s) of the module attributes of the model to be frozen.
-                train_bn: Wether to train Batch Norm layer
+                train_bn: Whether to train Batch Norm layer
             """
@@ Expand Down @@