Replaced to_tensor() with pil_to_tensor() + convert_image_dtype() #298

Merged on Feb 1, 2022 (3 commits)

Changes from 1 commit
Replaced to_tensor() with pil_to_tensor() + convert_image_dtype()
zhiqwang committed Feb 1, 2022
commit 5e68a4a1d0efd81ea04ba39a5cf70d176c3813a4
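
The commit swaps torchvision's one-call conversion for an explicit two-step path. As a quick sanity check (not part of this PR), the two paths should produce the same float32 tensor scaled to [0, 1]:

import torch
from PIL import Image
from torchvision.transforms import functional as F

img = Image.new("RGB", (64, 48), color=(128, 64, 255))

# Old single call: PIL image -> float32 tensor scaled to [0, 1]
old = F.to_tensor(img)

# New two-step path: uint8 tensor first, then an explicit dtype conversion
new = F.convert_image_dtype(F.pil_to_tensor(img))  # dtype defaults to torch.float32

assert new.dtype == old.dtype == torch.float32
assert torch.allclose(old, new)

The split keeps the intermediate tensor in uint8 and makes the dtype conversion an explicit, separate step.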
yolort/data/coco_eval.py (2 changes: 1 addition & 1 deletion)
@@ -120,7 +120,7 @@ def compute(self):
     def derive_coco_results(self, class_names: Optional[List[str]] = None):
         """
         Derive the desired score numbers from summarized COCOeval. Modified from
-        https://github.com/facebookresearch/detectron2/blob/7205996/detectron2/evaluation/coco_evaluation.py#L291
+        https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/coco_evaluation.py
 
         Args:
             coco_eval (None or COCOEval): None represents no predictions from model.
yolort/data/transforms.py (77 changes: 45 additions & 32 deletions)
@@ -1,12 +1,8 @@
 """
 Transforms for Data Augmentation
 Mostly copy-paste from https://github.com/pytorch/vision/blob/0013d93/references/detection/transforms.py
 """
-from typing import List, Tuple, Dict, Optional
+from typing import List, Dict, Tuple, Optional
 
 import torch
+import torchvision
 from torch import nn, Tensor
-from torchvision.ops import boxes as box_ops
 from torchvision.transforms import functional as F
 from torchvision.transforms import transforms as T
@@ -19,28 +15,17 @@
 )
 
 
-def collate_fn(batch):
-    return tuple(zip(*batch))
+def _flip_coco_person_keypoints(kps, width):
+    flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+    flipped_data = kps[:, flip_inds]
+    flipped_data[..., 0] = width - flipped_data[..., 0]
+    # Maintain COCO convention that if visibility == 0, then x, y = 0
+    inds = flipped_data[..., 2] == 0
+    flipped_data[inds] = 0
+    return flipped_data
 
 
-def default_train_transforms(hflip_prob=0.5):
-
-    return Compose(
-        [
-            RandomPhotometricDistort(),
-            RandomZoomOut(),
-            RandomIoUCrop(),
-            RandomHorizontalFlip(p=hflip_prob),
-            ToTensor(),
-        ]
-    )
-
-
-def default_val_transforms():
-    return ToTensor()
-
-
-class Compose(object):
+class Compose:
     def __init__(self, transforms):
         self.transforms = transforms

@@ -52,9 +37,7 @@ def __call__(self, image, target):
 
 class RandomHorizontalFlip(T.RandomHorizontalFlip):
     def forward(
-        self,
-        image: Tensor,
-        target: Optional[Dict[str, Tensor]] = None,
+        self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
     ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
         if torch.rand(1) < self.p:
             image = F.hflip(image)
@@ -63,6 +46,10 @@ def forward(
             target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
             if "masks" in target:
                 target["masks"] = target["masks"].flip(-1)
+            if "keypoints" in target:
+                keypoints = target["keypoints"]
+                keypoints = _flip_coco_person_keypoints(keypoints, width)
+                target["keypoints"] = keypoints
         return image, target


@@ -72,7 +59,32 @@ def forward(
         image: Tensor,
         target: Optional[Dict[str, Tensor]] = None,
     ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
-        image = F.to_tensor(image)
+        image = F.pil_to_tensor(image)
+        image = F.convert_image_dtype(image)
         return image, target
 
 
+class PILToTensor(nn.Module):
+    def forward(
+        self,
+        image: Tensor,
+        target: Optional[Dict[str, Tensor]] = None,
+    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+        image = F.pil_to_tensor(image)
+        return image, target
+
+
+class ConvertImageDtype(nn.Module):
+    def __init__(self, dtype: torch.dtype) -> None:
+        super().__init__()
+        self.dtype = dtype
+
+    def forward(
+        self,
+        image: Tensor,
+        target: Optional[Dict[str, Tensor]] = None,
+    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+        image = F.convert_image_dtype(image, self.dtype)
+        return image, target


@@ -148,7 +160,7 @@ def forward(
 
         # check at least 1 box with jaccard limitations
         boxes = target["boxes"][is_within_crop_area]
-        ious = box_ops.box_iou(
+        ious = torchvision.ops.boxes.box_iou(
             boxes,
             torch.tensor(
                 [[left, top, right, bottom]],
@@ -291,7 +303,8 @@ def forward(
 
         is_pil = F._is_pil_image(image)
         if is_pil:
-            image = F.to_tensor(image)
+            image = F.pil_to_tensor(image)
+            image = F.convert_image_dtype(image)
         image = image[..., permutation, :, :]
         if is_pil:
             image = F.to_pil_image(image)
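
The last hunk applies the same pattern inside RandomPhotometricDistort's channel shuffle: convert the PIL input once, shuffle the channel axis, and round-trip back to PIL. A standalone sketch of that flow (illustrative values, not from the PR):

import torch
from PIL import Image
from torchvision.transforms import functional as F

img = Image.new("RGB", (64, 48), color=(255, 0, 0))

t = F.pil_to_tensor(img)        # uint8 tensor of shape [3, 48, 64]
t = F.convert_image_dtype(t)    # float32 in [0, 1]
permutation = torch.randperm(3)
t = t[..., permutation, :, :]   # shuffle the channel axis
img = F.to_pil_image(t)         # back to PIL for any remaining PIL-based ops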