diff --git a/mmpose/datasets/transforms/__init__.py b/mmpose/datasets/transforms/__init__.py
index 357022c35f..fb9a5fc0bb 100644
--- a/mmpose/datasets/transforms/__init__.py
+++ b/mmpose/datasets/transforms/__init__.py
@@ -1,12 +1,11 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .bottomup_transforms import (BottomupGetHeatmapMask, BottomupRandomAffine,
-                                  BottomupResize)
+                                  BottomupRandomChoiceResize,
+                                  BottomupRandomCrop, BottomupResize)
 from .common_transforms import (Albumentation, FilterAnnotations,
                                 GenerateTarget, GetBBoxCenterScale,
                                 PhotometricDistortion, RandomBBoxTransform,
-                                RandomFlip, RandomHalfBody, YOLOXHSVRandomAug,
-                                BottomupRandomChoiceResize,
-                                BottomupRandomCrop, BottomupResize)
+                                RandomFlip, RandomHalfBody, YOLOXHSVRandomAug)
 from .converting import KeypointConverter
 from .formatting import PackPoseInputs
 from .loading import LoadImage
diff --git a/mmpose/models/data_preprocessors/__init__.py b/mmpose/models/data_preprocessors/__init__.py
index 530b3d1699..dc6f122320 100644
--- a/mmpose/models/data_preprocessors/__init__.py
+++ b/mmpose/models/data_preprocessors/__init__.py
@@ -2,4 +2,7 @@
 from .batch_augmentation import BatchSyncRandomResize
 from .data_preprocessor import BatchShapeDataPreprocessor, PoseDataPreprocessor
 
-__all__ = ['PoseDataPreprocessor', 'BatchSyncRandomResize', 'BatchShapeDataPreprocessor']
+__all__ = [
+    'PoseDataPreprocessor', 'BatchSyncRandomResize',
+    'BatchShapeDataPreprocessor'
+]
diff --git a/mmpose/models/data_preprocessors/data_preprocessor.py b/mmpose/models/data_preprocessors/data_preprocessor.py
index a37231f439..84f3da113a 100644
--- a/mmpose/models/data_preprocessors/data_preprocessor.py
+++ b/mmpose/models/data_preprocessors/data_preprocessor.py
@@ -1,26 +1,17 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-<<<<<<< HEAD
-from typing import List, Optional, Sequence, Union
-
-import numpy as np
-import torch
-import torch.nn as nn
-from mmengine.model import ImgDataPreprocessor
-from mmengine.utils import is_seq_of
-=======
 import math
 from numbers import Number
-from typing import Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 
 import numpy as np
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
-import torchvision.transforms.functional
+import torchvision
 from mmengine.model import ImgDataPreprocessor
 from mmengine.model.utils import stack_batch
 from mmengine.utils import is_seq_of
 from PIL import Image
->>>>>>> support edpose
 
 from mmpose.registry import MODELS
 
@@ -29,11 +20,90 @@
 class PoseDataPreprocessor(ImgDataPreprocessor):
     """Image pre-processor for pose estimation tasks."""
 
-<<<<<<< HEAD
     def __init__(self,
                  mean: Sequence[float] = None,
                  std: Sequence[float] = None,
-=======
+                 pad_size_divisor: int = 1,
+                 pad_value: Union[float, int] = 0,
+                 bgr_to_rgb: bool = False,
+                 rgb_to_bgr: bool = False,
+                 non_blocking: Optional[bool] = False,
+                 batch_augments: Optional[List[dict]] = None):
+        super().__init__(
+            mean=mean,
+            std=std,
+            pad_size_divisor=pad_size_divisor,
+            pad_value=pad_value,
+            bgr_to_rgb=bgr_to_rgb,
+            rgb_to_bgr=rgb_to_bgr,
+            non_blocking=non_blocking)
+        if batch_augments is not None:
+            self.batch_augments = nn.ModuleList(
+                [MODELS.build(aug) for aug in batch_augments])
+        else:
+            self.batch_augments = None
+
+    def forward(self, data: dict, training: bool = False) -> dict:
+        """Perform normalization, padding and bgr2rgb conversion based on
+        ``BaseDataPreprocessor``.
+
+        Args:
+            data (dict): Data sampled from dataloader.
+            training (bool): Whether to enable training time augmentation.
+
+        Returns:
+            dict: Data in the same format as the model input.
+        """
+        batch_pad_shape = self._get_pad_shape(data)
+        data = super().forward(data=data, training=training)
+        inputs, data_samples = data['inputs'], data['data_samples']
+        batch_input_shape = tuple(inputs[0].size()[-2:])
+        for data_sample, pad_shape in zip(data_samples, batch_pad_shape):
+            data_sample.set_metainfo({
+                'batch_input_shape': batch_input_shape,
+                'pad_shape': pad_shape
+            })
+
+        if training and self.batch_augments is not None:
+            for batch_aug in self.batch_augments:
+                inputs, data_samples = batch_aug(inputs, data_samples)
+
+        return {'inputs': inputs, 'data_samples': data_samples}
+
+    def _get_pad_shape(self, data: dict) -> List[tuple]:
+        """Get the pad_shape of each image based on data and
+        pad_size_divisor."""
+        _batch_inputs = data['inputs']
+        # Process data with `pseudo_collate`.
+        if is_seq_of(_batch_inputs, torch.Tensor):
+            batch_pad_shape = []
+            for ori_input in _batch_inputs:
+                pad_h = int(
+                    np.ceil(ori_input.shape[1] /
+                            self.pad_size_divisor)) * self.pad_size_divisor
+                pad_w = int(
+                    np.ceil(ori_input.shape[2] /
+                            self.pad_size_divisor)) * self.pad_size_divisor
+                batch_pad_shape.append((pad_h, pad_w))
+        # Process data with `default_collate`.
+        elif isinstance(_batch_inputs, torch.Tensor):
+            assert _batch_inputs.dim() == 4, (
+                'The input of `ImgDataPreprocessor` should be a NCHW tensor '
+                'or a list of tensor, but got a tensor with shape: '
+                f'{_batch_inputs.shape}')
+            pad_h = int(
+                np.ceil(_batch_inputs.shape[1] /
+                        self.pad_size_divisor)) * self.pad_size_divisor
+            pad_w = int(
+                np.ceil(_batch_inputs.shape[2] /
+                        self.pad_size_divisor)) * self.pad_size_divisor
+            batch_pad_shape = [(pad_h, pad_w)] * _batch_inputs.shape[0]
+        else:
+            raise TypeError('Output of `cast_data` should be a dict '
+                            'or a tuple with inputs and data_samples, but got'
+                            f'{type(data)}: {data}')
+        return batch_pad_shape
+
 
 @MODELS.register_module()
 class BatchShapeDataPreprocessor(ImgDataPreprocessor):
@@ -78,17 +148,12 @@ class BatchShapeDataPreprocessor(ImgDataPreprocessor):
     def __init__(self,
                  mean: Sequence[Number] = None,
                  std: Sequence[Number] = None,
->>>>>>> support edpose
                  pad_size_divisor: int = 1,
                  pad_value: Union[float, int] = 0,
                  bgr_to_rgb: bool = False,
                  rgb_to_bgr: bool = False,
                  non_blocking: Optional[bool] = False,
-<<<<<<< HEAD
-                 batch_augments: Optional[List[dict]] = None):
-=======
                  normalize_bakend: str = 'cv2'):
->>>>>>> support edpose
         super().__init__(
             mean=mean,
             std=std,
@@ -97,21 +162,10 @@ def __init__(self,
             bgr_to_rgb=bgr_to_rgb,
             rgb_to_bgr=rgb_to_bgr,
             non_blocking=non_blocking)
-<<<<<<< HEAD
-        if batch_augments is not None:
-            self.batch_augments = nn.ModuleList(
-                [MODELS.build(aug) for aug in batch_augments])
-        else:
-            self.batch_augments = None
-
-    def forward(self, data: dict, training: bool = False) -> dict:
-        """Perform normalization, padding and bgr2rgb conversion based on
-=======
         self.normalize_bakend = normalize_bakend
 
     def forward(self, data: dict, training: bool = False) -> dict:
         """Perform normalization、padding and bgr2rgb conversion based on
->>>>>>> support edpose
         ``BaseDataPreprocessor``.
 
         Args:
@@ -121,39 +175,6 @@ def forward(self, data: dict, training: bool = False) -> dict:
         Returns:
             dict: Data in the same format as the model input.
         """
-<<<<<<< HEAD
-        batch_pad_shape = self._get_pad_shape(data)
-        data = super().forward(data=data, training=training)
-        inputs, data_samples = data['inputs'], data['data_samples']
-        batch_input_shape = tuple(inputs[0].size()[-2:])
-        for data_sample, pad_shape in zip(data_samples, batch_pad_shape):
-            data_sample.set_metainfo({
-                'batch_input_shape': batch_input_shape,
-                'pad_shape': pad_shape
-            })
-
-        if training and self.batch_augments is not None:
-            for batch_aug in self.batch_augments:
-                inputs, data_samples = batch_aug(inputs, data_samples)
-
-        return {'inputs': inputs, 'data_samples': data_samples}
-
-    def _get_pad_shape(self, data: dict) -> List[tuple]:
-        """Get the pad_shape of each image based on data and
-        pad_size_divisor."""
-        _batch_inputs = data['inputs']
-        # Process data with `pseudo_collate`.
-        if is_seq_of(_batch_inputs, torch.Tensor):
-            batch_pad_shape = []
-            for ori_input in _batch_inputs:
-                pad_h = int(
-                    np.ceil(ori_input.shape[1] /
-                            self.pad_size_divisor)) * self.pad_size_divisor
-                pad_w = int(
-                    np.ceil(ori_input.shape[2] /
-                            self.pad_size_divisor)) * self.pad_size_divisor
-                batch_pad_shape.append((pad_h, pad_w))
-=======
         if self.normalize_bakend == 'cv2':
             data = super().forward(data=data, training=training)
         else:
@@ -215,27 +236,12 @@ def normalize_pillow(self,
             # Pad and stack Tensor.
            batch_inputs = stack_batch(batch_inputs, self.pad_size_divisor,
                                       self.pad_value)
->>>>>>> support edpose
        # Process data with `default_collate`.
        elif isinstance(_batch_inputs, torch.Tensor):
            assert _batch_inputs.dim() == 4, (
                'The input of `ImgDataPreprocessor` should be a NCHW tensor '
                'or a list of tensor, but got a tensor with shape: '
                f'{_batch_inputs.shape}')
-<<<<<<< HEAD
-            pad_h = int(
-                np.ceil(_batch_inputs.shape[1] /
-                        self.pad_size_divisor)) * self.pad_size_divisor
-            pad_w = int(
-                np.ceil(_batch_inputs.shape[2] /
-                        self.pad_size_divisor)) * self.pad_size_divisor
-            batch_pad_shape = [(pad_h, pad_w)] * _batch_inputs.shape[0]
-        else:
-            raise TypeError('Output of `cast_data` should be a dict '
-                            'or a tuple with inputs and data_samples, but got'
-                            f'{type(data)}: {data}')
-        return batch_pad_shape
-=======
        if self._channel_conversion:
            _batch_inputs = _batch_inputs[:, [2, 1, 0], ...]
            # Convert to float after channel conversion to ensure
@@ -266,8 +272,7 @@ def normalize_pillow(self,
        else:
            raise TypeError('Output of `cast_data` should be a dict of '
                            'list/tuple with inputs and data_samples, '
-                             f'but got {type(data)}: {data}')
+                            f'but got {type(data)}: {data}')
        data['inputs'] = batch_inputs
        data.setdefault('data_samples', None)
        return data
->>>>>>> support edpose