diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index ab6d0a69e5c..a179ac8ccb9 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -127,8 +127,9 @@ def _get_magnitudes(): class AutoAugment(torch.nn.Module): r"""AutoAugment data augmentation method based on `"AutoAugment: Learning Augmentation Strategies from Data" `_. - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If the image is torch Tensor, it should be of type torch.uint8, and it is expected + to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: policy (AutoAugmentPolicy): Desired policy enum defined by @@ -136,9 +137,8 @@ class AutoAugment(torch.nn.Module): interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. If input is PIL Image, the options is only available for ``Pillow>=5.0.0``. 
Example: diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 2fb74269195..c6012fddf36 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -59,7 +59,7 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode: def _get_image_size(img: Tensor) -> List[int]: - """Returns image sizea as (w, h) + """Returns image size as [w, h] """ if isinstance(img, torch.Tensor): return F_t._get_image_size(img) @@ -68,6 +68,8 @@ def _get_image_size(img: Tensor) -> List[int]: def _get_image_num_channels(img: Tensor) -> int: + """Returns number of image channels + """ if isinstance(img, torch.Tensor): return F_t._get_image_num_channels(img) @@ -86,8 +88,9 @@ def _is_numpy_image(img: Any) -> bool: def to_tensor(pic): """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + This function does not support torchscript. - See ``ToTensor`` for more details. + See :class:`~torchvision.transforms.ToTensor` for more details. Args: pic (PIL Image or numpy.ndarray): Image to be converted to tensor. @@ -141,6 +144,7 @@ def to_tensor(pic): def pil_to_tensor(pic): """Convert a ``PIL Image`` to a tensor of the same type. + This function does not support torchscript. See :class:`~torchvision.transforms.PILToTensor` for more details. @@ -169,6 +173,7 @@ def pil_to_tensor(pic): def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. Args: image (torch.Tensor): Image to be converted @@ -195,7 +200,7 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) - def to_pil_image(pic, mode=None): - """Convert a tensor or an ndarray to PIL Image. + """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript. See :class:`~torchvision.transforms.ToPILImage` for more details. 
@@ -291,6 +296,7 @@ def to_pil_image(pic, mode=None): def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor: """Normalize a tensor image with mean and standard deviation. + This transform does not support PIL Image. .. note:: This transform acts out of place by default, i.e., it does not mutates the input tensor. @@ -331,7 +337,7 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR) -> Tensor: r"""Resize the input image to the given size. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: @@ -341,8 +347,7 @@ def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = Inte the smaller edge of the image will be matched to this number maintaining the aspect ratio. i.e, if height > width, then image will be rescaled to :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. - In torchscript mode size as single int is not supported, use a tuple or - list of length 1: ``[size, ]``. + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, @@ -378,22 +383,22 @@ def scale(*args, **kwargs): def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: r"""Pad the given image on all sides with the given "pad" value. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... 
means an arbitrary number of leading dimensions Args: img (PIL Image or Tensor): Image to be padded. - padding (int or tuple or list): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. - In torchscript mode padding as single int is not supported, use a tuple or - list of length 1: ``[padding, ]``. - fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of - length 3, it is used to fill R, G, B channels respectively. - This value is only used when the padding_mode is constant. Only int value is supported for Tensors. + In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. + If a tuple of length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. - Mode symmetric is not yet supported for Tensor inputs. - constant: pads with a constant value, this value is specified with fill @@ -420,9 +425,8 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: """Crop the given image at specified location and output size. 
- The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading - dimensions + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. @@ -443,12 +447,12 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: def center_crop(img: Tensor, output_size: List[int]) -> Tensor: """Crops the given image at the center. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: img (PIL Image or Tensor): Image to be cropped. - output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int + output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int, it is used for both directions. Returns: @@ -472,7 +476,7 @@ def resized_crop( interpolation: InterpolationMode = InterpolationMode.BILINEAR ) -> Tensor: """Crop the given image and resize it to desired size. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. @@ -499,12 +503,12 @@ def resized_crop( def hflip(img: Tensor) -> Tensor: - """Horizontally flip the given PIL Image or Tensor. + """Horizontally flip the given image. Args: img (PIL Image or Tensor): Image to be flipped. If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing + where ... means it can have an arbitrary number of leading dimensions. 
Returns: @@ -554,7 +558,7 @@ def perspective( fill: Optional[List[float]] = None ) -> Tensor: """Perform perspective transform of the given image. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: @@ -567,11 +571,10 @@ def perspective( :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. - In torchscript mode single int/float value is not supported, please use a tuple - or list of length 1: ``[value, ]``. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. If input is PIL Image, the options is only available for ``Pillow>=5.0.0``. Returns: @@ -599,16 +602,16 @@ def perspective( def vflip(img: Tensor) -> Tensor: - """Vertically flip the given PIL Image or torch Tensor. + """Vertically flip the given image. Args: img (PIL Image or Tensor): Image to be flipped. If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing + where ... means it can have an arbitrary number of leading dimensions. Returns: - PIL Image: Vertically flipped image. + PIL Image or Tensor: Vertically flipped image. 
""" if not isinstance(img, torch.Tensor): return F_pil.vflip(img) @@ -618,7 +621,7 @@ def vflip(img: Tensor) -> Tensor: def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Crop the given image into four corners and the central crop. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions .. Note:: @@ -629,7 +632,7 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Ten img (PIL Image or Tensor): Image to be cropped. size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). Returns: tuple: tuple (tl, tr, bl, br, center) @@ -663,7 +666,7 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[ """Generate ten cropped images from the given image. Crop the given image into four corners and the central crop plus the flipped version of these (horizontal flipping is used by default). - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions .. Note:: @@ -674,7 +677,7 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[ img (PIL Image or Tensor): Image to be cropped. size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). 
vertical_flip (bool): Use vertical flipping instead of horizontal Returns: @@ -702,10 +705,12 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: - """Adjust brightness of an Image. + """Adjust brightness of an image. Args: img (PIL Image or Tensor): Image to be adjusted. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. brightness_factor (float): How much to adjust the brightness. Can be any non negative number. 0 gives a black image, 1 gives the original image while 2 increases the brightness by a factor of 2. @@ -720,7 +725,7 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: - """Adjust contrast of an Image. + """Adjust contrast of an image. Args: img (PIL Image or Tensor): Image to be adjusted. @@ -801,6 +806,8 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: Args: img (PIL Image or Tensor): PIL Image to be adjusted. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. gamma (float): Non negative real number, same as :math:`\gamma` in the equation. gamma larger than 1 make the shadows darker, while gamma smaller than 1 make dark regions lighter. @@ -870,12 +877,12 @@ def rotate( fill: Optional[List[float]] = None, resample: Optional[int] = None ) -> Tensor: """Rotate the image by angle. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: img (PIL Image or Tensor): image to be rotated. - angle (float or int): rotation angle value in degrees, counter-clockwise. 
+ angle (number): rotation angle value in degrees, counter-clockwise. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. @@ -884,13 +891,12 @@ def rotate( If true, expands the output image to make it large enough to hold the entire rotated image. If false or omitted, make the output image the same size as the input image. Note that the expand flag assumes rotation around the center and no translation. - center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. - In torchscript mode single int/float value is not supported, please use a tuple - or list of length 1: ``[value, ]``. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. If input is PIL Image, the options is only available for ``Pillow>=5.2.0``. Returns: @@ -944,26 +950,25 @@ def affine( resample: Optional[int] = None, fillcolor: Optional[List[float]] = None ) -> Tensor: """Apply affine transformation on the image keeping image center invariant. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. 
Args: img (PIL Image or Tensor): image to transform. - angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction. - translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation) + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) scale (float): overall scale - shear (float or tuple or list): shear angle value in degrees between -180 to 180, clockwise direction. - If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while + shear (float or sequence): shear angle value in degrees between -180 and 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while the second value corresponds to a shear parallel to the y axis. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. - In torchscript mode single int/float value is not supported, please use a tuple - or list of length 1: ``[value, ]``. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``. fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0. Please use `arg`:fill: instead. @@ -1047,10 +1052,11 @@ def affine( @torch.jit.unused def to_grayscale(img, num_output_channels=1): """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. + This transform does not support torch Tensor. Args: img (PIL Image): PIL Image to be converted to grayscale. - num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. + num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1. Returns: PIL Image: Grayscale version of the image. @@ -1066,8 +1072,8 @@ def to_grayscale(img, num_output_channels=1): def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: """Convert RGB image to grayscale version of image. - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions Note: Please, note that this method supports only RGB images as input. For inputs in other color spaces, @@ -1091,6 +1097,7 @@ def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor: """ Erase the input Tensor Image with given value. + This transform does not support PIL Image. Args: img (Tensor Image): Tensor image of size (C, H, W) to be erased @@ -1115,22 +1122,21 @@ def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor: - """Performs Gaussian blurring on the img by given kernel. 
- The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + """Performs Gaussian blurring on the image by given kernel. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: img (PIL Image or Tensor): Image to be blurred kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers like ``(kx, ky)`` or a single integer for square kernels. - In torchscript mode kernel_size as single int is not supported, use a tuple or - list of length 1: ``[ksize, ]``. + In torchscript mode kernel_size as single int is not supported, use a sequence of length 1: ``[ksize, ]``. sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the same sigma in both X/Y directions. If None, then it is computed using ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``. Default, None. In torchscript mode sigma as single float is - not supported, use a tuple or list of length 1: ``[sigma, ]``. + not supported, use a sequence of length 1: ``[sigma, ]``. Returns: PIL Image or Tensor: Gaussian Blurred version of the image. @@ -1175,13 +1181,13 @@ def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[floa def invert(img: Tensor) -> Tensor: - """Invert the colors of an RGB/grayscale PIL Image or torch Tensor. + """Invert the colors of an RGB/grayscale image. Args: img (PIL Image or Tensor): Image to have its colors inverted. - If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing - dimensions. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. 
+ If img is PIL Image, it is expected to be in mode "L" or "RGB". Returns: PIL Image or Tensor: Color inverted image. @@ -1193,13 +1199,14 @@ def invert(img: Tensor) -> Tensor: def posterize(img: Tensor, bits: int) -> Tensor: - """Posterize a PIL Image or torch Tensor by reducing the number of bits for each color channel. + """Posterize an image by reducing the number of bits for each color channel. Args: img (PIL Image or Tensor): Image to have its colors posterized. If img is a Tensor, it should be of type torch.uint8 and - it is expected to be in [..., H, W] format, where ... means - it can have an arbitrary number of trailing dimensions. + it is expected to be in [..., 1 or 3, H, W] format, where ... means + it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". bits (int): The number of bits to keep for each channel (0-8). Returns: PIL Image or Tensor: Posterized image. @@ -1214,13 +1221,13 @@ def posterize(img: Tensor, bits: int) -> Tensor: def solarize(img: Tensor, threshold: float) -> Tensor: - """Solarize a PIL Image or torch Tensor by inverting all pixel values above a threshold. + """Solarize an RGB/grayscale image by inverting all pixel values above a threshold. Args: img (PIL Image or Tensor): Image to have its colors inverted. - If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing - dimensions. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". threshold (float): All pixels equal or above this value are inverted. Returns: PIL Image or Tensor: Solarized image. @@ -1232,10 +1239,12 @@ def solarize(img: Tensor, threshold: float) -> Tensor: def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor: - """Adjust the sharpness of an Image. 
+ """Adjust the sharpness of an image. Args: img (PIL Image or Tensor): Image to be adjusted. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. sharpness_factor (float): How much to adjust the sharpness. Can be any non negative number. 0 gives a blurred image, 1 gives the original image while 2 increases the sharpness by a factor of 2. @@ -1250,15 +1259,15 @@ def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor: def autocontrast(img: Tensor) -> Tensor: - """Maximize contrast of a PIL Image or torch Tensor by remapping its + """Maximize contrast of an image by remapping its pixels per channel so that the lowest becomes black and the lightest becomes white. Args: img (PIL Image or Tensor): Image on which autocontrast is applied. - If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing - dimensions. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Returns: PIL Image or Tensor: An image that was autocontrasted. @@ -1270,15 +1279,15 @@ def autocontrast(img: Tensor) -> Tensor: def equalize(img: Tensor) -> Tensor: - """Equalize the histogram of a PIL Image or torch Tensor by applying + """Equalize the histogram of an image by applying a non-linear mapping to the input in order to create a uniform distribution of grayscale values in the output. Args: img (PIL Image or Tensor): Image on which equalize is applied. - If img is a Tensor, it is expected to be in [..., H, W] format, - where ... means it can have an arbitrary number of trailing - dimensions. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. 
+ If img is PIL Image, it is expected to be in mode "P", "L" or "RGB". Returns: PIL Image or Tensor: An image that was equalized. diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 26f3b504d99..6999a2acf5f 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -35,19 +35,6 @@ def _get_image_num_channels(img: Any) -> int: @torch.jit.unused def hflip(img): - """PRIVATE METHOD. Horizontally flip the given PIL Image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be flipped. - - Returns: - PIL Image: Horizontally flipped image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -56,19 +43,6 @@ def hflip(img): @torch.jit.unused def vflip(img): - """PRIVATE METHOD. Vertically flip the given PIL Image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be flipped. - - Returns: - PIL Image: Vertically flipped image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -77,22 +51,6 @@ def vflip(img): @torch.jit.unused def adjust_brightness(img, brightness_factor): - """PRIVATE METHOD. Adjust brightness of an RGB image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be adjusted. - brightness_factor (float): How much to adjust the brightness. Can be - any non negative number. 
0 gives a black image, 1 gives the - original image while 2 increases the brightness by a factor of 2. - - Returns: - PIL Image: Brightness adjusted image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -103,21 +61,6 @@ def adjust_brightness(img, brightness_factor): @torch.jit.unused def adjust_contrast(img, contrast_factor): - """PRIVATE METHOD. Adjust contrast of an Image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): PIL Image to be adjusted. - contrast_factor (float): How much to adjust the contrast. Can be any - non negative number. 0 gives a solid gray image, 1 gives the - original image while 2 increases the contrast by a factor of 2. - Returns: - PIL Image: Contrast adjusted image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -128,21 +71,6 @@ def adjust_contrast(img, contrast_factor): @torch.jit.unused def adjust_saturation(img, saturation_factor): - """PRIVATE METHOD. Adjust color saturation of an image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): PIL Image to be adjusted. - saturation_factor (float): How much to adjust the saturation. 0 will - give a black and white image, 1 will give the original image while - 2 will enhance the saturation by a factor of 2. - Returns: - PIL Image: Saturation adjusted image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -153,35 +81,6 @@ def adjust_saturation(img, saturation_factor): @torch.jit.unused def adjust_hue(img, hue_factor): - """PRIVATE METHOD. Adjust hue of an image. - - .. 
warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - The image hue is adjusted by converting the image to HSV and - cyclically shifting the intensities in the hue channel (H). - The image is then converted back to original image mode. - - `hue_factor` is the amount of shift in H channel and must be in the - interval `[-0.5, 0.5]`. - - See `Hue`_ for more details. - - .. _Hue: https://en.wikipedia.org/wiki/Hue - - Args: - img (PIL Image): PIL Image to be adjusted. - hue_factor (float): How much to shift the hue channel. Should be in - [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in - HSV space in positive and negative direction respectively. - 0 means no shift. Therefore, both -0.5 and 0.5 will give an image - with complementary colors while 0 gives the original image. - - Returns: - PIL Image: Hue adjusted image. - """ if not(-0.5 <= hue_factor <= 0.5): raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor)) @@ -206,30 +105,6 @@ def adjust_hue(img, hue_factor): @torch.jit.unused def adjust_gamma(img, gamma, gain=1): - r"""PRIVATE METHOD. Perform gamma correction on an image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Also known as Power Law Transform. Intensities in RGB mode are adjusted - based on the following equation: - - .. math:: - I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} - - See `Gamma Correction`_ for more details. - - .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction - - Args: - img (PIL Image): PIL Image to be adjusted. - gamma (float): Non negative real number, same as :math:`\gamma` in the equation. 
- gamma larger than 1 make the shadows darker, - while gamma smaller than 1 make dark regions lighter. - gain (float): The constant multiplier. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -247,44 +122,6 @@ def adjust_gamma(img, gamma, gain=1): @torch.jit.unused def pad(img, padding, fill=0, padding_mode="constant"): - r"""PRIVATE METHOD. Pad the given PIL.Image on all sides with the given "pad" value. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be padded. - padding (int or tuple or list): Padding on each border. If a single int is provided this - is used to pad all borders. If a tuple or list of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple or list of length 4 is provided - this is the padding for the left, top, right and bottom borders respectively. For compatibility reasons - with ``functional_tensor.pad``, if a tuple or list of length 1 is provided, it is interpreted as - a single int. - fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of - length 3, it is used to fill R, G, B channels respectively. - This value is only used when the padding_mode is constant. - padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. 
- - - constant: pads with a constant value, this value is specified with fill - - - edge: pads with the last value on the edge of the image - - - reflect: pads with reflection of image (without repeating the last value on the edge) - - padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode - will result in [3, 2, 1, 2, 3, 4, 3, 2] - - - symmetric: pads with reflection of image (repeating the last value on the edge) - - padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode - will result in [2, 1, 1, 2, 3, 4, 4, 3] - - Returns: - PIL Image: Padded image. - """ - if not _is_pil_image(img): raise TypeError("img should be PIL Image. Got {}".format(type(img))) @@ -360,23 +197,6 @@ def pad(img, padding, fill=0, padding_mode="constant"): @torch.jit.unused def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image: - """PRIVATE METHOD. Crop the given PIL Image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. - top (int): Vertical component of the top left corner of the crop box. - left (int): Horizontal component of the top left corner of the crop box. - height (int): Height of the crop box. - width (int): Width of the crop box. - - Returns: - PIL Image: Cropped image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -385,27 +205,6 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag @torch.jit.unused def resize(img, size, interpolation=Image.BILINEAR): - r"""PRIVATE METHOD. Resize the input PIL Image to the given size. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. 
- Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. - For compatibility reasons with ``functional_tensor.resize``, if a tuple or list of length 1 is provided, - it is interpreted as a single int. - interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``. - - Returns: - PIL Image: Resized image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))): @@ -431,25 +230,7 @@ def resize(img, size, interpolation=Image.BILINEAR): @torch.jit.unused def _parse_fill(fill, img, min_pil_version, name="fillcolor"): - """PRIVATE METHOD. Helper function to get the fill color for rotate, perspective transforms, and pad. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - fill (n-tuple or int or float): Pixel fill value for area outside the transformed - image. If int or float, the value is used for all bands respectively. - Defaults to 0 for all bands. - img (PIL Image): Image to be filled. - min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option - was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0) - name (str): Name of the ``fillcolor`` option in the output. Defaults to ``"fillcolor"``. 
- - Returns: - dict: kwarg for ``fillcolor`` - """ + # Process fill color for affine transforms major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2]) major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2]) if major_found < major_required or (major_found == major_required and minor_found < minor_required): @@ -478,25 +259,6 @@ def _parse_fill(fill, img, min_pil_version, name="fillcolor"): @torch.jit.unused def affine(img, matrix, interpolation=0, fill=None): - """PRIVATE METHOD. Apply affine transformation on the PIL Image keeping image center invariant. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): image to be rotated. - matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation. - interpolation (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): - An optional resampling filter. - See `filters`_ for more information. - If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. - fill (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) - - Returns: - PIL Image: Transformed image. - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -507,36 +269,6 @@ def affine(img, matrix, interpolation=0, fill=None): @torch.jit.unused def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None): - """PRIVATE METHOD. Rotate PIL image by angle. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): image to be rotated. 
- angle (float or int): rotation angle value in degrees, counter-clockwise. - interpolation (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): - An optional resampling filter. See `filters`_ for more information. - If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. - expand (bool, optional): Optional expansion flag. - If true, expands the output image to make it large enough to hold the entire rotated image. - If false or omitted, make the output image the same size as the input image. - Note that the expand flag assumes rotation around the center and no translation. - center (2-tuple, optional): Optional center of rotation. - Origin is the upper left corner. - Default is the center of the image. - fill (n-tuple or int or float): Pixel fill value for area outside the rotated - image. If int or float, the value is used for all bands respectively. - Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``. - - Returns: - PIL Image: Rotated image. - - .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters - - """ if not _is_pil_image(img): raise TypeError("img should be PIL Image. Got {}".format(type(img))) @@ -546,25 +278,6 @@ def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None): @torch.jit.unused def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None): - """PRIVATE METHOD. Perform perspective transform of the given PIL Image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be transformed. - perspective_coeffs (list of float): perspective transformation coefficients. - interpolation (int): Interpolation type. Default, ``Image.BICUBIC``. 
- fill (n-tuple or int or float): Pixel fill value for area outside the rotated - image. If int or float, the value is used for all bands respectively. - This option is only available for ``pillow>=5.0.0``. - - Returns: - PIL Image: Perspectively transformed Image. - """ - if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) @@ -575,23 +288,6 @@ def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None) @torch.jit.unused def to_grayscale(img, num_output_channels): - """PRIVATE METHOD. Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. - - .. warning:: - - Module ``transforms.functional_pil`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (PIL Image): Image to be converted to grayscale. - num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. - - Returns: - PIL Image: Grayscale version of the image. - if num_output_channels = 1 : returned image is single channel - - if num_output_channels = 3 : returned image is 3 channel with r = g = b - """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index cc562d1de16..b196ab483c0 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -17,7 +17,7 @@ def _assert_image_tensor(img): def _get_image_size(img: Tensor) -> List[int]: - """Returns (w, h) of tensor image""" + # Returns (w, h) of tensor image _assert_image_tensor(img) return [img.shape[-1], img.shape[-2]] @@ -56,31 +56,6 @@ def _assert_channels(img: Tensor, permitted: List[int]) -> None: def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: - """PRIVATE METHOD. 
Convert a tensor image to the given ``dtype`` and scale the values accordingly - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - image (torch.Tensor): Image to be converted - dtype (torch.dtype): Desired data type of the output - - Returns: - (torch.Tensor): Converted image - - .. note:: - - When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. - If converted back and forth, this mismatch has no effect. - - Raises: - RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as - well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to - overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range - of the integer ``dtype``. - """ if image.dtype == dtype: return image @@ -134,88 +109,24 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) - def vflip(img: Tensor) -> Tensor: - """PRIVATE METHOD. Vertically flip the given the Image Tensor. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image Tensor to be flipped in the form [..., C, H, W]. - - Returns: - Tensor: Vertically flipped image Tensor. - """ _assert_image_tensor(img) return img.flip(-2) def hflip(img: Tensor) -> Tensor: - """PRIVATE METHOD. Horizontally flip the given the Image Tensor. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image Tensor to be flipped in the form [..., C, H, W]. 
- - Returns: - Tensor: Horizontally flipped image Tensor. - """ _assert_image_tensor(img) return img.flip(-1) def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: - """PRIVATE METHOD. Crop the given Image Tensor. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be cropped in the form [..., H, W]. (0,0) denotes the top left corner of the image. - top (int): Vertical component of the top left corner of the crop box. - left (int): Horizontal component of the top left corner of the crop box. - height (int): Height of the crop box. - width (int): Width of the crop box. - - Returns: - Tensor: Cropped image. - """ _assert_image_tensor(img) return img[..., top:top + height, left:left + width] def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: - """PRIVATE METHOD. Convert the given RGB Image Tensor to Grayscale. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which - is L = R * 0.2989 + G * 0.5870 + B * 0.1140 - - Args: - img (Tensor): Image to be converted to Grayscale in the form [C, H, W]. - num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. - - Returns: - Tensor: Grayscale version of the image. 
- if num_output_channels = 1 : returned image is single channel - - if num_output_channels = 3 : returned image is 3 channel with r = g = b - - """ if img.ndim < 3: raise TypeError("Input image tensor should have at least 3 dimensions, but found {}".format(img.ndim)) _assert_channels(img, [3]) @@ -236,22 +147,6 @@ def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: - """PRIVATE METHOD. Adjust brightness of a Grayscale or RGB image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be adjusted. - brightness_factor (float): How much to adjust the brightness. Can be - any non negative number. 0 gives a black image, 1 gives the - original image while 2 increases the brightness by a factor of 2. - - Returns: - Tensor: Brightness adjusted image. - """ if brightness_factor < 0: raise ValueError('brightness_factor ({}) is not non-negative.'.format(brightness_factor)) @@ -263,22 +158,6 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: - """PRIVATE METHOD. Adjust contrast of an RGB image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be adjusted. - contrast_factor (float): How much to adjust the contrast. Can be any - non negative number. 0 gives a solid gray image, 1 gives the - original image while 2 increases the contrast by a factor of 2. - - Returns: - Tensor: Contrast adjusted image. 
- """ if contrast_factor < 0: raise ValueError('contrast_factor ({}) is not non-negative.'.format(contrast_factor)) @@ -293,35 +172,6 @@ def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: - """PRIVATE METHOD. Adjust hue of an RGB image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - The image hue is adjusted by converting the image to HSV and - cyclically shifting the intensities in the hue channel (H). - The image is then converted back to original image mode. - - `hue_factor` is the amount of shift in H channel and must be in the - interval `[-0.5, 0.5]`. - - See `Hue`_ for more details. - - .. _Hue: https://en.wikipedia.org/wiki/Hue - - Args: - img (Tensor): Image to be adjusted. Image type is either uint8 or float. - hue_factor (float): How much to shift the hue channel. Should be in - [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in - HSV space in positive and negative direction respectively. - 0 means no shift. Therefore, both -0.5 and 0.5 will give an image - with complementary colors while 0 gives the original image. - - Returns: - Tensor: Hue adjusted image. - """ if not (-0.5 <= hue_factor <= 0.5): raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor)) @@ -349,22 +199,6 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: - """PRIVATE METHOD. Adjust color saturation of an RGB image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be adjusted. - saturation_factor (float): How much to adjust the saturation. 
Can be any - non negative number. 0 gives a black and white image, 1 gives the - original image while 2 enhances the saturation by a factor of 2. - - Returns: - Tensor: Saturation adjusted image. - """ if saturation_factor < 0: raise ValueError('saturation_factor ({}) is not non-negative.'.format(saturation_factor)) @@ -376,31 +210,6 @@ def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: - r"""PRIVATE METHOD. Adjust gamma of a Grayscale or RGB image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Also known as Power Law Transform. Intensities in RGB mode are adjusted - based on the following equation: - - .. math:: - `I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}` - - See `Gamma Correction`_ for more details. - - .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction - - Args: - img (Tensor): Tensor of RBG values to be adjusted. - gamma (float): Non negative real number, same as :math:`\gamma` in the equation. - gamma larger than 1 make the shadows darker, - while gamma smaller than 1 make dark regions lighter. - gain (float): The constant multiplier. - """ - if not isinstance(img, torch.Tensor): raise TypeError('Input img should be a Tensor.') @@ -422,25 +231,7 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: - """DEPRECATED. Crop the Image Tensor and resize it to desired size. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - .. warning:: - - This method is deprecated and will be removed in future releases. 
- Please, use ``F.center_crop`` instead. - - Args: - img (Tensor): Image to be cropped. - output_size (sequence or int): (height, width) of the crop box. If int, - it is used for both directions - - Returns: - Tensor: Cropped image. + """DEPRECATED """ warnings.warn( "This method is deprecated and will be removed in future releases. " @@ -464,32 +255,7 @@ def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: - """DEPRECATED. Crop the given Image Tensor into four corners and the central crop. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - .. warning:: - - This method is deprecated and will be removed in future releases. - Please, use ``F.five_crop`` instead. - - .. Note:: - - This transform returns a List of Tensors and there may be a - mismatch in the number of inputs and targets your ``Dataset`` returns. - - Args: - img (Tensor): Image to be cropped. - size (sequence or int): Desired output size of the crop. If size is an - int instead of sequence like (h, w), a square crop (size, size) is - made. - - Returns: - List: List (tl, tr, bl, br, center) - Corresponding top left, top right, bottom left, bottom right and center crop. + """DEPRECATED """ warnings.warn( "This method is deprecated and will be removed in future releases. " @@ -516,35 +282,7 @@ def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]: - """DEPRECATED. Crop the given Image Tensor into four corners and the central crop plus the - flipped version of these (horizontal flipping is used by default). - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. 
- Please, consider instead using methods from `transforms.functional` module. - - .. warning:: - - This method is deprecated and will be removed in future releases. - Please, use ``F.ten_crop`` instead. - - .. Note:: - - This transform returns a List of images and there may be a - mismatch in the number of inputs and targets your ``Dataset`` returns. - - Args: - img (Tensor): Image to be cropped. - size (sequence or int): Desired output size of the crop. If size is an - int instead of sequence like (h, w), a square crop (size, size) is - made. - vertical_flip (bool): Use vertical flipping instead of horizontal - - Returns: - List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) - Corresponding top left, top right, bottom left, bottom right and center crop - and same for the flipped image's tensor. + """DEPRECATED """ warnings.warn( "This method is deprecated and will be removed in future releases. " @@ -663,43 +401,6 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: - r"""PRIVATE METHOD. Pad the given Tensor Image on all sides with specified padding mode and fill value. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be padded. - padding (int or tuple or list): Padding on each border. If a single int is provided this - is used to pad all borders. If a tuple or list of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple or list of length 4 is provided - this is the padding for the left, top, right and bottom borders - respectively. In torchscript mode padding as single int is not supported, use a tuple or - list of length 1: ``[padding, ]``. - fill (int): Pixel fill value for constant fill. 
Default is 0. - This value is only used when the padding_mode is constant - padding_mode (str): Type of padding. Should be: constant, edge or reflect. Default is constant. - Mode symmetric is not yet supported for Tensor inputs. - - - constant: pads with a constant value, this value is specified with fill - - - edge: pads with the last value on the edge of the image - - - reflect: pads with reflection of image (without repeating the last value on the edge) - - padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode - will result in [3, 2, 1, 2, 3, 4, 3, 2] - - - symmetric: pads with reflection of image (repeating the last value on the edge) - - padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode - will result in [2, 1, 1, 2, 3, 4, 4, 3] - - Returns: - Tensor: Padded image. - """ _assert_image_tensor(img) if not isinstance(padding, (int, tuple, list)): @@ -770,28 +471,6 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Tensor: - r"""PRIVATE METHOD. Resize the input Tensor to the given size. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be resized. - size (int or tuple or list): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. - In torchscript mode padding as a single int is not supported, use a tuple or - list of length 1: ``[size, ]``. - interpolation (str): Desired interpolation. Default is "bilinear". 
Other supported values: - "nearest" and "bicubic". - - Returns: - Tensor: Resized image. - """ _assert_image_tensor(img) if not isinstance(size, (int, tuple, list)): @@ -965,23 +644,6 @@ def _gen_affine_grid( def affine( img: Tensor, matrix: List[float], interpolation: str = "nearest", fill: Optional[List[float]] = None ) -> Tensor: - """PRIVATE METHOD. Apply affine transformation on the Tensor image keeping image center invariant. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): image to be rotated. - matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation. - interpolation (str): An optional resampling filter. Default is "nearest". Other supported values: "bilinear". - fill (sequence or int or float, optional): Optional fill value, default None. - If None, fill with 0. - - Returns: - Tensor: Transformed image. - """ _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) dtype = img.dtype if torch.is_floating_point(img) else torch.float32 @@ -1021,31 +683,6 @@ def rotate( img: Tensor, matrix: List[float], interpolation: str = "nearest", expand: bool = False, fill: Optional[List[float]] = None ) -> Tensor: - """PRIVATE METHOD. Rotate the Tensor image by angle. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): image to be rotated. - matrix (list of floats): list of 6 float values representing inverse matrix for rotation transformation. - Translation part (``matrix[2]`` and ``matrix[5]``) should be in pixel coordinates. - interpolation (str): An optional resampling filter. Default is "nearest". Other supported values: "bilinear". 
- expand (bool, optional): Optional expansion flag. - If true, expands the output image to make it large enough to hold the entire rotated image. - If false or omitted, make the output image the same size as the input image. - Note that the expand flag assumes rotation around the center and no translation. - fill (sequence or int or float, optional): Optional fill value, default None. - If None, fill with 0. - - Returns: - Tensor: Rotated image. - - .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters - - """ _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) w, h = img.shape[-1], img.shape[-2] ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h) @@ -1093,24 +730,6 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, def perspective( img: Tensor, perspective_coeffs: List[float], interpolation: str = "bilinear", fill: Optional[List[float]] = None ) -> Tensor: - """PRIVATE METHOD. Perform perspective transform of the given Tensor image. - - .. warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be transformed. - perspective_coeffs (list of float): perspective transformation coefficients. - interpolation (str): Interpolation type. Default, "bilinear". - fill (sequence or int or float, optional): Optional fill value, default None. - If None, fill with 0. - - Returns: - Tensor: transformed image. - """ - if not (isinstance(img, torch.Tensor)): raise TypeError('Input img should be Tensor.') @@ -1151,22 +770,6 @@ def _get_gaussian_kernel2d( def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor: - """PRIVATE METHOD. Performs Gaussian blurring on the img by given kernel. - - .. 
warning:: - - Module ``transforms.functional_tensor`` is private and should not be used in user application. - Please, consider instead using methods from `transforms.functional` module. - - Args: - img (Tensor): Image to be blurred - kernel_size (sequence of int or int): Kernel size of the Gaussian kernel ``(kx, ky)``. - sigma (sequence of float or float, optional): Standard deviation of the Gaussian kernel ``(sx, sy)``. - - Returns: - Tensor: An image that is blurred using gaussian kernel of given parameters - """ - if not (isinstance(img, torch.Tensor)): raise TypeError('img should be Tensor. Got {}'.format(type(img))) diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 117ba74b83a..1274c19795a 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -122,6 +122,7 @@ def __repr__(self): class ConvertImageDtype(torch.nn.Module): """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. Args: dtype (torch.dtype): Desired data type of the output @@ -187,6 +188,7 @@ def __repr__(self): class Normalize(torch.nn.Module): """Normalize a tensor image with mean and standard deviation. + This transform does not support PIL Image. Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` channels, this transform will normalize each channel of the input ``torch.*Tensor`` i.e., @@ -224,7 +226,7 @@ def __repr__(self): class Resize(torch.nn.Module): """Resize the input image to the given size. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: @@ -233,8 +235,7 @@ class Resize(torch.nn.Module): smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size). 
- In torchscript mode padding as single int is not supported, use a tuple or - list of length 1: ``[size, ]``. + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and @@ -288,13 +289,13 @@ def __init__(self, *args, **kwargs): class CenterCrop(torch.nn.Module): """Crops the given image at the center. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). """ def __init__(self, size): @@ -317,21 +318,22 @@ def __repr__(self): class Pad(torch.nn.Module): """Pad the given image on all sides with the given "pad" value. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: - padding (int or tuple or list): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. 
If a sequence of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. - In torchscript mode padding as single int is not supported, use a tuple or - list of length 1: ``[padding, ]``. - fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. - This value is only used when the padding_mode is constant + This value is only used when the padding_mode is constant. + Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. - Default is constant. Mode symmetric is not yet supported for Tensor inputs. + Default is constant. - constant: pads with a constant value, this value is specified with fill @@ -405,7 +407,7 @@ class RandomTransforms: """Base class for a list of transformations with randomness Args: - transforms (list or tuple): list of transformations + transforms (sequence): list of transformations """ def __init__(self, transforms): @@ -441,7 +443,7 @@ class RandomApply(torch.nn.Module): `lambda` functions or ``PIL.Image``. Args: - transforms (list or tuple or torch.nn.Module): list of transformations + transforms (sequence or torch.nn.Module): list of transformations p (float): probability """ @@ -488,29 +490,28 @@ def __call__(self, img): class RandomCrop(torch.nn.Module): """Crop the given image at a random location. - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading - dimensions + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... 
means an arbitrary number of leading dimensions Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). padding (int or sequence, optional): Optional padding on each border of the image. Default is None. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. - In torchscript mode padding as single int is not supported, use a tuple or - list of length 1: ``[padding, ]``. + In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``. pad_if_needed (boolean): It will pad the image if smaller than the desired size to avoid raising an exception. Since cropping is done after padding, the padding seems to be done at a random offset. - fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. - This value is only used when the padding_mode is constant + This value is only used when the padding_mode is constant. + Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. - Mode symmetric is not yet supported for Tensor inputs. 
- constant: pads with a constant value, this value is specified with fill @@ -597,7 +598,7 @@ def __repr__(self): class RandomHorizontalFlip(torch.nn.Module): """Horizontally flip the given image randomly with a given probability. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -627,7 +628,7 @@ def __repr__(self): class RandomVerticalFlip(torch.nn.Module): """Vertically flip the given image randomly with a given probability. - The image can be a PIL Image or a torch Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -657,7 +658,7 @@ def __repr__(self): class RandomPerspective(torch.nn.Module): """Performs a random perspective transformation of the given image with a given probability. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: @@ -668,9 +669,8 @@ class RandomPerspective(torch.nn.Module): :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. 
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``. """ @@ -753,7 +753,7 @@ def __repr__(self): class RandomResizedCrop(torch.nn.Module): """Crop the given image to random size and aspect ratio. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions A crop of random size (default: of 0.08 to 1.0) of the original size and a random @@ -764,7 +764,8 @@ class RandomResizedCrop(torch.nn.Module): Args: size (int or sequence): expected output size of each edge. If size is an int instead of sequence like (h, w), a square output size ``(size, size)`` is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. scale (tuple of float): scale range of the cropped image before resizing, relatively to the origin image. ratio (tuple of float): aspect ratio range of the cropped image before resizing. interpolation (InterpolationMode): Desired interpolation enum defined by @@ -878,7 +879,7 @@ def __init__(self, *args, **kwargs): class FiveCrop(torch.nn.Module): """Crop the given image into four corners and the central crop. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -890,7 +891,7 @@ class FiveCrop(torch.nn.Module): Args: size (sequence or int): Desired output size of the crop. If size is an ``int`` instead of sequence like (h, w), a square crop of size (size, size) is made. - If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). 
+ If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). Example: >>> transform = Compose([ @@ -925,7 +926,7 @@ def __repr__(self): class TenCrop(torch.nn.Module): """Crop the given image into four corners and the central crop plus the flipped version of these (horizontal flipping is used by default). - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -937,7 +938,7 @@ class TenCrop(torch.nn.Module): Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). vertical_flip (bool): Use vertical flipping instead of horizontal Example: @@ -974,6 +975,7 @@ def __repr__(self): class LinearTransformation(torch.nn.Module): """Transform a tensor image with a square transformation matrix and a mean_vector computed offline. + This transform does not support PIL Image. Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and subtract mean_vector from it which is then followed by computing the dot product with the transformation matrix and then reshaping the tensor to its @@ -1040,6 +1042,8 @@ def __repr__(self): class ColorJitter(torch.nn.Module): """Randomly change the brightness, contrast, saturation and hue of an image. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: brightness (float or tuple of float (min, max)): How much to jitter brightness. @@ -1149,11 +1153,11 @@ def __repr__(self): class RandomRotation(torch.nn.Module): """Rotate the image by angle. 
- The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: - degrees (sequence or float or int): Range of degrees to select from. + degrees (sequence or number): Range of degrees to select from. If degrees is a number instead of sequence like (min, max), the range of degrees will be (-degrees, +degrees). interpolation (InterpolationMode): Desired interpolation enum defined by @@ -1164,11 +1168,10 @@ class RandomRotation(torch.nn.Module): If true, expands the output to make it large enough to hold the entire rotated image. If false or omitted, make the output image the same size as the input image. Note that the expand flag assumes rotation around the center and no translation. - center (list or tuple, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. Default is the center of the image. - fill (sequence or int or float, optional): Pixel fill value for the area outside the rotated - image. If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. + fill (sequence or number, optional): Pixel fill value for the area outside the rotated + image. If given a number, the value is used for all bands respectively. If input is PIL Image, the options is only available for ``Pillow>=5.2.0``. resample (int, optional): deprecated argument and will be removed since v0.10.0. Please use `arg`:interpolation: instead. @@ -1249,11 +1252,11 @@ def __repr__(self): class RandomAffine(torch.nn.Module): """Random affine transformation of the image keeping center invariant. - The image can be a PIL Image or a Tensor, in which case it is expected + If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... 
means an arbitrary number of leading dimensions. Args: - degrees (sequence or float or int): Range of degrees to select from. + degrees (sequence or number): Range of degrees to select from. If degrees is a number instead of sequence like (min, max), the range of degrees will be (-degrees, +degrees). Set to 0 to deactivate rotations. translate (tuple, optional): tuple of maximum absolute fraction for horizontal @@ -1262,21 +1265,20 @@ class RandomAffine(torch.nn.Module): randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is randomly sampled from the range a <= scale <= b. Will keep original scale by default. - shear (sequence or float or int, optional): Range of degrees to select from. + shear (sequence or number, optional): Range of degrees to select from. If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) - will be applied. Else if shear is a tuple or list of 2 values a shear parallel to the x axis in the - range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values, + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. Will not apply shear by default. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. - fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed - image. 
If int or float, the value is used for all bands respectively. - This option is supported for PIL image and Tensor inputs. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. If input is PIL Image, the options is only available for ``Pillow>=5.0.0``. - fillcolor (sequence or int or float, optional): deprecated argument and will be removed since v0.10.0. + fillcolor (sequence or number, optional): deprecated argument and will be removed since v0.10.0. Please use `arg`:fill: instead. resample (int, optional): deprecated argument and will be removed since v0.10.0. Please use `arg`:interpolation: instead. @@ -1412,9 +1414,8 @@ def __repr__(self): class Grayscale(torch.nn.Module): """Convert image to grayscale. - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., 3, H, W] shape, where ... means an arbitrary number of leading - dimensions + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions Args: num_output_channels (int): (1 or 3) number of channels desired for output image @@ -1446,9 +1447,8 @@ def __repr__(self): class RandomGrayscale(torch.nn.Module): """Randomly convert image to grayscale with a probability of p (default 0.1). - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., 3, H, W] shape, where ... means an arbitrary number of leading - dimensions + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions Args: p (float): probability that image should be converted to grayscale. @@ -1483,7 +1483,8 @@ def __repr__(self): class RandomErasing(torch.nn.Module): - """ Randomly selects a rectangle region in an image and erases its pixels. 
+ """ Randomly selects a rectangle region in a torch Tensor image and erases its pixels. + This transform does not support PIL Image. 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896 Args: @@ -1539,8 +1540,8 @@ def get_params( Args: img (Tensor): Tensor image to be erased. - scale (tuple or list): range of proportion of erased area against input image. - ratio (tuple or list): range of aspect ratio of erased area. + scale (sequence): range of proportion of erased area against input image. + ratio (sequence): range of aspect ratio of erased area. value (list, optional): erasing value. If None, it is interpreted as "random" (erasing each pixel with random values). If ``len(value)`` is 1, it is interpreted as a number, i.e. ``value[0]``. @@ -1605,9 +1606,8 @@ def forward(self, img): class GaussianBlur(torch.nn.Module): """Blurs image with randomly chosen Gaussian blur. - The image can be a PIL Image or a Tensor, in which case it is expected - to have [..., C, H, W] shape, where ... means an arbitrary number of leading - dimensions + If the image is torch Tensor, it is expected + to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions. Args: kernel_size (int or sequence): Size of the Gaussian kernel. @@ -1704,9 +1704,9 @@ def _setup_angle(x, name, req_sizes=(2, )): class RandomInvert(torch.nn.Module): """Inverts the colors of the given image randomly with a given probability. - The image can be a PIL Image or a torch Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading - dimensions. + If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: p (float): probability of the image being color inverted.
Default value is 0.5 @@ -1734,9 +1734,9 @@ def __repr__(self): class RandomPosterize(torch.nn.Module): """Posterize the image randomly with a given probability by reducing the - number of bits for each color channel. The image can be a PIL Image or a torch - Tensor, in which case it is expected to have [..., H, W] shape, where ... means - an arbitrary number of leading dimensions. + number of bits for each color channel. If the image is torch Tensor, it should be of type torch.uint8, + and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: bits (int): number of bits to keep for each channel (0-8) @@ -1766,9 +1766,9 @@ def __repr__(self): class RandomSolarize(torch.nn.Module): """Solarize the image randomly with a given probability by inverting all pixel - values above a threshold. The image can be a PIL Image or a torch Tensor, in - which case it is expected to have [..., H, W] shape, where ... means an arbitrary - number of leading dimensions. + values above a threshold. If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: threshold (float): all pixels equal or above this value are inverted. @@ -1797,9 +1797,8 @@ def __repr__(self): class RandomAdjustSharpness(torch.nn.Module): - """Adjust the sharpness of the image randomly with a given probability. The image - can be a PIL Image or a torch Tensor, in which case it is expected to have [..., H, W] - shape, where ... means an arbitrary number of leading dimensions. + """Adjust the sharpness of the image randomly with a given probability. If the image is torch Tensor, + it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. 
Args: sharpness_factor (float): How much to adjust the sharpness. Can be @@ -1831,9 +1830,9 @@ def __repr__(self): class RandomAutocontrast(torch.nn.Module): """Autocontrast the pixels of the given image randomly with a given probability. - The image can be a PIL Image or a torch Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading - dimensions. + If the image is torch Tensor, it is expected + to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: p (float): probability of the image being autocontrasted. Default value is 0.5 @@ -1861,9 +1860,9 @@ def __repr__(self): class RandomEqualize(torch.nn.Module): """Equalize the histogram of the given image randomly with a given probability. - The image can be a PIL Image or a torch Tensor, in which case it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading - dimensions. + If the image is torch Tensor, it is expected + to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "P", "L" or "RGB". Args: p (float): probability of the image being equalized. Default value is 0.5