Transforms documentation clean-up #3200

Merged
merged 8 commits into from Dec 23, 2020

Changes from 1 commit
47 changes: 24 additions & 23 deletions torchvision/transforms/functional.py
@@ -59,7 +59,7 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode:


def _get_image_size(img: Tensor) -> List[int]:
"""Returns image sizea as (w, h)
"""Returns image size as [w, h]
"""
if isinstance(img, torch.Tensor):
return F_t._get_image_size(img)
@@ -68,6 +68,8 @@ def _get_image_size(img: Tensor) -> List[int]:


def _get_image_num_channels(img: Tensor) -> int:
"""Returns number of image channels
"""
if isinstance(img, torch.Tensor):
return F_t._get_image_num_channels(img)

@@ -87,7 +89,7 @@ def _is_numpy_image(img: Any) -> bool:
def to_tensor(pic):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

See ``ToTensor`` for more details.
See :class:`~torchvision.transforms.ToTensor` for more details.

Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
@@ -384,16 +386,17 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
Args:
img (PIL Image or Tensor): Image to be padded.
padding (int or tuple or list): Padding on each border. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided
is used to pad all borders. If tuple or list of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple or list of length 4 is provided
this is the padding for the left, top, right and bottom borders respectively.
In torchscript mode padding as single int is not supported, use a tuple or
list of length 1: ``[padding, ]``.
fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant. Only int value is supported for Tensors.
fill (int or float or str or tuple): Pixel fill value for constant fill. Default is 0.
If a tuple/list of length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant.
Only int or float value is supported for Tensors.
Only int or str or tuple value is supported for PIL Images.
padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
Mode symmetric is not yet supported for Tensor inputs.

- constant: pads with a constant value, this value is specified with fill

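The int / length-2 / length-4 padding conventions described in the docstring above can be sketched as a small normalization helper. This is a hypothetical illustration (`expand_padding` is not torchvision code), showing how each form maps to `(left, top, right, bottom)`:

```python
from typing import List, Tuple

def expand_padding(padding: List[int]) -> Tuple[int, int, int, int]:
    """Hypothetical helper: normalize a padding spec to (left, top, right, bottom),
    following the conventions in the pad() docstring above."""
    if len(padding) == 1:   # torchscript-friendly single value: [padding, ]
        p = padding[0]
        return (p, p, p, p)
    if len(padding) == 2:   # [left/right, top/bottom]
        lr, tb = padding
        return (lr, tb, lr, tb)
    if len(padding) == 4:   # [left, top, right, bottom]
        left, top, right, bottom = padding
        return (left, top, right, bottom)
    raise ValueError("padding must be a list of length 1, 2, or 4")

print(expand_padding([2]))     # (2, 2, 2, 2)
print(expand_padding([1, 3]))  # (1, 3, 1, 3)
```

The length-1 branch is what the "In torchscript mode padding as single int is not supported, use a tuple or list of length 1" note is about: a bare int has no length, so the scripted code path needs `[padding, ]`.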
@@ -504,7 +507,7 @@ def hflip(img: Tensor) -> Tensor:
Args:
img (PIL Image or Tensor): Image to be flipped. If img
is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
where ... means it can have an arbitrary number of leading
dimensions.

Returns:
@@ -569,7 +572,6 @@ def perspective(
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
@@ -604,11 +606,11 @@ def vflip(img: Tensor) -> Tensor:
Args:
img (PIL Image or Tensor): Image to be flipped. If img
is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
where ... means it can have an arbitrary number of leading
dimensions.

Returns:
PIL Image: Vertically flipped image.
PIL Image or Tensor: Vertically flipped image.
"""
if not isinstance(img, torch.Tensor):
return F_pil.vflip(img)
@@ -888,7 +890,6 @@ def rotate(
Default is the center of the image.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.2.0``.
@@ -961,7 +962,6 @@ def affine(
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
@@ -1050,7 +1050,7 @@ def to_grayscale(img, num_output_channels=1):

Args:
img (PIL Image): PIL Image to be converted to grayscale.
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1.

Returns:
PIL Image: Grayscale version of the image.
@@ -1067,7 +1067,7 @@ def to_grayscale(img, num_output_channels=1):
def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
"""Convert RGB image to grayscale version of image.
The image can be a PIL Image or a Tensor, in which case it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions

Note:
Please, note that this method supports only RGB images as input. For inputs in other color spaces,
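As a rough per-pixel illustration of the RGB-to-grayscale conversion, a common choice is the ITU-R 601 luma weighting. The exact coefficients below are an assumption for illustration, not necessarily torchvision's:

```python
def rgb_pixel_to_gray(r: float, g: float, b: float) -> float:
    """Sketch: grayscale value of one RGB pixel using ITU-R 601 luma weights
    (assumed coefficients, not torchvision's exact implementation)."""
    return 0.299 * r + 0.587 * g + 0.114 * b

print(round(rgb_pixel_to_gray(255, 255, 255)))  # 255: white stays white
print(rgb_pixel_to_gray(0, 0, 0))               # 0.0: black stays black
```

The weights sum to 1.0, so the conversion maps the 0–255 range onto itself; green contributes most, matching perceived brightness.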
@@ -1117,7 +1117,8 @@ def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool
def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
"""Performs Gaussian blurring on the img by given kernel.
The image can be a PIL Image or a Tensor, in which case it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
If input is a PIL image, it will be first converted to Tensor for this operation, then converted back.

Args:
img (PIL Image or Tensor): Image to be blurred
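The Gaussian kernel referred to above is separable, so the 2D blur reduces to two 1D passes. A minimal sketch of building a normalized 1D kernel from `kernel_size` and `sigma` (a hypothetical helper, not torchvision's implementation):

```python
import math

def gaussian_kernel_1d(kernel_size: int, sigma: float) -> list:
    """Sketch: normalized 1D Gaussian kernel centered on the middle tap."""
    half = (kernel_size - 1) / 2
    weights = [math.exp(-((x - half) ** 2) / (2 * sigma ** 2))
               for x in range(kernel_size)]
    total = sum(weights)
    return [w / total for w in weights]  # normalize so the blur preserves brightness

k = gaussian_kernel_1d(3, 1.0)
print(abs(sum(k) - 1.0) < 1e-9)  # True: weights sum to 1
print(k[0] == k[2])              # True: symmetric around the center tap
```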
@@ -1180,8 +1181,8 @@ def invert(img: Tensor) -> Tensor:
Args:
img (PIL Image or Tensor): Image to have its colors inverted.
If img is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
dimensions.
where ... means it can have an arbitrary number of leading dimensions.
For PIL images, only mode "L" and "RGB" is supported.

Returns:
PIL Image or Tensor: Color inverted image.
@@ -1199,7 +1200,7 @@ def posterize(img: Tensor, bits: int) -> Tensor:
img (PIL Image or Tensor): Image to have its colors posterized.
If img is a Tensor, it should be of type torch.uint8 and
it is expected to be in [..., H, W] format, where ... means
it can have an arbitrary number of trailing dimensions.
it can have an arbitrary number of leading dimensions.
bits (int): The number of bits to keep for each channel (0-8).
Returns:
PIL Image or Tensor: Posterized image.
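Per channel, "keeping `bits` bits" means masking off the low-order bits of each 8-bit value. A hypothetical per-pixel sketch of what posterize does:

```python
def posterize_pixel(value: int, bits: int) -> int:
    """Sketch: keep only the top `bits` bits of an 8-bit channel value
    (hypothetical helper illustrating posterize, not torchvision code)."""
    mask = 0xFF & ~(2 ** (8 - bits) - 1)  # e.g. bits=2 -> mask 0b11000000
    return value & mask

print(posterize_pixel(200, 2))  # 192: only the top two bits survive
print(posterize_pixel(255, 8))  # 255: keeping all 8 bits is a no-op
```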
@@ -1219,7 +1220,7 @@ def solarize(img: Tensor, threshold: float) -> Tensor:
Args:
img (PIL Image or Tensor): Image to have its colors inverted.
If img is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
where ... means it can have an arbitrary number of leading
dimensions.
threshold (float): All pixels equal or above this value are inverted.
Returns:
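The threshold rule above can be sketched per pixel, assuming 8-bit values (hypothetical helper, not torchvision code):

```python
def solarize_pixel(value: int, threshold: int) -> int:
    """Sketch: invert a pixel if it is at or above the threshold,
    leave it unchanged otherwise (8-bit values assumed)."""
    return 255 - value if value >= threshold else value

print(solarize_pixel(200, 128))  # 55: at/above threshold, inverted
print(solarize_pixel(100, 128))  # 100: below threshold, unchanged
```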
@@ -1257,7 +1258,7 @@ def autocontrast(img: Tensor) -> Tensor:
Args:
img (PIL Image or Tensor): Image on which autocontrast is applied.
If img is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
where ... means it can have an arbitrary number of leading
dimensions.

Returns:
@@ -1277,7 +1278,7 @@ def equalize(img: Tensor) -> Tensor:
Args:
img (PIL Image or Tensor): Image on which equalize is applied.
If img is a Tensor, it is expected to be in [..., H, W] format,
where ... means it can have an arbitrary number of trailing
where ... means it can have an arbitrary number of leading
dimensions.

Returns:
2 changes: 1 addition & 1 deletion torchvision/transforms/functional_tensor.py
@@ -678,7 +678,7 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
this is the padding for the left, top, right and bottom borders
respectively. In torchscript mode padding as single int is not supported, use a tuple or
list of length 1: ``[padding, ]``.
fill (int): Pixel fill value for constant fill. Default is 0.
fill (int, float): Pixel fill value for constant fill. Default is 0.
Contributor:
As discussed in comments, let's go ahead and remove all docs from functional_tensor.py and torchvision/transforms/functional_pil.py

This value is only used when the padding_mode is constant
padding_mode (str): Type of padding. Should be: constant, edge or reflect. Default is constant.
Mode symmetric is not yet supported for Tensor inputs.
43 changes: 25 additions & 18 deletions torchvision/transforms/transforms.py
@@ -233,7 +233,7 @@ class Resize(torch.nn.Module):
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size).
In torchscript mode padding as single int is not supported, use a tuple or
In torchscript mode size as single int is not supported, use a tuple or
list of length 1: ``[size, ]``.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
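The smaller-edge rule described above can be sketched numerically (integer division is used here for simplicity; `resize_output_size` is a hypothetical helper, not torchvision's code):

```python
def resize_output_size(h: int, w: int, size: int) -> tuple:
    """Sketch: match the smaller edge to `size` and scale the other
    edge proportionally, per the Resize docstring above."""
    if h > w:
        return (size * h // w, size)
    return (size, size * w // h)

print(resize_output_size(400, 200, 100))  # (200, 100): width is the smaller edge
print(resize_output_size(100, 100, 50))   # (50, 50): square stays square
```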
@@ -322,16 +322,18 @@ class Pad(torch.nn.Module):

Args:
padding (int or tuple or list): Padding on each border. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided
is used to pad all borders. If tuple or list of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple or list of length 4 is provided
this is the padding for the left, top, right and bottom borders respectively.
In torchscript mode padding as single int is not supported, use a tuple or
list of length 1: ``[padding, ]``.
fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
fill (int or float or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant
This value is only used when the padding_mode is constant.
Only int or float value is supported for Tensors.
Only int or str or tuple value is supported for PIL Images.
padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
Default is constant. Mode symmetric is not yet supported for Tensor inputs.
Default is constant.

- constant: pads with a constant value, this value is specified with fill

@@ -498,19 +500,20 @@ class RandomCrop(torch.nn.Module):
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
padding (int or sequence, optional): Optional padding on each border
of the image. Default is None. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided
is used to pad all borders. If tuple or list of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple or list of length 4 is provided
this is the padding for the left, top, right and bottom borders respectively.
In torchscript mode padding as single int is not supported, use a tuple or
list of length 1: ``[padding, ]``.
pad_if_needed (boolean): It will pad the image if smaller than the
desired size to avoid raising an exception. Since cropping is done
after padding, the padding seems to be done at a random offset.
fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
fill (int or float or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant
This value is only used when the padding_mode is constant.
Only int or float value is supported for Tensors.
Only int or str or tuple value is supported for PIL Images.
padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
Mode symmetric is not yet supported for Tensor inputs.

- constant: pads with a constant value, this value is specified with fill

@@ -670,7 +673,6 @@ class RandomPerspective(torch.nn.Module):
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
"""

@@ -765,6 +767,8 @@ class RandomResizedCrop(torch.nn.Module):
size (int or sequence): expected output size of each edge. If size is an
int instead of sequence like (h, w), a square output size ``(size, size)`` is
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
In torchscript mode size as single int is not supported, use a tuple or
list of length 1: ``[size, ]``.
scale (tuple of float): scale range of the cropped image before resizing, relatively to the origin image.
ratio (tuple of float): aspect ratio range of the cropped image before resizing.
interpolation (InterpolationMode): Desired interpolation enum defined by
@@ -1040,6 +1044,9 @@ def __repr__(self):

class ColorJitter(torch.nn.Module):
"""Randomly change the brightness, contrast, saturation and hue of an image.
The image can be a PIL Image or a Tensor, in which case it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions. This transform does not support torchscript.

Args:
brightness (float or tuple of float (min, max)): How much to jitter brightness.
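When given a single float, ColorJitter samples the brightness factor uniformly from `[max(0, 1 - brightness), 1 + brightness]`. A sketch of that sampling convention (`sample_brightness_factor` is a hypothetical helper):

```python
import random

def sample_brightness_factor(brightness: float) -> float:
    """Sketch: ColorJitter-style sampling of a brightness factor from
    [max(0, 1 - brightness), 1 + brightness]."""
    low = max(0.0, 1.0 - brightness)
    high = 1.0 + brightness
    return random.uniform(low, high)

f = sample_brightness_factor(0.5)
print(0.5 <= f <= 1.5)  # True: factor stays within the documented range
```

The `max(0, ...)` clamp keeps the factor non-negative even when `brightness > 1`.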
@@ -1168,7 +1175,6 @@ class RandomRotation(torch.nn.Module):
Default is the center of the image.
fill (sequence or int or float, optional): Pixel fill value for the area outside the rotated
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the options is only available for ``Pillow>=5.2.0``.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
Please use `arg`:interpolation: instead.
@@ -1274,7 +1280,6 @@ class RandomAffine(torch.nn.Module):
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
fillcolor (sequence or int or float, optional): deprecated argument and will be removed since v0.10.0.
Please use `arg`:fill: instead.
@@ -1483,7 +1488,7 @@ def __repr__(self):


class RandomErasing(torch.nn.Module):
""" Randomly selects a rectangle region in an image and erases its pixels.
""" Randomly selects a rectangle region in an torch Tensor image and erases its pixels.
'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896

Args:
@@ -1607,7 +1612,8 @@ class GaussianBlur(torch.nn.Module):
"""Blurs image with randomly chosen Gaussian blur.
The image can be a PIL Image or a Tensor, in which case it is expected
to have [..., C, H, W] shape, where ... means an arbitrary number of leading
dimensions
dimensions.
If input is a PIL image, it will be first converted to Tensor for this operation, then converted back.

Args:
kernel_size (int or sequence): Size of the Gaussian kernel.
@@ -1707,6 +1713,7 @@ class RandomInvert(torch.nn.Module):
The image can be a PIL Image or a torch Tensor, in which case it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions.
For PIL images, only mode "L" and "RGB" is supported.
Contributor:
I think that's true for both PIL and Tensor.

Contributor Author (@voldemortX, Dec 22, 2020):
Let me check if my understanding is correct: it should be that all methods that use _lut() supports only L and RGB for PIL, and 1/3 channel images in tensor?

Contributor:
Both require that they are L or RGB. See the entering invert() functional method:

def invert(img: Tensor) -> Tensor:
"""Invert the colors of an RGB/grayscale PIL Image or torch Tensor.

Contributor Author (@voldemortX, Dec 22, 2020):
Should I specify the related docs (invert, solarize, etc.) like, for example:

"""Invert the colors of an RGB/grayscale image.

Args:
    img (PIL Image or Tensor): L or RGB image to have its colors inverted.
        If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
        where ... means it can have an arbitrary number of leading dimensions.

Returns:
    PIL Image or Tensor: Color inverted image.

"""

I mean, tensors don't really have the mode attribute.

EDIT:
All concerned functions include adjust_brightness(), adjust_sharpness(), adjust_gamma(), invert(), posterize(), solarize(), autocontrast(), equalize(). I'm not sure should they all be applied on RGB or L?


Args:
p (float): probability of the image being color inverted. Default value is 0.5
@@ -1735,8 +1742,8 @@ def __repr__(self):
class RandomPosterize(torch.nn.Module):
"""Posterize the image randomly with a given probability by reducing the
number of bits for each color channel. The image can be a PIL Image or a torch
Tensor, in which case it is expected to have [..., H, W] shape, where ... means
an arbitrary number of leading dimensions.
Tensor, in which case it should be of type torch.uint8,
and it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

Args:
bits (int): number of bits to keep for each channel (0-8)