Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Normalizaiton Normalization #60921

Merged
merged 1 commit into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/paddle/audio/functional/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def create_dct(
Args:
n_mfcc (int): Number of mel frequency cepstral coefficients.
n_mels (int): Number of mel filterbanks.
norm (Optional[str], optional): Normalizaiton type. Defaults to 'ortho'.
norm (Optional[str], optional): Normalization type. Defaults to 'ortho'.
dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.

Returns:
Expand Down
6 changes: 3 additions & 3 deletions python/paddle/utils/cpp_extension/cpp_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def unix_custom_single_compiler(
# shared library have same ABI suffix with libpaddle.so.
# See https://stackoverflow.com/questions/34571583/understanding-gcc-5s-glibcxx-use-cxx11-abi-or-the-new-abi
add_compile_flag(cflags, ['-D_GLIBCXX_USE_CXX11_ABI=1'])
# Append this macor only when jointly compiling .cc with .cu
# Append this macro only when jointly compiling .cc with .cu
if not is_cuda_file(src) and self.contain_cuda_file:
if core.is_compiled_with_rocm():
cflags.append('-DPADDLE_WITH_HIP')
Expand Down Expand Up @@ -829,7 +829,7 @@ def load(
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility.

Compared with ``setup`` interface, it doesn't need extra ``setup.py`` and excute
Compared with ``setup`` interface, it doesn't need extra ``setup.py`` and execute
``python setup.py install`` command. The interface contains all compiling and installing
process under the hood.

Expand All @@ -850,7 +850,7 @@ def load(
from paddle.utils.cpp_extension import load

custom_op_module = load(
name="op_shared_libary_name", # name of shared library
name="op_shared_library_name", # name of shared library
sources=['relu_op.cc', 'relu_op.cu'], # source files of customized op
extra_cxx_cflags=['-g', '-w'], # optional, specify extra flags to compile .cc/.cpp file
extra_cuda_cflags=['-O2'], # optional, specify extra flags to compile .cu file
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/utils/cpp_extension/extension_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
# See _reset_so_rpath for details.
extra_link_args.append(f'-Wl,-rpath,{_get_base_path()}')
# On MacOS, ld doesn't support `-l:xx`, so we create a
# liblibpaddle.dylib symbol link.
# libpaddle.dylib symbol link.
lib_core_name = create_sym_link_if_not_exist()
extra_link_args.append(f'-l{lib_core_name}')
# ----------------------- -- END -- ----------------------- #
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/utils/inplace_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __impl__(*args, **kwargs):
for arg in args:
if hasattr(arg, "is_view_var") and arg.is_view_var:
raise ValueError(
f'Sorry about what\'s happend. In to_static mode, {func.__name__}\'s output variable {arg.name} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {arg.name} = {arg.name}.assign().'
f'Sorry about what\'s happened. In to_static mode, {func.__name__}\'s output variable {arg.name} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {arg.name} = {arg.name}.assign().'
)

origin_func = f"{func.__module__}.{origin_api_name}"
Expand Down
4 changes: 2 additions & 2 deletions python/paddle/vision/datasets/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class DatasetFolder(Dataset):
... dirname = list(subpath.keys())[0]
... make_directory(root / dirname, subpath[dirname])

>>> directory_hirerarchy = [
>>> directory_hierarchy = [
... {"class_0": [
... "abc.jpg",
... "def.png"]},
Expand All @@ -146,7 +146,7 @@ class DatasetFolder(Dataset):
>>> # You can replace this with any directory to explore the structure
>>> # of generated data. e.g. fake_data_dir = "./temp_dir"
>>> fake_data_dir = tempfile.mkdtemp()
>>> make_directory(fake_data_dir, directory_hirerarchy)
>>> make_directory(fake_data_dir, directory_hierarchy)
>>> data_folder_1 = DatasetFolder(fake_data_dir)
>>> print(data_folder_1.classes)
['class_0', 'class_1']
Expand Down
14 changes: 7 additions & 7 deletions python/paddle/vision/models/shufflenetv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def _shufflenet_v2(arch, pretrained=False, **kwargs):

def shufflenet_v2_x0_25(pretrained=False, **kwargs):
"""ShuffleNetV2 with 0.25x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -365,7 +365,7 @@ def shufflenet_v2_x0_25(pretrained=False, **kwargs):

def shufflenet_v2_x0_33(pretrained=False, **kwargs):
"""ShuffleNetV2 with 0.33x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -400,7 +400,7 @@ def shufflenet_v2_x0_33(pretrained=False, **kwargs):

def shufflenet_v2_x0_5(pretrained=False, **kwargs):
"""ShuffleNetV2 with 0.5x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -435,7 +435,7 @@ def shufflenet_v2_x0_5(pretrained=False, **kwargs):

def shufflenet_v2_x1_0(pretrained=False, **kwargs):
"""ShuffleNetV2 with 1.0x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -470,7 +470,7 @@ def shufflenet_v2_x1_0(pretrained=False, **kwargs):

def shufflenet_v2_x1_5(pretrained=False, **kwargs):
"""ShuffleNetV2 with 1.5x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -505,7 +505,7 @@ def shufflenet_v2_x1_5(pretrained=False, **kwargs):

def shufflenet_v2_x2_0(pretrained=False, **kwargs):
"""ShuffleNetV2 with 2.0x output channels, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down Expand Up @@ -540,7 +540,7 @@ def shufflenet_v2_x2_0(pretrained=False, **kwargs):

def shufflenet_v2_swish(pretrained=False, **kwargs):
"""ShuffleNetV2 with swish activation function, as described in
`"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.
`"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_.

Args:
pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained
Expand Down
28 changes: 14 additions & 14 deletions python/paddle/vision/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def yolo_loss(
box coordinates (w, h), sigmoid cross entropy loss is used for box
coordinates (x, y), objectness loss and classification loss.

Each groud truth box finds a best matching anchor box in all anchors.
Each ground truth box finds a best matching anchor box in all anchors.
Prediction of this anchor box will incur all three parts of losses, and
prediction of anchor boxes with no GT box matched will only incur objectness
loss.

In order to trade off box coordinate losses between big boxes and small
boxes, box coordinate losses will be mutiplied by scale weight, which is
boxes, box coordinate losses will be multiplied by scale weight, which is
calculated as follows.

$$
Expand All @@ -134,10 +134,10 @@ def yolo_loss(
While :attr:`use_label_smooth` is set to be :attr:`True`, the classification
target will be smoothed when calculating classification loss, target of
positive samples will be smoothed to :math:`1.0 - 1.0 / class\_num` and target of
negetive samples will be smoothed to :math:`1.0 / class\_num`.
negative samples will be smoothed to :math:`1.0 / class\_num`.

While :attr:`gt_score` is given, which means the mixup score of ground truth
boxes, all losses incured by a ground truth box will be multiplied by its
boxes, all losses incurred by a ground truth box will be multiplied by its
mixup score.

Args:
Expand All @@ -146,7 +146,7 @@ def yolo_loss(
and the second dimension(C) stores box locations, confidence
score and classification one-hot keys of each anchor box.
The data type is float32 or float64.
gt_box (Tensor): groud truth boxes, should be in shape of [N, B, 4],
gt_box (Tensor): ground truth boxes, should be in shape of [N, B, 4],
in the third dimension, x, y, w, h should be stored.
x,y is the center coordinate of boxes, w, h are the
width and height, x, y, w, h should be divided by
Expand All @@ -163,7 +163,7 @@ def yolo_loss(
ignore_thresh (float): The ignore threshold to ignore confidence loss.
downsample_ratio (int): The downsample ratio from network input to YOLOv3
loss input, so 32, 16, 8 should be set for the
first, second, and thrid YOLOv3 loss operators.
first, second, and third YOLOv3 loss operators.
gt_score (Tensor, optional): mixup score of ground truth boxes, should be in shape
of [N, B]. Default None.
use_label_smooth (bool, optional): Whether to use label smooth. Default True.
Expand Down Expand Up @@ -313,7 +313,7 @@ def yolo_box(
The logistic regression value of the 5th channel of each anchor prediction boxes
represents the confidence score of each prediction box, and the logistic
regression value of the last :attr:`class_num` channels of each anchor prediction
boxes represents the classifcation scores. Boxes with confidence scores less than
boxes represents the classification scores. Boxes with confidence scores less than
:attr:`conf_thresh` should be ignored, and box final scores is the product of
confidence scores and classification scores.

Expand All @@ -340,7 +340,7 @@ def yolo_box(
be ignored.
downsample_ratio (int): The downsample ratio from network input to
:attr:`yolo_box` operator input, so 32, 16, 8
should be set for the first, second, and thrid
should be set for the first, second, and third
:attr:`yolo_box` layer.
clip_bbox (bool, optional): Whether to clip output bounding box in :attr:`img_size`
boundary. Default true.
Expand Down Expand Up @@ -1356,7 +1356,7 @@ def decode_jpeg(x, mode='unchanged', name=None):
need for user to set this property. For more information, please
refer to :ref:`api_guide_Name`.
Returns:
Tensor: A decoded image tensor with shape (imge_channels, image_height, image_width)
Tensor: A decoded image tensor with shape (image_channels, image_height, image_width)

Examples:
.. code-block:: python
Expand Down Expand Up @@ -1809,18 +1809,18 @@ def forward(self, x, boxes, boxes_num, aligned=True):

class ConvNormActivation(Sequential):
"""
Configurable block used for Convolution-Normalzation-Activation blocks.
Configurable block used for Convolution-Normalization-Activation blocks.
This code is based on the torchvision code with modifications.
You can also see at https://github.com/pytorch/vision/blob/main/torchvision/ops/misc.py#L68
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the Convolution-Normalzation-Activation block
out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
kernel_size: (int|list|tuple, optional): Size of the convolving kernel. Default: 3
stride (int|list|tuple, optional): Stride of the convolution. Default: 1
padding (int|str|tuple|list, optional): Padding added to all four sides of the input. Default: None,
in wich case it will calculated as ``padding = (kernel_size - 1) // 2 * dilation``
in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
norm_layer (Callable[..., paddle.nn.Layer], optional): Norm layer that will be stacked on top of the convolutiuon layer.
norm_layer (Callable[..., paddle.nn.Layer], optional): Norm layer that will be stacked on top of the convolution layer.
If ``None`` this layer won't be used. Default: ``paddle.nn.BatchNorm2D``
activation_layer (Callable[..., paddle.nn.Layer], optional): Activation function which will be stacked on top of the normalization
layer (if not ``None``), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``paddle.nn.ReLU``
Expand Down Expand Up @@ -1887,7 +1887,7 @@ def nms(

If category_idxs and categories are provided, NMS will be performed with a batched style,
which means NMS will be applied to each category respectively and results of each category
will be concated and sorted by scores.
will be concatenated and sorted by scores.

If K is provided, only the first k elements will be returned. Otherwise, all box indices sorted by scores will be returned.

Expand Down
4 changes: 2 additions & 2 deletions python/paddle/vision/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def hflip(img):
img (PIL.Image|np.array|Tensor): Image to be flipped.

Returns:
PIL.Image|np.array|paddle.Tensor: Horizontall flipped image.
PIL.Image|np.array|paddle.Tensor: Horizontally flipped image.

Examples:
.. code-block:: python
Expand Down Expand Up @@ -966,7 +966,7 @@ def normalize(img, mean, std, data_format='CHW', to_rgb=False):
data_format (str, optional): Data format of input img, should be 'HWC' or
'CHW'. Default: 'CHW'.
to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
this option will be igored. Default: False.
this option will be ignored. Default: False.

Returns:
PIL.Image|np.array|paddle.Tensor: Normalized image. Data format is same as input img.
Expand Down
6 changes: 3 additions & 3 deletions python/paddle/vision/transforms/functional_cv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def center_crop(img, output_size):
img (np.array): Image to be cropped. (0,0) denotes the top left corner of the image.
output_size (sequence or int): (height, width) of the crop box. If int,
it is used for both directions
backend (str, optional): The image proccess backend type. Options are `pil`, `cv2`. Default: 'pil'.
backend (str, optional): The image process backend type. Options are `pil`, `cv2`. Default: 'pil'.

Returns:
np.array: Cropped image.
Expand All @@ -269,7 +269,7 @@ def hflip(img):
img (np.array): Image to be flipped.

Returns:
np.array: Horizontall flipped image.
np.array: Horizontally flipped image.

"""
cv2 = try_import('cv2')
Expand Down Expand Up @@ -681,7 +681,7 @@ def to_grayscale(img, num_output_channels=1):


def normalize(img, mean, std, data_format='CHW', to_rgb=False):
"""Normalizes a ndarray imge or image with mean and standard deviation.
"""Normalizes a ndarray image or image with mean and standard deviation.

Args:
img (np.array): input data to be normalized.
Expand Down
6 changes: 3 additions & 3 deletions python/paddle/vision/transforms/functional_pil.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def center_crop(img, output_size):
img (PIL.Image): Image to be cropped. (0,0) denotes the top left corner of the image.
output_size (sequence or int): (height, width) of the crop box. If int,
it is used for both directions
backend (str, optional): The image proccess backend type. Options are `pil`, `cv2`. Default: 'pil'.
backend (str, optional): The image process backend type. Options are `pil`, `cv2`. Default: 'pil'.

Returns:
PIL.Image: Cropped image.
Expand All @@ -292,7 +292,7 @@ def hflip(img):
img (PIL.Image): Image to be flipped.

Returns:
PIL.Image: Horizontall flipped image.
PIL.Image: Horizontally flipped image.

"""

Expand Down Expand Up @@ -520,7 +520,7 @@ def to_grayscale(img, num_output_channels=1):

Args:
img (PIL.Image): Image to be converted to grayscale.
backend (str, optional): The image proccess backend type. Options are `pil`,
backend (str, optional): The image process backend type. Options are `pil`,
`cv2`. Default: 'pil'.

Returns:
Expand Down
6 changes: 3 additions & 3 deletions python/paddle/vision/transforms/functional_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@ def to_grayscale(img, num_output_channels=1, data_format='CHW'):
"""Converts image to grayscale version of image.

Args:
img (paddel.Tensor): Image to be converted to grayscale.
num_output_channels (int, optionl[1, 3]):
img (paddle.Tensor): Image to be converted to grayscale.
num_output_channels (int, optional[1, 3]):
if num_output_channels = 1 : returned image is single channel
if num_output_channels = 3 : returned image is 3 channel
data_format (str, optional): Data format of img, should be 'HWC' or
Expand Down Expand Up @@ -585,7 +585,7 @@ def hflip(img, data_format='CHW'):
'CHW'. Default: 'CHW'.

Returns:
paddle.Tensor: Horizontall flipped image.
paddle.Tensor: Horizontally flipped image.

"""
_assert_image_tensor(img, data_format)
Expand Down
Loading