From e63a765165be908b35dfebde07b3459fd13613a1 Mon Sep 17 00:00:00 2001
From: Zman <35071129+Atlantisming@users.noreply.github.com>
Date: Wed, 7 Dec 2022 19:05:17 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E8=8B=B1=E6=96=87AP?=
 =?UTF-8?q?I=E6=96=87=E6=A1=A3=20(#48219)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 修改paddle.nn.dynamic_decode，paddle.nn.functional.diag_embed 示例

* mma qk tensor_core (#48087)

* use mma for QK dot computing in fused_multi_transformer.
* Update fused_multi_transformer_op.cu.h

* remove lrn which is not used in paddle 2.0 (#47945)

* replace scatter_nd and scatter_nd_add with paddle.scatter_nd and (#47960)

paddle.scatter_nd_add

* [PHI] Migrate mul_grad kernel (#48061)

* cleanup unused code

* unify is_int8 is_bfloat16

* Simplify matmul_v2 FWD kernel

* remove RunKernel methods

* remove import namespace

* remove headers

* clean fluid/phi cross imports

* remove fluid axpy_handler

* delete fluid methods

* activations

* OneDNNMemDesc

* MKLDNNFormatForSize

* MatchShapeToLayout

* MKLDNNMemoryFormat

* MKLDNNFormat

* ReorderMKLDNNHandler

* to_void_cast

* review suggestions

* interpolate

* remove fluid dependency

* init

* ExecuteMatMulV2

* rm fluid kernel

* matmul_grad

* remove mutable_data

* mul_grad

* delete unnecessary shape and slice op (#48112)

* 修改英文文档。

* 修改segment operator等英文文档。

* 重新修改了paddle.einsum，paddle.unique_consecutive，
paddle.disable_signal_handler的英文文档格式。

* 重新修改了英文文档格式。;test=docs_preview

* Update extension.py

* 重新修改了英文文档格式。;test=docs_preview

* 重新修改了英文文档格式。
待验收：
- paddle.linalg.svd
- paddle.nn.functional.diag_embed
- paddle.set_grad_enabled
- paddle.disable_signal_handler
- paddle.cumprod
- paddle.device.cuda.stream_guard

待修改：
- paddle.nn.dynamic_decode
- paddle.einsum
- paddle.unique_consecutive
- paddle.linalg.svd
- paddle.incubate.segment_min
- paddle.incubate.segment_max
- paddle.incubate.segment_sum
- paddle.incubate.segment_mean

;test=docs_preview

* 重新修改了英文文档格式。
待验收：
- paddle.linalg.svd
- paddle.nn.functional.diag_embed
- paddle.set_grad_enabled
- paddle.disable_signal_handler
- paddle.cumprod
- paddle.device.cuda.stream_guard
- paddle.nn.dynamic_decode
- paddle.unique_consecutive
- paddle.linalg.svd

待修改：
- paddle.einsum
- paddle.incubate.segment_min
- paddle.incubate.segment_max
- paddle.incubate.segment_sum
- paddle.incubate.segment_mean

;test=docs_preview

* 重新修改了英文文档格式。
待验收：
- paddle.linalg.svd
- paddle.nn.functional.diag_embed
- paddle.set_grad_enabled
- paddle.disable_signal_handler
- paddle.cumprod
- paddle.device.cuda.stream_guard
- paddle.nn.dynamic_decode
- paddle.unique_consecutive
- paddle.linalg.svd

待修改：
- paddle.einsum
- paddle.incubate.segment_min
- paddle.incubate.segment_max
- paddle.incubate.segment_sum
- paddle.incubate.segment_mean

;test=docs_preview

* update

* test=docs_preview

* update formula; test=docs_preview

* update formula; test=docs_preview

* remove this operator; test=docs_preview

* add hyper link; test=docs_preview

* add default value; test=docs_preview

* update format; test=docs_preview

* empty commit; test=docs_preview

* fix codestyle issues; test=docs_preview

* empty commit; test=docs_preview

Co-authored-by: lzy <569782149@qq.com>
Co-authored-by: Vvsmile <450864116@qq.com>
Co-authored-by: Sławomir Siwek <slawomir.siwek@intel.com>
Co-authored-by: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com>
Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com>
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
---
 python/paddle/device/cuda/__init__.py    |  4 +-
 python/paddle/fluid/framework.py         |  3 +-
 python/paddle/fluid/layers/rnn.py        | 25 ++++---
 python/paddle/framework/framework.py     |  3 +
 python/paddle/incubate/tensor/math.py    | 44 +++++++++----
 python/paddle/nn/functional/extension.py | 84 ++++++++++++------------
 python/paddle/tensor/einsum.py           | 31 ++++-----
 python/paddle/tensor/linalg.py           |  9 ++-
 python/paddle/tensor/manipulation.py     | 13 ++--
 python/paddle/tensor/math.py             | 10 ++-
 10 files changed, 131 insertions(+), 95 deletions(-)

diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py
index 316f9de612265..22ef453d08594 100644
--- a/python/paddle/device/cuda/__init__.py
+++ b/python/paddle/device/cuda/__init__.py
@@ -355,8 +355,8 @@ def _set_current_stream(stream):
 @signature_safe_contextmanager
 def stream_guard(stream):
     '''
-    **Notes**:
-        **This API only supports dygraph mode currently.**
+    Notes:
+        This API only supports dynamic graph mode currently.
 
     A context manager that specifies the current stream context by the given stream.
 
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 6aa1a32c55280..da4f609c401ac 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -786,7 +786,8 @@ def disable_signal_handler():
 
     Make sure you called paddle.disable_signal_handler() before using above mentioned frameworks.
 
-    Returns: None
+    Returns:
+        None
 
     Examples:
         .. code-block:: python
diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py
index 6786f04292ba4..e6ad3de9f48f4 100644
--- a/python/paddle/fluid/layers/rnn.py
+++ b/python/paddle/fluid/layers/rnn.py
@@ -1805,26 +1805,23 @@ def dynamic_decode(
         **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.
 
     Returns:
-        tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \
-            when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \
-            The final outputs and states, both are Tensor or nested structure of Tensor. \
-            `final_outputs` has the same structure and data types as the :code:`outputs` \
-            returned by :code:`decoder.step()` , and each Tenser in `final_outputs` \
-            is the stacked of all decoding steps' outputs, which might be revised \
-            by :code:`decoder.finalize()` if the decoder has implemented `finalize`. \
-            `final_states` is the counterpart at last time step of initial states \
-            returned by :code:`decoder.initialize()` , thus has the same structure \
-            with it and has tensors with same shapes and data types. `sequence_lengths` \
-            is an `int64` tensor with the same shape as `finished` returned \
-            by :code:`decoder.initialize()` , and it stores the actual lengths of \
-            all decoded sequences.
 
+        - final_outputs (Tensor, nested structure of Tensor), each Tensor in :code:`final_outputs` is the stack of all decoding steps' outputs, which might be revised
+            by :code:`decoder.finalize()` if the decoder has implemented finalize.
+            And :code:`final_outputs` has the same structure and data types as the :code:`outputs`
+            returned by :code:`decoder.step()` .
+
+        - final_states (Tensor, nested structure of Tensor), :code:`final_states` is the counterpart at the last time step of the initial states
+            returned by :code:`decoder.initialize()` , thus has the same structure
+            with it and has tensors with the same shapes and data types.
+
+        - sequence_lengths (Tensor), stores the actual lengths of all decoded sequences.
+            sequence_lengths is provided only if :code:`return_length` is True.
 
     Examples:
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
             from paddle.nn import BeamSearchDecoder, dynamic_decode
             from paddle.nn import GRUCell, Linear, Embedding
diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py
index d8e1b79c259fb..e3b7519c4f846 100644
--- a/python/paddle/framework/framework.py
+++ b/python/paddle/framework/framework.py
@@ -93,6 +93,9 @@ def set_grad_enabled(mode):
     Args:
         mode(bool): whether to enable (`True`), or disable (`False`) grad.
 
+    Returns:
+        None.
+
     Examples:
         .. code-block:: python
 
diff --git a/python/paddle/incubate/tensor/math.py b/python/paddle/incubate/tensor/math.py
index 21e49135441ca..923f8a590bb52 100644
--- a/python/paddle/incubate/tensor/math.py
+++ b/python/paddle/incubate/tensor/math.py
@@ -31,9 +31,14 @@ def segment_sum(data, segment_ids, name=None):
     r"""
     Segment Sum Operator.
 
-    This operator sums the elements of input `data` which with
+    Sum the elements of input `data` that have
     the same index in `segment_ids`.
-    It computes a tensor such that $out_i = \\sum_{j} data_{j}$
+    It computes a tensor such that
+
+    .. math::
+
+        out_i = \sum_{j \in \{segment\_ids_j == i \} } data_{j}
+
     where sum is over j such that `segment_ids[j] == i`.
 
     Args:
@@ -45,7 +50,7 @@ def segment_sum(data, segment_ids, name=None):
                             For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-       output (Tensor): the reduced result.
+       Tensor, the Segment Sum result.
 
     Examples:
 
@@ -93,11 +98,16 @@ def segment_sum(data, segment_ids, name=None):
 )
 def segment_mean(data, segment_ids, name=None):
     r"""
-    Segment mean Operator.
+    Segment Mean Operator.
 
     Ihis operator calculate the mean value of input `data` which
     with the same index in `segment_ids`.
-    It computes a tensor such that $out_i = \\frac{1}{n_i}  \\sum_{j} data[j]$
+    It computes a tensor such that
+
+    .. math::
+
+        out_i = \mathop{mean}_{j \in \{segment\_ids_j == i \} } data_{j}
+
     where sum is over j such that 'segment_ids[j] == i' and $n_i$ is the number
     of all index 'segment_ids[j] == i'.
 
@@ -110,7 +120,7 @@ def segment_mean(data, segment_ids, name=None):
                             For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-       output (Tensor): the reduced result.
+       Tensor, the Segment Mean result.
 
     Examples:
 
@@ -161,9 +171,14 @@ def segment_min(data, segment_ids, name=None):
     r"""
     Segment min operator.
 
-    This operator calculate the minimum elements of input `data` which with
+    Calculate the minimum elements of input `data` that have
     the same index in `segment_ids`.
-    It computes a tensor such that $out_i = \\min_{j} data_{j}$
+    It computes a tensor such that
+
+    .. math::
+
+        out_i = \min_{j \in \{segment\_ids_j == i \} } data_{j}
+
     where min is over j such that `segment_ids[j] == i`.
 
     Args:
@@ -175,7 +190,7 @@ def segment_min(data, segment_ids, name=None):
                             For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-       output (Tensor): the reduced result.
+       Tensor, the minimum result.
 
     Examples:
 
@@ -227,9 +242,14 @@ def segment_max(data, segment_ids, name=None):
     r"""
     Segment max operator.
 
-    This operator calculate the maximum elements of input `data` which with
+    Calculate the maximum elements of input `data` that have
     the same index in `segment_ids`.
-    It computes a tensor such that $out_i = \\max_{j} data_{j}$
+    It computes a tensor such that
+
+    .. math::
+
+        out_i = \max_{j \in \{segment\_ids_j == i \} } data_{j}
+
     where max is over j such that `segment_ids[j] == i`.
 
     Args:
@@ -241,7 +261,7 @@ def segment_max(data, segment_ids, name=None):
                             For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-       output (Tensor): the reduced result.
+       Tensor, the maximum result.
 
     Examples:
 
diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py
index f3d906be1f3ed..1a3a719369fff 100644
--- a/python/paddle/nn/functional/extension.py
+++ b/python/paddle/nn/functional/extension.py
@@ -39,7 +39,7 @@
 
 def diag_embed(input, offset=0, dim1=-2, dim2=-1):
     """
-    This OP creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2)
+    Creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2)
     are filled by ``input``. By default, a 2D plane formed by the last two dimensions
     of the returned tensor will be selected.
 
@@ -61,48 +61,48 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.nn.functional as F
-            import numpy as np
-
-            diag_embed = np.random.randn(2, 3).astype('float32')
-            # [[ 0.7545889 , -0.25074545,  0.5929117 ],
-            #  [-0.6097662 , -0.01753256,  0.619769  ]]
-
-            data1 = F.diag_embed(diag_embed)
-            data1.numpy()
-            # [[[ 0.7545889 ,  0.        ,  0.        ],
-            #  [ 0.        , -0.25074545,  0.        ],
-            #   [ 0.        ,  0.        ,  0.5929117 ]],
-
-            # [[-0.6097662 ,  0.        ,  0.        ],
-            #  [ 0.        , -0.01753256,  0.        ],
-            #  [ 0.        ,  0.        ,  0.619769  ]]]
-
-            data2 = F.diag_embed(diag_embed, offset=-1, dim1=0, dim2=2)
-            data2.numpy()
-            # [[[ 0.        ,  0.        ,  0.        ,  0.        ],
-            #   [ 0.7545889 ,  0.        ,  0.        ,  0.        ],
-            #   [ 0.        , -0.25074545,  0.        ,  0.        ],
-            #   [ 0.        ,  0.        ,  0.5929117 ,  0.        ]],
-            #
-            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
-            #   [-0.6097662 ,  0.        ,  0.        ,  0.        ],
-            #   [ 0.        , -0.01753256,  0.        ,  0.        ],
-            #   [ 0.        ,  0.        ,  0.619769  ,  0.        ]]]
-
-            data3 = F.diag_embed(diag_embed, offset=1, dim1=0, dim2=2)
-            data3.numpy()
-            # [[[ 0.        ,  0.7545889 ,  0.        ,  0.        ],
-            #   [ 0.        , -0.6097662 ,  0.        ,  0.        ]],
-            #
-            #  [[ 0.        ,  0.        , -0.25074545,  0.        ],
-            #   [ 0.        ,  0.        , -0.01753256,  0.        ]],
-            #
-            #  [[ 0.        ,  0.        ,  0.        ,  0.5929117 ],
-            #   [ 0.        ,  0.        ,  0.        ,  0.619769  ]],
-            #
-            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
-            #   [ 0.        ,  0.        ,  0.        ,  0.        ]]]
+
+            diag_embed_input = paddle.arange(6)
+
+            diag_embed_output1 = F.diag_embed(diag_embed_input)
+            print(diag_embed_output1)
+            # Tensor(shape=[6, 6], dtype=int64, place=Place(cpu), stop_gradient=True,
+            #        [[0, 0, 0, 0, 0, 0],
+            #         [0, 1, 0, 0, 0, 0],
+            #         [0, 0, 2, 0, 0, 0],
+            #         [0, 0, 0, 3, 0, 0],
+            #         [0, 0, 0, 0, 4, 0],
+            #         [0, 0, 0, 0, 0, 5]])
+
+            diag_embed_output2 = F.diag_embed(diag_embed_input, offset=-1, dim1=0,dim2=1 )
+            print(diag_embed_output2)
+            # Tensor(shape=[7, 7], dtype=int64, place=Place(cpu), stop_gradient=True,
+            #        [[0, 0, 0, 0, 0, 0, 0],
+            #         [0, 0, 0, 0, 0, 0, 0],
+            #         [0, 1, 0, 0, 0, 0, 0],
+            #         [0, 0, 2, 0, 0, 0, 0],
+            #         [0, 0, 0, 3, 0, 0, 0],
+            #         [0, 0, 0, 0, 4, 0, 0],
+            #         [0, 0, 0, 0, 0, 5, 0]])
+
+            diag_embed_input_2dim = paddle.reshape(diag_embed_input,[2,3])
+            print(diag_embed_input_2dim)
+            # Tensor(shape=[2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
+            #        [[0, 1, 2],
+            #         [3, 4, 5]])
+            diag_embed_output3 = F.diag_embed(diag_embed_input_2dim,offset= 0, dim1=0, dim2=2 )
+            print(diag_embed_output3)
+            # Tensor(shape=[3, 2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
+            #        [[[0, 0, 0],
+            #          [3, 0, 0]],
+
+            #         [[0, 1, 0],
+            #          [0, 4, 0]],
+
+            #         [[0, 0, 2],
+            #          [0, 0, 5]]])
     """
     if not isinstance(input, Variable):
         input = assign(input)
diff --git a/python/paddle/tensor/einsum.py b/python/paddle/tensor/einsum.py
index 50718b64409e9..1aad124b2cace 100644
--- a/python/paddle/tensor/einsum.py
+++ b/python/paddle/tensor/einsum.py
@@ -868,7 +868,7 @@ def einsum(equation, *operands):
 
     einsum(equation, *operands)
 
-    The current version of this API should be used in dygraph only mode.
+    The current version of this API should be used in dynamic graph mode only.
 
     Einsum offers a tensor operation API which allows using the Einstein summation
     convention or Einstain notation. It takes as input one or multiple tensors and
@@ -901,20 +901,21 @@ def einsum(equation, *operands):
           dimensions into broadcasting dimensions.
         - Singular labels are called free labels, duplicate are dummy labels. Dummy labeled
           dimensions will be reduced and removed in the output.
-        - Output labels can be explicitly specified on the right hand side of `->` or omitted. In the latter case, the output labels will be inferred from the input labels.
-            - Inference of output labels
-                - Broadcasting label `...`, if present, is put on the leftmost position.
-                - Free labels are reordered alphabetically and put after `...`.
-            - On explicit output labels
-                - If broadcasting is enabled, then `...` must be present.
-                - The output labels can be an empty, an indication to output as a scalar
-                  the sum over the original output.
-                - Non-input labels are invalid.
-                - Duplicate labels are invalid.
-                - For any dummy label which is present for the output, it's promoted to
-                  a free label.
-                - For any free label which is not present for the output, it's lowered to
-                  a dummy label.
+        - Output labels can be explicitly specified on the right hand side of `->` or omitted.
+            In the latter case, the output labels will be inferred from the input labels.
+                - Inference of output labels
+                    - Broadcasting label `...`, if present, is put on the leftmost position.
+                    - Free labels are reordered alphabetically and put after `...`.
+                - On explicit output labels
+                    - If broadcasting is enabled, then `...` must be present.
+                    - The output labels can be empty, an indication to output as a scalar
+                        the sum over the original output.
+                    - Non-input labels are invalid.
+                    - Duplicate labels are invalid.
+                    - For any dummy label which is present for the output, it's promoted to
+                        a free label.
+                    - For any free label which is not present for the output, it's lowered to
+                        a dummy label.
 
         - Examples
             - '...ij, ...jk', where i and k are free labels, j is dummy. The output label
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 0ffae882ee5f2..2a3ae8001e743 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -2030,16 +2030,21 @@ def svd(x, full_matrices=False, name=None):
             where `...` is zero or more batch dimensions. N and M can be arbitraty
             positive number. Note that if x is sigular matrices, the grad is numerical
             instable. The data type of x should be float32 or float64.
-        full_matrices (bool): A flag to control the behavor of svd.
+        full_matrices (bool, optional): A flag to control the behavior of svd.
             If full_matrices = True, svd op will compute full U and V matrics,
             which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`. K = min(M, N).
             If full_matrices = False, svd op will use a economic method to store U and V.
             which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`. K = min(M, N).
+            Default value is False.
         name (str, optional): Name for the operation (optional, default is None).
             For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        Tuple of 3 tensors: (U, S, VH). VH is the conjugate transpose of V. S is the singlar value vectors of matrics with shape `[..., K]`
+        - U (Tensor), is the singular value decomposition result U.
+        - S (Tensor), is the singular value decomposition result S.
+        - VH (Tensor), VH is the conjugate transpose of V, which is the singular value decomposition result V.
+
+        Tuple of 3 tensors (U, S, VH): VH is the conjugate transpose of V. S is the vector of singular values of the matrices, with shape `[..., K]`
 
     Examples:
         .. code-block:: python
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index 60272630b2199..06a229106347c 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -2278,12 +2278,12 @@ def unique_consecutive(
     dtype="int64",
     name=None,
 ):
-    r"""
+    """
     Eliminates all but the first element from every consecutive group of equivalent elements.
 
     Note:
-        This function is different from :func:`paddle.unique` in the sense that this function
-        only eliminates consecutive duplicate values. This semantics is similar to `std::unique` in C++.
+        This function is different from :ref:`api_paddle_unique` in the sense that this function
+        only eliminates consecutive duplicate values. This semantics is similar to `std::unique` in C++.
 
     Args:
         x(Tensor): the input tensor, it's data type should be float32, float64, int32, int64.
@@ -2299,7 +2299,12 @@ def unique_consecutive(
             :ref:`api_guide_Name`. Default is None.
 
     Returns:
-        tuple (out, inverse, counts). `out` is the unique consecutive tensor for `x`. `inverse` is provided only if `return_inverse` is True. `counts` is provided only if `return_counts` is True.
+        - out (Tensor), the unique consecutive tensor for x.
+        - inverse (Tensor), the element of the input tensor corresponds to
+            the index of the elements in the unique consecutive tensor for x.
+            inverse is provided only if return_inverse is True.
+        - counts (Tensor), the counts of every unique consecutive element in the input tensor.
+            counts is provided only if return_counts is True.
 
     Example:
         .. code-block:: python
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index efa8d7c453b80..883d3c0e3aeb4 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3449,9 +3449,13 @@ def cumprod(x, dim=None, dtype=None, name=None):
 
     Args:
         x (Tensor): the input tensor need to be cumproded.
-        dim (int): the dimension along which the input tensor will be accumulated. It need to be in the range of [-x.rank, x.rank), where x.rank means the dimensions of the input tensor x and -1 means the last dimension.
-        dtype (str, optional): The data type of the output tensor, can be float32, float64, int32, int64, complex64, complex128. If specified, the input tensor is casted to dtype before the operation is performed. This is useful for preventing data type overflows. The default value is None.
-        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
+        dim (int, optional): the dimension along which the input tensor will be accumulated. It needs to be in the range of [-x.rank, x.rank),
+                    where x.rank means the dimensions of the input tensor x and -1 means the last dimension.
+        dtype (str, optional): The data type of the output tensor, can be float32, float64, int32, int64, complex64,
+                    complex128. If specified, the input tensor is cast to dtype before the operation is performed.
+                    This is useful for preventing data type overflows. The default value is None.
+        name (str, optional): Name for the operation (optional, default is None). For more information,
+                    please refer to :ref:`api_guide_Name`.
 
     Returns:
         Tensor, the result of cumprod operator.