From d9fe67259ead828f174e0be719906e3f2d3c2137 Mon Sep 17 00:00:00 2001 From: Lunderberg Date: Thu, 24 Jun 2021 07:27:29 -0700 Subject: [PATCH] [Docs] Prevented docs/1 file from being generated. (#8029) * [Docs] Prevented docs/1 file from being generated. Typo in tests/scripts/task_sphinx_precheck.sh caused $TVM_HOME/docs/1 file to be created with stderr output, rather than merged stderr and stdout. * [Docs] Corrected sphinx build warnings - Previously, several warnings were generated by sphinx, but were unintentionally suppressed. This PR resolves the sphinx warnings. * [Docs] Corrected additional sphinx build warnings. - Rebased on main and corrected warnings, now up to date as of commit 53e4c603. * [Docs] Corrected additional sphinx build warnings - Rebased on main and corrected warnings, now up to date as of commit 1f2ca068c. * [Docs] Corrected additional sphinx build warnings - Rebased on main and corrected warnings, now up to date as of commit d0791d3db. * [Docs] Ignore sphinx warnings from missing "git describe" and sckit-learn versions. Co-authored-by: Eric Lunderberg --- docs/api/python/index.rst | 1 + docs/api/python/relay/image.rst | 1 + docs/api/python/relay/index.rst | 1 + docs/api/python/tir.rst | 1 + docs/api/python/topi.rst | 1 + docs/dev/device_target_interactions.rst | 1 + docs/dev/index.rst | 11 ++++++++ python/tvm/auto_scheduler/compute_dag.py | 2 +- python/tvm/driver/build_module.py | 12 ++++----- python/tvm/ir/op.py | 18 ++++++++----- python/tvm/micro/build.py | 11 ++++---- python/tvm/relay/op/transform.py | 16 ++++++++--- python/tvm/relay/transform/transform.py | 26 +++++++++--------- python/tvm/runtime/ndarray.py | 1 + python/tvm/runtime/profiling.py | 2 +- python/tvm/te/hybrid/__init__.py | 2 +- python/tvm/te/operation.py | 10 +++---- python/tvm/te/tensor_intrin.py | 8 +++--- python/tvm/tir/buffer.py | 2 +- python/tvm/tir/schedule/block_scope.py | 17 +++++++----- python/tvm/tir/schedule/schedule.py | 27 ++++++++++++------- python/tvm/tir/stmt.py | 2 +- python/tvm/tir/stmt_functor.py | 8 +++--- python/tvm/tir/transform/function_pass.py | 2 +- python/tvm/tir/transform/transform.py | 20 +++++++++----- python/tvm/topi/nn/sparse.py | 18 +++++++------ python/tvm/topi/sparse_reshape.py | 10 ++++--- python/tvm/topi/transform.py | 1 + python/tvm/topi/unique.py | 14 +++++----- tests/scripts/task_sphinx_precheck.sh | 7 ++--- tutorials/frontend/deploy_model_on_rasp.py | 2 +- tutorials/get_started/autotvm_matmul_x86.py | 8 +++--- tutorials/get_started/install.py | 1 + tutorials/get_started/relay_quick_start.py | 9 ++++--- .../get_started/tensor_expr_get_started.py | 24 ++++++++++------- vta/tutorials/autotvm/tune_alu_vta.py | 1 + 36 files changed, 185 insertions(+), 113 deletions(-) diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index 76322a1acfe2..62741548d3da 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -18,6 +18,7 @@ Python API ========== + .. toctree:: :maxdepth: 2 diff --git a/docs/api/python/relay/image.rst b/docs/api/python/relay/image.rst index efd142f205a7..3e5162718441 100644 --- a/docs/api/python/relay/image.rst +++ b/docs/api/python/relay/image.rst @@ -22,4 +22,5 @@ tvm.relay.image .. 
automodule:: tvm.relay.image :members: :imported-members: + :exclude-members: Expr, Constant :autosummary: diff --git a/docs/api/python/relay/index.rst b/docs/api/python/relay/index.rst index 084b745ffb20..399bba3e5b11 100644 --- a/docs/api/python/relay/index.rst +++ b/docs/api/python/relay/index.rst @@ -26,4 +26,5 @@ tvm.relay TypeVar, GlobalTypeVar, TypeConstraint, FuncType, TupleType, IncompleteType, TypeCall, TypeRelation, TensorType, RelayRefType, GlobalVar, SourceName, Span, Var, Op, Constructor + :noindex: TypeData :autosummary: diff --git a/docs/api/python/tir.rst b/docs/api/python/tir.rst index 9f2581b8c0a8..b0b8f1cff5fb 100644 --- a/docs/api/python/tir.rst +++ b/docs/api/python/tir.rst @@ -37,6 +37,7 @@ tvm.tir.analysis .. automodule:: tvm.tir.analysis :members: :imported-members: + :noindex: Buffer, Stmt :autosummary: diff --git a/docs/api/python/topi.rst b/docs/api/python/topi.rst index f62509f571e3..c77b9eae89d4 100644 --- a/docs/api/python/topi.rst +++ b/docs/api/python/topi.rst @@ -20,6 +20,7 @@ tvm.topi .. automodule:: tvm.topi :members: :imported-members: + :noindex: AssertStmt :autosummary: tvm.topi.nn diff --git a/docs/dev/device_target_interactions.rst b/docs/dev/device_target_interactions.rst index e5fa708434fb..9c391d31bec0 100644 --- a/docs/dev/device_target_interactions.rst +++ b/docs/dev/device_target_interactions.rst @@ -15,6 +15,7 @@ specific language governing permissions and limitations under the License. + .. _tvm-target-specific-overview: Device/Target Interactions diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 873af9c6a3b7..b4fb37d790f4 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -29,6 +29,10 @@ This page is organized as follows: The sections after are specific guides focused on each logical component, organized by the component's name. +- The :ref:`Device/Target Interactions ` + page describes how TVM interacts with each supported physical device + and code-generation target. + - Feel free to also check out the :ref:`dev-how-to` for useful development tips. This guide provides a few complementary views of the architecture. @@ -244,11 +248,18 @@ for learning-based optimizations. :maxdepth: 1 runtime + + +.. toctree:: + :maxdepth: 1 + debugger virtual_machine introduction_to_module_serialization device_target_interactions + + tvm/node -------- The node module adds additional features on top of the `runtime::Object` for IR data structures. diff --git a/python/tvm/auto_scheduler/compute_dag.py b/python/tvm/auto_scheduler/compute_dag.py index 948f277034db..f7a5f39c829a 100755 --- a/python/tvm/auto_scheduler/compute_dag.py +++ b/python/tvm/auto_scheduler/compute_dag.py @@ -96,7 +96,7 @@ class ComputeDAG(Object): Parameters ---------- - compute : Union[List[Tensor], str, Schedule] + compute : Union[List[Tensor], str, tvm.te.Schedule] Input/output tensors or workload key for a compute declaration. """ diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py index a4df63f225b2..a9e07299f6dd 100644 --- a/python/tvm/driver/build_module.py +++ b/python/tvm/driver/build_module.py @@ -98,17 +98,17 @@ def lower( Parameters ---------- - inputs : Union[schedule.Schedule, PrimFunc, IRModule] + inp : Union[tvm.te.schedule.Schedule, tvm.tir.PrimFunc, IRModule] The TE schedule or TensorIR PrimFunc/IRModule to be built - args : Optional[List[Union[Buffer, tensor.Tensor, Var]]] + args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]] The argument lists to the function for TE schedule. 
It should be None if we want to lower TensorIR. name : str The name of result function. - binds : Optional[Mapping[tensor.Tensor, Buffer]] + binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]] Dictionary that maps the Tensor to Buffer which specified the data layout requirement of the function. By default, a new compact buffer is created for each tensor in the argument. @@ -233,10 +233,10 @@ def build( Parameters ---------- - inputs : Union[schedule.Schedule, PrimFunc, IRModule, Mapping[str, IRModule]] + inputs : Union[tvm.te.schedule.Schedule, tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]] The input to be built - args : Optional[List[Union[Buffer, tensor.Tensor, Var]]] + args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]] The argument lists to the function. target : Optional[Union[str, Target]] @@ -254,7 +254,7 @@ def build( name : Optional[str] The name of result function. - binds : Optional[Mapping[tensor.Tensor, Buffer]] + binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]] Dictionary that maps the binding of symbolic buffer to Tensor. By default, a new buffer is created for each tensor in the argument. diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 1a2854615f59..2a62f34c934f 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -96,14 +96,20 @@ def add_type_rel(self, rel_name, type_rel_func=None): type_rel_func : Optional[function (args: List[Type], attrs: Attrs) -> Type] The backing relation function which can solve an arbitrary relation on variables. Differences with type_rel_func in C++: - 1, when type_rel_func is not None: - 1) OpAddTypeRel on C++ side will adjust type_rel_func with TypeReporter to + + 1) When type_rel_func is not None + + a) OpAddTypeRel on C++ side will adjust type_rel_func with TypeReporter to calling convention of relay type system. - 2) type_rel_func returns output argument's type, return None means can't + + b) type_rel_func returns output argument's type, return None means can't infer output's type. - 3) only support single output operators for now, the last argument is output tensor. - 2, when type_rel_func is None, will call predefined type_rel_funcs in relay - accorrding to `tvm.relay.type_relation.` + rel_name. + + c) only support single output operators for now, the last argument is output tensor. + + 2) when type_rel_func is None, will call predefined type_rel_funcs in relay + according to ``tvm.relay.type_relation.`` + rel_name. + """ _ffi_api.OpAddTypeRel(self, rel_name, type_rel_func) diff --git a/python/tvm/micro/build.py b/python/tvm/micro/build.py index 694aebe6f1ed..a83ccaa47cda 100644 --- a/python/tvm/micro/build.py +++ b/python/tvm/micro/build.py @@ -158,11 +158,12 @@ def default_options(crt_config_include_dir, standalone_crt_dir=None): Dict : A dictionary containing 3 subkeys, each whose value is _build_default_compiler_options() plus additional customization. - - "bin_opts" - passed as "options" to Compiler.binary() when building MicroBinary. - - "lib_opts" - passed as "options" to Compiler.library() when building bundled CRT - libraries (or otherwise, non-generated libraries). - - "generated_lib_opts" - passed as "options" to Compiler.library() when building the - generated library. + + - "bin_opts" - passed as "options" to Compiler.binary() when building MicroBinary. + - "lib_opts" - passed as "options" to Compiler.library() when building bundled CRT + libraries (or otherwise, non-generated libraries). 
+ - "generated_lib_opts" - passed as "options" to Compiler.library() when building the + generated library. """ bin_opts = _build_default_compiler_options(standalone_crt_dir) bin_opts["include_dirs"].append(crt_config_include_dir) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 9cb50ed6548a..2c299022bd6e 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -1373,25 +1373,32 @@ def sparse_fill_empty_rows(sparse_indices, sparse_values, dense_shape, default_v Fill rows in a sparse matrix that do no contain any values. Values are placed in the first column of empty rows. The sparse array is in COO format. It returns a TupleWrapper with 3 outputs + Parameters ---------- sparse_indices : relay.Expr A 2-D tensor[N, ndims] of integers containing location of sparse values, where N is the number of sparse values and n_dim is the number of dimensions of the dense_shape. The first column of this relay parameter must be sorted in ascending order. + sparse_values : relay.Expr A 1-D tensor[N] containing the sparse values for the sparse indices. + dense_shape : relay.Expr A 1-D tensor[ndims] which contains shape of the dense output tensor. + default_value : relay.Expr A 1-D tensor[1] containing the default value for the remaining locations. + Returns ------- new_sparse_indices : relay.Expr A 2-D tensor[?, ndims] of integers containing location of new sparse indices. The first column outputs must be sorted in ascending order. + new_sparse_values : relay.Expr A 1-D tensor[?] containing the sparse values for the sparse indices. + empty_row_indicator : relay.Expr A 1-D tensor[dense_shape[0]] filled with zeros and ones indicating whether the particular row is empty or full respectively @@ -1702,18 +1709,18 @@ def unique(data, is_sorted=True, return_counts=False): .. code-block:: python [output, indices, num_unique] = unique([4, 5, 1, 2, 3, 3, 4, 5], False, False) - output = [4, 5, 1, 2, 3, ?, ?, ?] + output = [4, 5, 1, 2, 3, _, _, _] indices = [0, 1, 2, 3, 4, 4, 0, 1] num_unique = [5] [output, indices, num_unique, counts] = unique([4, 5, 1, 2, 3, 3, 4, 5], False, True) - output = [4, 5, 1, 2, 3, ?, ?, ?] + output = [4, 5, 1, 2, 3, _, _, _] indices = [0, 1, 2, 3, 4, 4, 0, 1] num_unique = [5] - counts = [2, 2, 1, 1, 2, ?, ?, ?] + counts = [2, 2, 1, 1, 2, _, _, _] [output, indices, num_unique] = unique([4, 5, 1, 2, 3, 3, 4, 5], True) - output = [1, 2, 3, 4, 5, ?, ?, ?] + output = [1, 2, 3, 4, 5, _, _, _] indices = [3, 4, 0, 1, 2, 2, 3, 4] num_unique = [5] """ @@ -1744,6 +1751,7 @@ def invert_permutation(data): Examples -------- .. code-block:: python + data = [3, 4, 0, 2, 1] relay.invert_permutation(data) = [2, 4, 3, 0, 1] """ diff --git a/python/tvm/relay/transform/transform.py b/python/tvm/relay/transform/transform.py index fa7f4c4db644..ad383055dbce 100644 --- a/python/tvm/relay/transform/transform.py +++ b/python/tvm/relay/transform/transform.py @@ -1177,18 +1177,20 @@ def FakeQuantizationToInteger(): """ Find regions of the graph of the form - x w - | | - dq dq - \ / - op1 - | - op2 - | - q - - where q == qnn.quantize and dq = qnn.dequantize - and rewrite them into integer versions of op1 and op2 + .. 
code-block:: text + + x w + | | + dq dq + \ / + op1 + | + op2 + | + q + + where ``q == qnn.quantize`` and ``dq = qnn.dequantize`` + and rewrite them into integer versions of ``op1`` and ``op2`` Rules for rewriting indivdual ops are in fake_quantization_to_integer.py diff --git a/python/tvm/runtime/ndarray.py b/python/tvm/runtime/ndarray.py index 5a7acf0d6c30..27811a963993 100644 --- a/python/tvm/runtime/ndarray.py +++ b/python/tvm/runtime/ndarray.py @@ -390,6 +390,7 @@ def gpu(dev_id=0): deprecated:: 0.9.0 Use :py:func:`tvm.cuda` instead. + Parameters ---------- dev_id : int, optional diff --git a/python/tvm/runtime/profiling.py b/python/tvm/runtime/profiling.py index caa1117944c9..5a1cd6796b64 100644 --- a/python/tvm/runtime/profiling.py +++ b/python/tvm/runtime/profiling.py @@ -26,7 +26,7 @@ class Report(Object): """A container for information gathered during a profiling run. - Fields + Attributes ---------- calls : Array[Dict[str, Object]] Per-call profiling metrics (function name, runtime, device, ...). diff --git a/python/tvm/te/hybrid/__init__.py b/python/tvm/te/hybrid/__init__.py index 3a10bda29a7d..9530f0d0ae36 100644 --- a/python/tvm/te/hybrid/__init__.py +++ b/python/tvm/te/hybrid/__init__.py @@ -74,7 +74,7 @@ def build(sch, inputs, outputs, name="hybrid_func"): Parameters ---------- - sch: Schedule + sch: tvm.te.Schedule The schedule to be dumped inputs: An array of Tensors or Vars diff --git a/python/tvm/te/operation.py b/python/tvm/te/operation.py index 52eb591c48d4..6af3429b3eef 100644 --- a/python/tvm/te/operation.py +++ b/python/tvm/te/operation.py @@ -226,12 +226,12 @@ def extern( .. note:: **Parameters** - - **ins** (list of :any:`Buffer`) - Placeholder for each inputs - - **outs** (list of :any:`Buffer`) - Placeholder for each outputs + - **ins** (list of :any:`tvm.tir.Buffer`) - Placeholder for each inputs + - **outs** (list of :any:`tvm.tir.Buffer`) - Placeholder for each outputs **Returns** - - **stmt** (:any:`Stmt`) - The statement that carries out array computation. + - **stmt** (:any:`tvm.tir.Stmt`) - The statement that carries out array computation. name: str, optional The name hint of the tensor @@ -240,10 +240,10 @@ def extern( The data types of outputs, by default dtype will be same as inputs. - in_buffers: Buffer or list of Buffer, optional + in_buffers: tvm.tir.Buffer or list of tvm.tir.Buffer, optional Input buffers. - out_buffers: Buffer or list of Buffers, optional + out_buffers: tvm.tir.Buffer or list of tvm.tir.Buffer, optional Output buffers. diff --git a/python/tvm/te/tensor_intrin.py b/python/tvm/te/tensor_intrin.py index 79b2db5d94a8..ff633af02d13 100644 --- a/python/tvm/te/tensor_intrin.py +++ b/python/tvm/te/tensor_intrin.py @@ -82,12 +82,12 @@ def decl_tensor_intrin( .. note:: **Parameters** - - **ins** (list of :any:`Buffer`) - Placeholder for each inputs - - **outs** (list of :any:`Buffer`) - Placeholder for each outputs + - **ins** (list of :any:`tvm.tir.Buffer`) - Placeholder for each inputs + - **outs** (list of :any:`tvm.tir.Buffer`) - Placeholder for each outputs **Returns** - - **stmt** (:any:`Stmt`, or tuple of three stmts) + - **stmt** (:any:`tvm.tir.Stmt`, or tuple of three stmts) - If a single stmt is returned, it represents the body - If tuple of three stmts are returned they corresponds to body, reduce_init, reduce_update @@ -95,7 +95,7 @@ def decl_tensor_intrin( name: str, optional The name of the intrinsic. 
- binds: dict of :any:`Tensor` to :any:`Buffer`, optional + binds: dict of :any:`Tensor` to :any:`tvm.tir.Buffer`, optional Dictionary that maps the Tensor to Buffer which specified the data layout requirement of the function. By default, a new compact buffer is created for each tensor in the argument. diff --git a/python/tvm/tir/buffer.py b/python/tvm/tir/buffer.py index 267f0151a91b..d905a53b3303 100644 --- a/python/tvm/tir/buffer.py +++ b/python/tvm/tir/buffer.py @@ -198,7 +198,7 @@ def decl_buffer( Returns ------- - buffer : Buffer + buffer : tvm.tir.Buffer The created buffer Example diff --git a/python/tvm/tir/schedule/block_scope.py b/python/tvm/tir/schedule/block_scope.py index 82814521785d..cd0821ee0f24 100644 --- a/python/tvm/tir/schedule/block_scope.py +++ b/python/tvm/tir/schedule/block_scope.py @@ -109,15 +109,18 @@ class Dependency(Object): @register_object("tir.BlockScope") class BlockScope(Object): - """An object corresponds to each block sref in the sref tree, - which tracks the producer-consumer dependency between blocks. + """An object corresponds to each block sref in the sref tree, which + tracks the producer-consumer dependency between blocks. Glossary: - - Block scope: A contiguous subtree of the sref tree, rooted at each block sref, - whose components are: - - scope root: a block sref - - internal srefs: loop srefs - - scope leaves: block srefs + + - Block scope: A contiguous subtree of the sref tree, rooted at + each block sref, whose components are: + + - scope root: a block sref + - internal srefs: loop srefs + - scope leaves: block srefs + - Child block: The scope leaf blocks under the scope root or a specific internal sref """ diff --git a/python/tvm/tir/schedule/schedule.py b/python/tvm/tir/schedule/schedule.py index 9452f5ab72ee..960c07f27feb 100644 --- a/python/tvm/tir/schedule/schedule.py +++ b/python/tvm/tir/schedule/schedule.py @@ -260,12 +260,16 @@ def get_loops(self, block: BlockRV) -> List[LoopRV]: ########## Schedule: compute location ########## def compute_inline(self, block: BlockRV) -> None: """Inline a block into its consumer(s). It requires: + 1) The block is a complete non-root block, which only produces one buffer + 2) The block must not be the only leaf in the scope. - 3) The body of the block must be a BufferStore statement in the form of, - A[i, j, k, ...] = ... - where the indices of the LHS are all distinct atomic variables, - and no variables other than those indexing variables are allowed in the statement. + + 3) The body of the block must be a BufferStore statement in + the form of, ``A[i, j, k, ...] = ...`` where the indices of + the LHS are all distinct atomic variables, and no variables + other than those indexing variables are allowed in the + statement. Parameters ---------- @@ -313,14 +317,19 @@ def after_inline(a: ty.handle, c: ty.handle) -> None: def reverse_compute_inline(self, block: BlockRV) -> None: """Inline a block into its only producer. It requires: + 1) The block is a complete non-root block, which only produces and consumes one buffer + 2) The block must not be the only leaf in the scope. - 3) The only producer of the block is a read-after-write producer - and a complete non-root block + + 3) The only producer of the block is a read-after-write producer and a + complete non-root block + 4) The body of the block must be a BufferStore statement in the form of, - B[f(i, j, k, ...)] = g(i, j, k, A[i, j, k, ...] ...) 
- where the indices of each `BufferLoad` on the RHS are all distinct atomic variables, - and no variables other than those indexing variables are allowed in the statement. + ``B[f(i, j, k, ...)] = g(i, j, k, A[i, j, k, ...] ...)`` where the + indices of each `BufferLoad` on the RHS are all distinct atomic + variables, and no variables other than those indexing variables are + allowed in the statement. Parameters ---------- diff --git a/python/tvm/tir/stmt.py b/python/tvm/tir/stmt.py index 46f456cd760a..dd7665a56692 100644 --- a/python/tvm/tir/stmt.py +++ b/python/tvm/tir/stmt.py @@ -77,7 +77,7 @@ class AssertStmt(Stmt): message : PrimExpr The error message. - body : Stmt + body : tvm.tir.Stmt The body statement. span : Optional[Span] diff --git a/python/tvm/tir/stmt_functor.py b/python/tvm/tir/stmt_functor.py index f1e64ba0d1ce..4ec755cdf922 100644 --- a/python/tvm/tir/stmt_functor.py +++ b/python/tvm/tir/stmt_functor.py @@ -23,13 +23,13 @@ def ir_transform(stmt, preorder, postorder, only_enable=None): Parameters ---------- - stmt : Stmt + stmt : tvm.tir.Stmt The input to be transformed. preorder: function The function called in before recursive mutation If preorder returns None, then the transform will proceed to recursive call. - If preorder returns a not None Stmt/Expr, the transformer will simply return it and + If preorder returns a not None tvm.tir.Stmt/Expr, the transformer will simply return it and won't do further recursion. postorder : function @@ -40,7 +40,7 @@ def ir_transform(stmt, preorder, postorder, only_enable=None): Returns ------- - result : Stmt + result : tvm.tir.Stmt The result. """ return _ffi_api.IRTransform(stmt, preorder, postorder, only_enable) @@ -71,7 +71,7 @@ def substitute(node, vmap): Returns ------- - result : Stmt + result : tvm.tir.Stmt The result. """ return _ffi_api.Substitute(node, vmap) diff --git a/python/tvm/tir/transform/function_pass.py b/python/tvm/tir/transform/function_pass.py index 7cff1f66a625..374e731725be 100644 --- a/python/tvm/tir/transform/function_pass.py +++ b/python/tvm/tir/transform/function_pass.py @@ -70,7 +70,7 @@ def prim_func_pass(pass_func=None, opt_level=None, name=None, required=None): Parameters ---------- - pass_func : Optional[Callable[(PrimFunc, IRModule, PassContext) -> PrimFunc]] + pass_func : Optional[Callable[(tvm.tir.PrimFunc, IRModule, PassContext) -> tvm.tir.PrimFunc]] The transformation function or class. opt_level : int diff --git a/python/tvm/tir/transform/transform.py b/python/tvm/tir/transform/transform.py index 26b22f99c215..8a32a7e6dff0 100644 --- a/python/tvm/tir/transform/transform.py +++ b/python/tvm/tir/transform/transform.py @@ -587,12 +587,16 @@ def ConvertBlocksToOpaque(): def CompactBufferAllocation(): - """Compact the buffer access region. by removing the buffer regions that are not accessed, - i.e. narrowing the buffer shape and adjust the access region if necessary. + """Compact the buffer access region. by removing the buffer regions + that are not accessed, i.e. narrowing the buffer shape and adjust + the access region if necessary. Example ------- - Before narrowing, `B` is a `[16, 16]` buffer, but only a skinny vector `B[i, 0:16]` is accessed. + + Before narrowing, ``B`` is a ``[16, 16]`` buffer, but only a + skinny vector ``B[i, 0:16]`` is accessed. + .. code-block:: python for i in range(0, 16): @@ -602,9 +606,12 @@ def CompactBufferAllocation(): B[i, j] = A[i, j] + 1 for j in range(0, 16): C[i, j] = B[i, j] + 1 - This pass narrows the buffer shape and adjust its accessed region accordingly. 
- In this particular case, because only a `1 * 16` vector of `B` is accessed, - the pass narrows `B` to shape `[1, 16]`, and changes the access to `B[i, j]` to `B[0, j]`. + + This pass narrows the buffer shape and adjust its accessed region + accordingly. In this particular case, because only a ``1 * 16`` + vector of ``B`` is accessed, the pass narrows ``B`` to shape ``[1, + 16]``, and changes the access to ``B[i, j]`` to ``B[0, j]``. + .. code-block:: python for i in range(0, 16): @@ -619,6 +626,7 @@ def CompactBufferAllocation(): ------- fpass : tvm.transform.Pass The result pass + """ return _ffi_api.CompactBufferAllocation() diff --git a/python/tvm/topi/nn/sparse.py b/python/tvm/topi/nn/sparse.py index 511ed195101c..73998db6f162 100644 --- a/python/tvm/topi/nn/sparse.py +++ b/python/tvm/topi/nn/sparse.py @@ -568,24 +568,26 @@ def _compute_block(i, nb_j, j, h, w): # pylint: disable=C0103 def sparse_conv2d(dense_data, sparse_data, sparse_indices, sparse_indptr, layout="NHWC"): """ - Computes sparse-conv2d(1*1) of `data` and - `(weight_data, weight_indices, weight_indptr) + Computes sparse-conv2d(1*1) of ``data`` and + ``(weight_data, weight_indices, weight_indptr)`` Parameters ---------- dense_data : tvm.te.Tensor - 4-D with shape [M, H, W, K] (layout=NHWC) - 4-D with shape [M, K, H, W] (layout=NCHW) + 4-D with shape ``[M, H, W, K]`` (layout=NHWC) + + 4-D with shape ``[M, K, H, W]`` (layout=NCHW) sparse_data : tvm.te.Tensor - 2-D with shape [num_blocks, bs_r] (BSR) - 3-D with shape [num_blocks, bs_r, bs_c] (BSR) + 2-D with shape ``[num_blocks, bs_r]`` (BSR) + + 3-D with shape ``[num_blocks, bs_r, bs_c]`` (BSR) sparse_indices : tvm.te.Tensor - 1-D with shape [num_blocks] (BSR) + 1-D with shape ``[num_blocks]`` (BSR) sparse_indptr : tvm.te.Tensor - 1-D with shape [(N + 1) // bs_r] (BSR) + 1-D with shape ``[(N + 1) // bs_r]`` (BSR) layout : str layout of data diff --git a/python/tvm/topi/sparse_reshape.py b/python/tvm/topi/sparse_reshape.py index f2c0a2928b93..b25bd854a7f9 100644 --- a/python/tvm/topi/sparse_reshape.py +++ b/python/tvm/topi/sparse_reshape.py @@ -29,19 +29,24 @@ def sparse_reshape( ): """ Reshape a Sparse Tensor + Parameters ---------- sparse_indices : relay.Expr A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the number of sparse values and n_dim is the number of dimensions of the dense_shape + prev_shape : relay.Expr A 1-D tensor containing the previous shape of the dense tensor + new_shape : relay.Expr A 1-D tensor containing the new shape of the dense tensor + Returns ------- result: relay.Expr Output tensor. + Examples -------- .. code-block:: python @@ -53,9 +58,8 @@ def sparse_reshape( [1, 2, 3]] prev_shape = [2, 3, 4] new_shape = [9, -1] - new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices, - prev_shape, - new_shape) + new_sparse_indices, new_shape = relay.sparse_reshape( + sparse_indices, prev_shape, new_shape) new_sparse_indices = [[0, 0], [0, 1], [1, 2], diff --git a/python/tvm/topi/transform.py b/python/tvm/topi/transform.py index 45756eadbcdb..006a6e785091 100644 --- a/python/tvm/topi/transform.py +++ b/python/tvm/topi/transform.py @@ -961,6 +961,7 @@ def invert_permutation(data): Examples -------- .. 
code-block:: python + data = [3, 4, 0, 2, 1] topi.invert_permutation(data) = [2, 4, 3, 0, 1] """ diff --git a/python/tvm/topi/unique.py b/python/tvm/topi/unique.py index 5aeadc541e29..b168bf01b0ec 100644 --- a/python/tvm/topi/unique.py +++ b/python/tvm/topi/unique.py @@ -245,21 +245,21 @@ def unique(data, is_sorted=True, return_counts=False): .. code-block:: python [output, indices, num_unique] = unique([4, 5, 1, 2, 3, 3, 4, 5], False, False) - output = [4, 5, 1, 2, 3, ?, ?, ?] - indices = [0, 1, 2, 3, 4, ?, ?, ?] + output = [4, 5, 1, 2, 3, _, _, _] + indices = [0, 1, 2, 3, 4, _, _, _] inverse_indices = [0, 1, 2, 3, 4, 4, 0, 1] num_unique = [5] [output, indices, num_unique, counts] = unique([4, 5, 1, 2, 3, 3, 4, 5], False, True) - output = [4, 5, 1, 2, 3, ?, ?, ?] - indices = [0, 1, 2, 3, 4, ?, ?, ?] + output = [4, 5, 1, 2, 3, _, _, _] + indices = [0, 1, 2, 3, 4, _, _, _] inverse_indices = [0, 1, 2, 3, 4, 4, 0, 1] num_unique = [5] - counts = [2, 2, 1, 1, 2, ?, ?, ?] + counts = [2, 2, 1, 1, 2, _, _, _] [output, indices, num_unique] = unique([4, 5, 1, 2, 3, 3, 4, 5], True) - output = [1, 2, 3, 4, 5, ?, ?, ?] - indices = [2, 3, 4, 0, 1, ?, ?, ?] + output = [1, 2, 3, 4, 5, _, _, _] + indices = [2, 3, 4, 0, 1, _, _, _] inverse_indices = [3, 4, 0, 1, 2, 2, 3, 4] num_unique = [5] """ diff --git a/tests/scripts/task_sphinx_precheck.sh b/tests/scripts/task_sphinx_precheck.sh index 894f7471bde4..31fc663082f1 100755 --- a/tests/scripts/task_sphinx_precheck.sh +++ b/tests/scripts/task_sphinx_precheck.sh @@ -36,14 +36,15 @@ make cython3 echo "PreCheck sphinx doc generation WARNINGS.." cd docs make clean -TVM_TUTORIAL_EXEC_PATTERN=none make html 2>1 | tee /tmp/$$.log.txt +TVM_TUTORIAL_EXEC_PATTERN=none make html 2>&1 | tee /tmp/$$.log.txt -grep -v -E "__mro__|UserWarning|FutureWarning|tensorflow|Keras|pytorch|TensorFlow|403" < /tmp/$$.log.txt > /tmp/$$.logclean.txt || true +grep -v -E "__mro__|UserWarning|FutureWarning|tensorflow|Keras|pytorch|TensorFlow|403|git describe|scikit-learn version" < /tmp/$$.log.txt > /tmp/$$.logclean.txt || true echo "---------Sphinx Log----------" cat /tmp/$$.logclean.txt echo "-----------------------------" if grep --quiet -E "WARN" < /tmp/$$.logclean.txt; then - echo "WARNINIG found in the log, please fix them." + echo "WARNING found in the log, please fix them." + grep -E "WARN" < /tmp/$$.logclean.txt echo "You can reproduce locally by running ./tests/scripts/task_sphinx_precheck.sh" exit 1 fi diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py index 58e9c55de2c7..de4ed9aff074 100644 --- a/tutorials/frontend/deploy_model_on_rasp.py +++ b/tutorials/frontend/deploy_model_on_rasp.py @@ -162,7 +162,7 @@ def transform_image(image): ###################################################################### # Compile The Graph # ----------------- -# To compile the graph, we call the :any:`relay.build` function +# To compile the graph, we call the :py:func:`relay.build` function # with the graph configuration and parameters. However, You cannot to # deploy a x86 program on a device with ARM instruction set. 
It means # Relay also needs to know the compilation option of target device, diff --git a/tutorials/get_started/autotvm_matmul_x86.py b/tutorials/get_started/autotvm_matmul_x86.py index 97e1b0b8b55f..f9b33b894192 100644 --- a/tutorials/get_started/autotvm_matmul_x86.py +++ b/tutorials/get_started/autotvm_matmul_x86.py @@ -286,10 +286,10 @@ def matmul(N, L, M, dtype): # When proposing the next batch of configs, the tuner can take different # strategies. Some of the tuner strategies provided by TVM include: # -# * :any:`RandomTuner`: Enumerate the space in a random order -# * :any:`GridSearchTuner`: Enumerate the space in a grid search order -# * :any:`GATuner`: Using genetic algorithm to search through the space -# * :any:`XGBTuner`: Uses a model based method. Train a XGBoost model to +# * :any:`tvm.autotvm.tuner.RandomTuner`: Enumerate the space in a random order +# * :any:`tvm.autotvm.tuner.GridSearchTuner`: Enumerate the space in a grid search order +# * :any:`tvm.autotvm.tuner.GATuner`: Using genetic algorithm to search through the space +# * :any:`tvm.autotvm.tuner.XGBTuner`: Uses a model based method. Train a XGBoost model to # predict the speed of lowered IR and pick the next batch according to the # prediction. # diff --git a/tutorials/get_started/install.py b/tutorials/get_started/install.py index e022e4b1ae2e..b69b8b493a4f 100644 --- a/tutorials/get_started/install.py +++ b/tutorials/get_started/install.py @@ -23,6 +23,7 @@ Depending on your needs and your working environment, there are a few different methods for installing TVM. These include: + * Installing from source * Installing from third-party binary package. """ diff --git a/tutorials/get_started/relay_quick_start.py b/tutorials/get_started/relay_quick_start.py index 9f58b1602d7f..fd7f5aa9d756 100644 --- a/tutorials/get_started/relay_quick_start.py +++ b/tutorials/get_started/relay_quick_start.py @@ -55,10 +55,11 @@ # Relay also supports other model formats such as MXNet, CoreML, ONNX and # Tensorflow. # -# In this tutorial, we assume we will do inference on our device -# and the batch size is set to be 1. Input images are RGB color -# images of size 224 * 224. We can call the :any:`tvm.relay.TupleWrapper.astext()` -# to show the network structure. +# In this tutorial, we assume we will do inference on our device and +# the batch size is set to be 1. Input images are RGB color images of +# size 224 * 224. We can call the +# :py:meth:`tvm.relay.expr.TupleWrapper.astext()` to show the network +# structure. batch_size = 1 num_class = 1000 diff --git a/tutorials/get_started/tensor_expr_get_started.py b/tutorials/get_started/tensor_expr_get_started.py index 8fbdb751c9f8..310d6bdbfee4 100644 --- a/tutorials/get_started/tensor_expr_get_started.py +++ b/tutorials/get_started/tensor_expr_get_started.py @@ -512,7 +512,7 @@ def evaluate_addition(func, target, optimization, log): # before it moves on to the next stage. # # A complete description of these primitives can be found in the -# [Schedule Primitives](https://tvm.apache.org/docs/tutorials/language/schedule_primitives.html) docs page. +# [Schedule Primitives](https://tvm.apache.org/docs/tutorials/language/schedule_primitives.html) docs page. 
################################################################################ # Example 2: Manually Optimizing Matrix Multiplication with TE @@ -521,14 +521,20 @@ def evaluate_addition(func, target, optimization, log): # Now we will consider a second, more advanced example, demonstrating how with # just 18 lines of python code TVM speeds up a common matrix multiplication operation by 18x. # -# **Matrix multiplication is a compute intensive operation. There are two important optimizations for good CPU performance:** -# 1. Increase the cache hit rate of memory access. Both complex numerical -# computation and hot-spot memory access can be accelerated by a high cache hit -# rate. This requires us to transform the origin memory access pattern to a pattern that fits the cache policy. -# 2. SIMD (Single instruction multi-data), also known as the vector processing -# unit. On each cycle instead of processing a single value, SIMD can process a small batch of data. -# This requires us to transform the data access pattern in the loop -# body in uniform pattern so that the LLVM backend can lower it to SIMD. +# **Matrix multiplication is a compute intensive operation. There are +# two important optimizations for good CPU performance:** +# +# 1. Increase the cache hit rate of memory access. Both complex +# numerical computation and hot-spot memory access can be +# accelerated by a high cache hit rate. This requires us to +# transform the origin memory access pattern to a pattern that fits +# the cache policy. +# +# 2. SIMD (Single instruction multi-data), also known as the vector +# processing unit. On each cycle instead of processing a single +# value, SIMD can process a small batch of data. This requires us +# to transform the data access pattern in the loop body in uniform +# pattern so that the LLVM backend can lower it to SIMD. # # The techniques used in this tutorial are a subset of tricks mentioned in this # `repository `_. Some of them diff --git a/vta/tutorials/autotvm/tune_alu_vta.py b/vta/tutorials/autotvm/tune_alu_vta.py index f2bf15b9876f..7b1fd411be57 100644 --- a/vta/tutorials/autotvm/tune_alu_vta.py +++ b/vta/tutorials/autotvm/tune_alu_vta.py @@ -16,6 +16,7 @@ # under the License. """ Auto-tuning a ALU fused op on VTA +--------------------------------- """ import os
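
A note on the redirection change in tests/scripts/task_sphinx_precheck.sh above: in POSIX shells, "2>1" redirects stderr into a file literally named "1" (which is how the stray $TVM_HOME/docs/1 file described in the commit message was created), while "2>&1" duplicates stderr onto stdout so the piped tee captures both streams. A minimal sketch of the difference follows; the emit helper and the log file names are illustrative only, not part of the patch:

    #!/usr/bin/env bash
    # Emit one line on stdout and one on stderr.
    emit() {
        echo "to stdout"
        echo "to stderr" >&2
    }

    # Buggy form (old script): stderr is written to a file literally
    # named "1"; only stdout reaches tee and the log.
    emit 2>1 | tee buggy.log

    # Fixed form: stderr is duplicated onto stdout, so tee records
    # both streams in the log.
    emit 2>&1 | tee fixed.log

With the fixed form, sphinx warnings emitted on stderr now flow into /tmp/$$.log.txt and are visible to the grep for WARN lines that decides whether the precheck fails.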