diff --git a/apps/pt_tvmdsoop/tests/test_as_torch.py b/apps/pt_tvmdsoop/tests/test_as_torch.py index 0243e86edebd..684dcd439605 100644 --- a/apps/pt_tvmdsoop/tests/test_as_torch.py +++ b/apps/pt_tvmdsoop/tests/test_as_torch.py @@ -52,7 +52,7 @@ def main(a: T.handle, b: T.handle, c: T.handle) -> None: @tvm.script.ir_module class ModuleGPU: @T.prim_func - def main(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]) -> None: + def main(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i_0 in T.thread_binding(2, thread="blockIdx.x"): for i_2 in T.thread_binding(2, thread="threadIdx.x"): diff --git a/apps/pt_tvmdsoop/tests/test_boolean_tensor.py b/apps/pt_tvmdsoop/tests/test_boolean_tensor.py index 4718b4043945..540cef0c90a3 100644 --- a/apps/pt_tvmdsoop/tests/test_boolean_tensor.py +++ b/apps/pt_tvmdsoop/tests/test_boolean_tensor.py @@ -81,10 +81,10 @@ def test_tensor_boolean_operation(): @as_torch @T.prim_func def negate_tvmscript( - X: T.Buffer[(8, 8), "bool"], - Y: T.Buffer[(8, 8), "float32"], - Z: T.Buffer[(8, 8), "bool"], - U: T.Buffer[(8, 8), "float32"], + X: T.Buffer((8, 8), "bool"), + Y: T.Buffer((8, 8), "float32"), + Z: T.Buffer((8, 8), "bool"), + U: T.Buffer((8, 8), "float32"), ) -> None: for i, j in T.grid(8, 8): with T.block(): diff --git a/include/tvm/script/printer/doc.h b/include/tvm/script/printer/doc.h index 6321caa4e057..4a2d7df1adec 100644 --- a/include/tvm/script/printer/doc.h +++ b/include/tvm/script/printer/doc.h @@ -774,7 +774,7 @@ class AssignDocNode : public StmtDocNode { /*! * \brief The right hand side of the assignment. * - * If null, this doc represents declaration, e.g. `A: T.Buffer[(1,2)]` + * If null, this doc represents a declaration, e.g. `A: T.Buffer((1,2))` * */ Optional<ExprDoc> rhs; /*! \brief The type annotation of this assignment. 
*/ diff --git a/include/tvm/tir/transform.h b/include/tvm/tir/transform.h index 829594d61b98..be7589b04bf5 100644 --- a/include/tvm/tir/transform.h +++ b/include/tvm/tir/transform.h @@ -576,7 +576,7 @@ TVM_DLL Pass UnifiedStaticMemoryPlanner(); * * \code{.py} * @T.prim_func - * def before_transform(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + * def before_transform(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: * for tx in T.thread_binding(0, 16, thread="threadIdx.x"): * for i in T.serial(0, 16, * annotations={"software_pipeline_stage": [0, 1], @@ -601,7 +601,7 @@ TVM_DLL Pass UnifiedStaticMemoryPlanner(); * * \code{.py} * @T.prim_func - * def after_transform(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + * def after_transform(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: * for tx in T.thread_binding(0, 16, thread="threadIdx.x"): * with T.block(): * T.reads([A[tx, 0:16]]) diff --git a/python/tvm/ir/base.py b/python/tvm/ir/base.py index 5df529b0532f..5f3a679591d1 100644 --- a/python/tvm/ir/base.py +++ b/python/tvm/ir/base.py @@ -282,3 +282,34 @@ def structural_hash(node, map_free_vars=False): structural_equal """ return _ffi_node_api.StructuralHash(node, map_free_vars) # type: ignore # pylint: disable=no-member + + +def deprecated( + method_name: str, + new_method_name: str, +): + """A decorator to indicate that a method is deprecated + + Parameters + ---------- + method_name : str + The name of the method to deprecate + new_method_name : str + The name of the new method to use instead + """ + import functools # pylint: disable=import-outside-toplevel + import warnings # pylint: disable=import-outside-toplevel + + def _deprecate(func): + @functools.wraps(func) + def _wrapper(*args, **kwargs): + warnings.warn( + f"{method_name} is deprecated, use {new_method_name} instead", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return _wrapper + + return _deprecate diff --git a/python/tvm/parser.py b/python/tvm/parser.py index 63c40deb2069..b79682d8907b 100644 --- a/python/tvm/parser.py +++ b/python/tvm/parser.py @@ -16,9 +16,12 @@ # under the License. 
# pylint: disable=invalid-name """The legacy TVM parser """ +from .ir.base import deprecated + # pylint: disable=import-outside-toplevel +@deprecated("tvm.parser.parse", "tvm.relay.parse") def parse(*args, **kwargs): """Deprecated, use `tvm.relay.parse` instead""" from tvm.relay import parse as _impl @@ -26,6 +29,7 @@ def parse(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.parse_expr", "tvm.relay.parse_expr") def parse_expr(*args, **kwargs): """Deprecated, use `tvm.relay.parse_expr` instead""" from tvm.relay import parse_expr as _impl @@ -33,6 +37,7 @@ def parse_expr(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.fromtext", "tvm.relay.fromtext") def fromtext(*args, **kwargs): """Deprecated, use `tvm.relay.fromtext` instead""" from tvm.relay import fromtext as _impl @@ -40,6 +45,7 @@ def fromtext(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.SpanCheck", "tvm.relay.SpanCheck") def SpanCheck(*args, **kwargs): """Deprecated, use `tvm.relay.SpanCheck` instead""" from tvm.relay import SpanCheck as _impl diff --git a/python/tvm/script/ir_builder/tir/ir.py b/python/tvm/script/ir_builder/tir/ir.py index 0e76e3d86d6e..7b22751118f6 100644 --- a/python/tvm/script/ir_builder/tir/ir.py +++ b/python/tvm/script/ir_builder/tir/ir.py @@ -29,6 +29,7 @@ import numpy as np # type: ignore from tvm.ir import Range, Type +from tvm.ir.base import deprecated from tvm.runtime import convert, ndarray from tvm.target import Target @@ -1427,6 +1428,26 @@ def ptr(dtype: str, storage_scope: str = "global") -> Var: return _ffi_api.Ptr(dtype, storage_scope) # type: ignore[attr-defined] # pylint: disable=no-member +@deprecated("T.buffer_var", "T.Ptr") +def buffer_var(dtype: str, storage_scope: str = "global") -> Var: + """The pointer declaration function. + + Parameters + ---------- + dtype : str + The data type of the pointer. + + storage_scope : str + The storage scope of the pointer. + + Returns + ------- + res : Var + The pointer. + """ + return _ffi_api.Ptr(dtype, storage_scope) # type: ignore[attr-defined] # pylint: disable=no-member + + def min(a: PrimExpr, b: PrimExpr) -> PrimExpr: # pylint: disable=redefined-builtin """Compute the minimum value of two expressions. @@ -1703,7 +1724,6 @@ def wrapped(*args, **kwargs): broadcast = Broadcast ramp = Ramp -buffer_var = ptr fabs = abs tvm_call_packed = call_packed tvm_call_cpacked = call_cpacked diff --git a/python/tvm/script/parser/core/utils.py b/python/tvm/script/parser/core/utils.py index 453ac18b382b..6a693df12f89 100644 --- a/python/tvm/script/parser/core/utils.py +++ b/python/tvm/script/parser/core/utils.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. """TVM Script Parser utils""" - import inspect from types import FrameType from typing import Any, Callable, Dict, List diff --git a/python/tvm/script/parser/tir/entry.py b/python/tvm/script/parser/tir/entry.py index e7ec7cf886d4..bacf92c14287 100644 --- a/python/tvm/script/parser/tir/entry.py +++ b/python/tvm/script/parser/tir/entry.py @@ -18,6 +18,7 @@ import inspect from typing import Callable, Union +from tvm.ir.base import deprecated from tvm.tir import Buffer, PrimFunc from ...ir_builder.tir import buffer_decl, ptr @@ -49,7 +50,7 @@ def prim_func(func: Callable) -> Union[PrimFunc, Callable]: class BufferProxy: """Buffer proxy class for constructing tir buffer. - Overload __call__ and __getitem__ to support syntax as T.Buffer() and T.Buffer[]. 
+ Overload __call__ and __getitem__ to support syntax as T.Buffer() and the deprecated T.Buffer[]. """ def __call__( @@ -78,6 +79,7 @@ def __call__( axis_separators=axis_separators, ) + @deprecated("T.Buffer[...]", "T.Buffer(...)") def __getitem__(self, keys) -> Buffer: if not isinstance(keys, tuple): return self(keys) @@ -88,7 +90,7 @@ def __getitem__(self, keys) -> Buffer: class PtrProxy: """Ptr proxy class for constructing tir pointer. - Overload __call__ and __getitem__ to support syntax as T.Ptr() and T.Ptr[]. + Overload __call__ and __getitem__ to support syntax as T.Ptr() and the deprecated T.Ptr[]. """ def __call__(self, dtype, storage_scope="global"): @@ -96,6 +98,7 @@ def __call__(self, dtype, storage_scope="global"): dtype = dtype().dtype return ptr(dtype, storage_scope) # pylint: disable=no-member # type: ignore + @deprecated("T.Ptr[...]", "T.Ptr(...)") def __getitem__(self, keys): if not isinstance(keys, tuple): return self(keys) diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 19669cd60cf4..5f0e94869d05 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -1932,13 +1932,13 @@ class object that inherits from `Exception`. class TestRemoveIf(tvm.testing.CompareBeforeAfter): transform = tvm.tir.transform.Simplify() - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): if True: A[0] = 42 else: A[0] = 5 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 42 """ diff --git a/python/tvm/tir/schedule/schedule.py b/python/tvm/tir/schedule/schedule.py index 6a71e5872fcd..4727b4a5f00e 100644 --- a/python/tvm/tir/schedule/schedule.py +++ b/python/tvm/tir/schedule/schedule.py @@ -754,9 +754,9 @@ def add_unit_loop(self, block_or_loop: Union[LoopRV, BlockRV]) -> LoopRV: @T.prim_func def before_add_unit_loop( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -776,9 +776,9 @@ def before_add_unit_loop( @T.prim_func def after_add_unit_loop( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in T.serial(1): with T.block("C"): @@ -1240,7 +1240,7 @@ def cache_inplace( .. code-block:: python @T.prim_func - def before_cache_inplace(data_io: T.Buffer[(64), "int32"]): + def before_cache_inplace(data_io: T.Buffer((64), "int32")): for i0 in T.serial(1): with T.block("A"): T.reads(data_io[:64]) @@ -1261,7 +1261,7 @@ def before_cache_inplace(data_io: T.Buffer[(64), "int32"]): .. 
code-block:: python @T.prim_func - def cache_inplace(data_io: T.Buffer[64, "int32"]) -> None: + def cache_inplace(data_io: T.Buffer(64, "int32")) -> None: data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") for i0 in T.serial(1): for ax0 in T.serial(64): @@ -1350,7 +1350,7 @@ def resize(a: T.handle, b: T.handle) -> None: @T.prim_func def resize_cache_index( - A: T.Buffer[(1, 3, 40, 40), "float32"], B: T.Buffer[(1, 3, 80, 80), "float32"] + A: T.Buffer((1, 3, 40, 40), "float32"), B: T.Buffer((1, 3, 80, 80), "float32") ) -> None: index_var_0 = T.alloc_buffer([80, 80], dtype="int32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) @@ -1431,8 +1431,8 @@ def reindex( @T.prim_func def before_reindex( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -1453,8 +1453,8 @@ def before_reindex( @T.prim_func def after_reindex( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: A_reindex = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -2151,7 +2151,7 @@ def set_scope(self, block: Union[BlockRV, str], buffer_index: int, storage_scope @T.prim_func def before_set_scope( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), dtype="float32") @@ -2178,7 +2178,7 @@ def before_set_scope( @T.prim_func def after_set_scope( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") @@ -2227,8 +2227,8 @@ def blockize(self, loop: LoopRV, preserve_unit_iters: bool = True) -> BlockRV: @T.prim_func def before_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: for i_0, j_0, i_1, j_1 in T.grid(8, 8, 16, 16): with T.block("B"): @@ -2254,8 +2254,8 @@ def before_blockize( @T.prim_func def after_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") )-> None: for i_0, j_0 in T.grid(8, 8): with T.block("B_o"): @@ -2305,9 +2305,9 @@ def tensorize( @T.prim_func def before_tensorize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -2380,9 +2380,9 @@ def mma_intrin(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def after_tensorize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -2819,8 +2819,8 @@ def transform_block_layout( @T.prim_func def before_transform_block_layout( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32") ) -> None: for 
i, j in T.grid(16, 16): with T.block("B"): @@ -2841,8 +2841,8 @@ def before_transform_block_layout( @T.prim_func def after_transform_block_layout( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32") ) -> None: for i in range(256): with T.block("B"): @@ -2903,7 +2903,7 @@ def set_axis_separator( @T.prim_func def before_set_axis_separator( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), dtype="float32") @@ -2931,7 +2931,7 @@ def before_set_axis_separator( @T.prim_func def after_set_axis_separators( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) @@ -2992,7 +2992,7 @@ def decompose_padding(self, block: Union[BlockRV, str], loop: LoopRV) -> BlockRV .. code-block:: python @T.prim_func - def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def before_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in range(140): with T.block("block"): vi = T.axis.remap("S", [i]) @@ -3012,7 +3012,7 @@ def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): .. code-block:: python @T.prim_func - def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def after_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in T.serial(140): with T.block("block_pad_const"): vi = T.axis.spatial(140, i) @@ -3067,9 +3067,9 @@ def pad_einsum(self, block: Union[BlockRV, str], padding: List[int]) -> None: @T.prim_func def before_pad_einsum( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared = T.alloc_buffer((128, 127), "float32", scope="shared") B_shared = T.alloc_buffer((127, 127), "float32", scope="shared") @@ -3108,9 +3108,9 @@ def before_pad_einsum( @T.prim_func def after_pad_einsum( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") B_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") @@ -3193,7 +3193,7 @@ def rolling_buffer( @T.prim_func def before_rolling_buffer( - A: T.Buffer[(12, 12), "int8"], C: T.Buffer[(8, 8), "int8"] + A: T.Buffer((12, 12), "int8"), C: T.Buffer((8, 8), "int8") ) -> None: # body # with T.block("root") @@ -3230,8 +3230,8 @@ def before_rolling_buffer( @T.prim_func def after_rolling_buffer( - A: T.Buffer[(12, 12), "int8"], - C: T.Buffer[(8, 8), "int8"] + A: T.Buffer((12, 12), "int8"), + C: T.Buffer((8, 8), "int8") ) -> None: # body # with T.block("root") diff --git a/python/tvm/topi/hexagon/slice_ops/conv2d.py b/python/tvm/topi/hexagon/slice_ops/conv2d.py index 439fd80648f9..ab782b5fa21a 100644 --- a/python/tvm/topi/hexagon/slice_ops/conv2d.py +++ b/python/tvm/topi/hexagon/slice_ops/conv2d.py @@ -166,7 +166,7 @@ def conv2d_schedule( # from tvm.script import tir as T @T.prim_func - def func(InputTensor: T.Buffer[(1, 24, 12, 
32), "float16"], Weights: T.Buffer[(3, 3, 32, 32), "float16"], compute: T.Buffer[(1, 16, 8, 32), "float16"]) -> None: + def func(InputTensor: T.Buffer((1, 24, 12, 32), "float16"), Weights: T.Buffer((3, 3, 32, 32), "float16"), compute: T.Buffer((1, 16, 8, 32), "float16")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/python/tvm/topi/hexagon/slice_ops/dwconv2d.py b/python/tvm/topi/hexagon/slice_ops/dwconv2d.py index d22dc02a5c1b..d94afe98bc61 100644 --- a/python/tvm/topi/hexagon/slice_ops/dwconv2d.py +++ b/python/tvm/topi/hexagon/slice_ops/dwconv2d.py @@ -94,7 +94,7 @@ def dwconv2d_schedule( @tvm.script.ir_module class Module: @T.prim_func - def main(InputTensor: T.Buffer[(1, 16, 8, 32), "float16"], Weights: T.Buffer[(3, 3, 1, 32), "float16"], Output: T.Buffer[(1, 8, 4, 32), "float16"]) -> None: + def main(InputTensor: T.Buffer((1, 16, 8, 32), "float16"), Weights: T.Buffer((3, 3, 1, 32), "float16"), Output: T.Buffer((1, 8, 4, 32), "float16")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/src/tir/analysis/control_flow_graph.h b/src/tir/analysis/control_flow_graph.h index 00a6b68ff945..f2e46b2478a3 100644 --- a/src/tir/analysis/control_flow_graph.h +++ b/src/tir/analysis/control_flow_graph.h @@ -315,7 +315,7 @@ class BufferState { * * \code{.py} * @T.prim_func - * def func(T.Buffer[16, "float32"]): + * def func(T.Buffer(16, "float32")): * for i in T.serial(16): * if i < 8: * B[i] = i diff --git a/tests/micro/zephyr/test_zephyr_aot_exec.py b/tests/micro/zephyr/test_zephyr_aot_exec.py index d42c7a00b40e..d5bcf08a0cb6 100644 --- a/tests/micro/zephyr/test_zephyr_aot_exec.py +++ b/tests/micro/zephyr/test_zephyr_aot_exec.py @@ -105,7 +105,7 @@ def test_aot_executor(workspace_dir, board, microtvm_debug, use_fvp, serial_numb print("test_relay: construct relay program\n") # Construct Relay program. 
- relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { diff --git a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py index ace1db7811da..a4ea1ea32e6b 100644 --- a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py +++ b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py @@ -48,8 +48,8 @@ def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] { %1 } """ - orig_mod = tvm.parser.fromtext(original_model) - cmsisnn_mod = tvm.parser.fromtext(cmsisnn_model) + orig_mod = tvm.relay.fromtext(original_model) + cmsisnn_mod = tvm.relay.fromtext(cmsisnn_model) params = {} # validate the output diff --git a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py index 02b5f9f7f122..99bd273115a7 100644 --- a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py +++ b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py @@ -473,7 +473,7 @@ def test_reordering_based_on_cycles(): @tvm.script.ir_module class ModuleBefore: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_encoded_3: T.Buffer[112, "uint8"], ethosu_write: T.Buffer[43672, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -521,7 +521,7 @@ def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208 @tvm.script.ir_module class ModuleAfter: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_encoded_3: T.Buffer[112, "uint8"], ethosu_write: T.Buffer[43672, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -576,7 +576,7 @@ def test_reordering_based_on_cycles_luts_present(): @tvm.script.ir_module class ModuleBefore: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_1: T.Buffer[256, "int8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_2: T.Buffer[256, "int8"], placeholder_3: T.Buffer[256, "int8"], ethosu_write: T.Buffer[46200, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), 
placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -626,7 +626,7 @@ def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208 @tvm.script.ir_module class ModuleAfter: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_1: T.Buffer[256, "int8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_2: T.Buffer[256, "int8"], placeholder_3: T.Buffer[256, "int8"], ethosu_write: T.Buffer[46200, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") diff --git a/tests/python/contrib/test_ethosu/test_create_tiles.py b/tests/python/contrib/test_ethosu/test_create_tiles.py index d51c438cbf4e..e4b4067a2977 100644 --- a/tests/python/contrib/test_ethosu/test_create_tiles.py +++ b/tests/python/contrib/test_ethosu/test_create_tiles.py @@ -48,7 +48,7 @@ def test_create_tiles_h(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 6): @@ -79,7 +79,7 @@ def test_create_tiles_w(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 1): @@ -110,7 +110,7 @@ def test_create_tiles_wrong_var_stride(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 6): @@ -141,7 +141,7 @@ def test_create_tiles_multiple_var_occurrences(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHWC") for i0 in T.serial(0, 1): for i1 in T.serial(0, 5): diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py index 871c7e29df20..030976845298 100644 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ b/tests/python/contrib/test_ethosu/test_encode_constants.py @@ -36,7 +36,7 @@ @tvm.script.ir_module class WeightStreamOnlyU55: 
@T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) @@ -66,7 +66,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class WeightStreamOnlyU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -154,7 +154,7 @@ def _get_func(): @tvm.script.ir_module class RereadWeightsU55: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([384], "uint8") @@ -175,7 +175,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class RereadWeightsU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -256,7 +256,7 @@ def _get_func(): @tvm.script.ir_module class DirectReadOnlyU55: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([592], "uint8") @@ -276,7 +276,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class DirectReadOnlyU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -354,7 +354,7 @@ def _get_func(): @tvm.script.ir_module class MixedReadU55: @T.prim_func - def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = 
T.Buffer([112], "uint8") @@ -387,7 +387,7 @@ def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer @tvm.script.ir_module class MixedReadU65: @T.prim_func - def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py index ea1cae50e6eb..f38e981e93bd 100644 --- a/tests/python/contrib/test_ethosu/test_hoist_allocates.py +++ b/tests/python/contrib/test_ethosu/test_hoist_allocates.py @@ -106,7 +106,7 @@ def test_double_convolution(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 27, 42, 3), "int8"], input_placeholder_encoded: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_1: T.Buffer[(3, 10), "uint8"], input_placeholder_encoded_2: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_3: T.Buffer[(3, 10), "uint8"], input_ethosu_write: T.Buffer[(1, 27, 42, 3), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 27, 42, 3), "int8"), input_placeholder_encoded: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_1: T.Buffer((3, 10), "uint8"), input_placeholder_encoded_2: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_3: T.Buffer((3, 10), "uint8"), input_ethosu_write: T.Buffer((1, 27, 42, 3), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([3402], dtype="int8", data=input_placeholder.data) @@ -150,7 +150,7 @@ def test_identities(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 2, 3, 4), "int8"], T_concat: T.Buffer[(24,), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 2, 3, 4), "int8"), T_concat: T.Buffer((24,), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([24], dtype="int8", data=input_placeholder.data) @@ -187,7 +187,7 @@ def test_outer_seq_stmt(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], 
dtype="int8", data=input_placeholder.data) @@ -237,7 +237,7 @@ def test_allocate_without_seq_stmt(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py index 5bc31dacb59d..c445ceb2f3e3 100644 --- a/tests/python/contrib/test_ethosu/test_legalize.py +++ b/tests/python/contrib/test_ethosu/test_legalize.py @@ -86,7 +86,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%1, %2, %3, %4) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) def expected_mod_axis2(): expected_ir_string = """ @@ -107,7 +107,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%1, %2, %3, %4) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] @@ -174,7 +174,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%8, %10, %12, %14, %16) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) def expected_mod_axis2(): expected_ir_string = """ @@ -209,7 +209,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%8, %10, %12, %14, %16) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] diff --git a/tests/python/contrib/test_ethosu/test_merge_constants.py b/tests/python/contrib/test_ethosu/test_merge_constants.py index 7465e220787c..909f9fe67365 100644 --- a/tests/python/contrib/test_ethosu/test_merge_constants.py +++ b/tests/python/contrib/test_ethosu/test_merge_constants.py @@ -38,7 +38,7 @@ def test_only_one_operator(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -56,7 +56,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) 
@tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -83,7 +83,7 @@ def test_all_operators_with_weights(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -122,7 +122,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -173,7 +173,7 @@ def test_operators_with_and_without_weights(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) -> None: + def main(buffer2: T.Buffer((80,), "uint8"), buffer3: T.Buffer((64,), "uint8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer0 = T.Buffer([390336], "int8") buffer1 = T.Buffer([97156], "int8") @@ -192,7 +192,7 @@ def main(buffer2: T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) - @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((144,), "uint8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer0 = T.Buffer([390336], "int8") buffer1 = T.Buffer([97156], "int8") @@ -221,15 +221,15 @@ def test_copy_to_buffer_with_local_scope(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer1: T.Buffer[(64,), "uint8"], - buffer2: T.Buffer[(48,), "uint8"], - buffer3: T.Buffer[(256,), "uint8"], - buffer4: T.Buffer[(256,), "uint8"], - buffer5: T.Buffer[(16,), "uint8"], - buffer6: T.Buffer[(48,), "uint8"], - buffer7: T.Buffer[(256,), "uint8"], - buffer8: T.Buffer[(64,), "uint8"], - buffer9: T.Buffer[(256,), "int8"], + def main(buffer1: T.Buffer((64,), "uint8"), + buffer2: T.Buffer((48,), "uint8"), + buffer3: T.Buffer((256,), "uint8"), + buffer4: T.Buffer((256,), "uint8"), + buffer5: T.Buffer((16,), "uint8"), + buffer6: T.Buffer((48,), "uint8"), + buffer7: T.Buffer((256,), "uint8"), + buffer8: T.Buffer((64,), "uint8"), + buffer9: T.Buffer((256,), "int8"), ) -> None: 
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body @@ -258,13 +258,13 @@ def main(buffer1: T.Buffer[(64,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer1: T.Buffer[(64,), "uint8"], - buffer2: T.Buffer[(96,), "uint8"], - buffer4: T.Buffer[(256,), "uint8"], - buffer5: T.Buffer[(64,), "uint8"], - buffer7: T.Buffer[(256,), "uint8"], - buffer8: T.Buffer[(64,), "uint8"], - buffer9: T.Buffer[(256,), "int8"], + def main(buffer1: T.Buffer((64,), "uint8"), + buffer2: T.Buffer((96,), "uint8"), + buffer4: T.Buffer((256,), "uint8"), + buffer5: T.Buffer((64,), "uint8"), + buffer7: T.Buffer((256,), "uint8"), + buffer8: T.Buffer((64,), "uint8"), + buffer9: T.Buffer((256,), "int8"), ) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body @@ -348,7 +348,7 @@ def test_copies_to_the_same_buffer(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -369,7 +369,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -399,7 +399,7 @@ def test_read_from_the_same_buffer(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: T.Buffer[(96,), "uint8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -419,7 +419,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], buffer1: T.Buffer @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -449,7 +449,7 @@ def test_arbitrary_argument_order(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: T.Buffer[(96,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer3: T.Buffer[(368,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), 
buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -476,7 +476,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(464,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -515,7 +515,7 @@ def test_arbitrary_argument_order_const_split(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(96,), "uint8"], buffer3: T.Buffer[(368,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((96,), "uint8"), buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -542,7 +542,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(464,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -581,7 +581,7 @@ def test_arbitrary_argument_order_const_split_mixed(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: T.Buffer[(368,), "uint8"], input_ethosu_write: T.Buffer[(2,16,16,8), "int8"], buffer3: T.Buffer[(96,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8"), buffer3: T.Buffer((96,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -608,7 +608,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], buffer2: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(2,16,16,8), "int8"]) -> None: + def main(input_placeholder: 
T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), buffer2: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -647,7 +647,7 @@ def test_cycle_count(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) v1a = T.var("int32") @@ -710,7 +710,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) v1a = T.var("int32") diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py index 64777aa0fb71..ef034930d7bc 100644 --- a/tests/python/contrib/test_ethosu/test_remove_concatenates.py +++ b/tests/python/contrib/test_ethosu/test_remove_concatenates.py @@ -31,7 +31,7 @@ @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,8,12,16), "int8"], input_placeholder_1: T.Buffer[(1,8,10,16), "int8"], input_T_concat: T.Buffer[(1,8,32,16), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1,8,12,16), "int8"), input_placeholder_1: T.Buffer((1,8,10,16), "int8"), input_T_concat: T.Buffer((1,8,32,16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py index ffa6d6effd79..6bcea7008c86 100644 --- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py +++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py @@ -367,7 +367,7 @@ def _visit(stmt): @tvm.script.ir_module class Conv2dDoubleCascade1: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([304], "uint8") @@ -389,7 +389,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dDoubleCascade2: 
@T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([80], "uint8") @@ -411,7 +411,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dDoubleCascade3: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 20, 4, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 16, 16, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 20, 4, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1744], "uint8") @@ -436,7 +436,7 @@ def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_wri @tvm.script.ir_module class Conv2dDoubleCascade4: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 2, 8, 16), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 2, 8, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1456], "uint8") @@ -458,7 +458,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_w @tvm.script.ir_module class Conv2dDoubleCascade5: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: T.Buffer[(1, 32, 32, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write: T.Buffer((1, 32, 32, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -480,7 +480,7 @@ def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: @tvm.script.ir_module class Conv2dDoubleCascade6: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write: T.Buffer[(1, 32, 2, 32, 16), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write: T.Buffer((1, 32, 2, 32, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1456], "uint8") @@ -644,7 +644,7 @@ def _get_func( @tvm.script.ir_module class Conv2dInlineCopy1: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 10, 12, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([848], "uint8") @@ -659,7 +659,7 @@ def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_wri @tvm.script.ir_module class Conv2dInlineCopy2: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 7, 9, 5), "int8"], input_ethosu_write_1: T.Buffer[(1, 3, 5, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 7, 9, 
5), "int8"), input_ethosu_write_1: T.Buffer((1, 3, 5, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -703,7 +703,7 @@ def _get_func(ifm_shape, lower, upper, ofm_channels=16): @tvm.script.ir_module class Conv2dInlineReshape1: @T.prim_func - def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((4, 6, 8, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -719,7 +719,7 @@ def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dInlineReshape2: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 24, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -735,7 +735,7 @@ def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1 @tvm.script.ir_module class Conv2dInlineReshape3: @T.prim_func - def main(input_placeholder_3: T.Buffer[(192, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((192, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -751,7 +751,7 @@ def main(input_placeholder_3: T.Buffer[(192, 1), "int8"], input_ethosu_write_1: @tvm.script.ir_module class Conv2dInlineReshape4: @T.prim_func - def main(placeholder_3: T.Buffer[(192,), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(placeholder_3: T.Buffer((192,), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py index 29e1f9814c81..94763c5d3fbf 100644 --- a/tests/python/contrib/test_ethosu/test_replace_copy.py +++ b/tests/python/contrib/test_ethosu/test_replace_copy.py @@ -34,7 +34,7 @@ @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer_1 = T.Buffer([384], "uint8") @@ -78,7 +78,7 @@ def _get_func(): @tvm.script.ir_module class WeightStream: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 16), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 16), 
"int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([528], "uint8") diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py index eb3f4c7c2a31..21f0b7c157ae 100644 --- a/tests/python/contrib/test_ethosu/test_scheduler.py +++ b/tests/python/contrib/test_ethosu/test_scheduler.py @@ -180,7 +180,7 @@ def test_schedule_cache_reads(): @tvm.script.ir_module class DiamondGraphTir: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 56, 56, 96), "int8"], input_ethosu_write: T.Buffer[(1, 56, 56, 24), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 56, 56, 96), "int8"), input_ethosu_write: T.Buffer((1, 56, 56, 24), "int8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data) ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data) diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py index 632fe0017f95..22f886a5917a 100644 --- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py +++ b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py @@ -33,7 +33,7 @@ @tvm.script.ir_module class SingleEthosUConv2D: @T.prim_func - def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(1024,), "int8"]) -> None: + def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((1024,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.Buffer([1], "uint8") @@ -48,7 +48,7 @@ def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(10 @tvm.script.ir_module class MultiEthosUConv2D: @T.prim_func - def main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512,), "int8"]) -> None: + def main(placeholder_6: T.Buffer((192,), "int8"), ethosu_conv2d_1: T.Buffer((512,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_9 = T.Buffer([1], "uint8") @@ -70,7 +70,7 @@ def main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512 @tvm.script.ir_module class MultiEthosUCopy: @T.prim_func - def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((2048,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_5 = T.Buffer([1], "int32") @@ -89,7 +89,7 @@ def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(20 @tvm.script.ir_module class WeightStreamOnly: @T.prim_func - def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), "int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "uint8") @@ -135,7 +135,7 @@ def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), @tvm.script.ir_module class MixedRead: @T.prim_func - def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), 
"int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "uint8") @@ -672,7 +672,7 @@ def populate_ethosu_copy_calls(stmt): @tvm.script.ir_module class MixedConstantDatatypes: @T.prim_func - def main(placeholder_4: T.Buffer[(2048,), "int8"], ethosu_write_1: T.Buffer[(16,), "int8"]) -> None: + def main(placeholder_4: T.Buffer((2048,), "int8"), ethosu_write_1: T.Buffer((16,), "int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "int16") diff --git a/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py b/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py index 2b6bca008e05..bc8edca7d844 100644 --- a/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py +++ b/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py @@ -28,14 +28,14 @@ # pylint: disable=invalid-name @T.prim_func def conv2d_async_non_contig( - p0: T.Buffer[(T.int64(1), T.int64(1), T.int64(56), T.int64(56), T.int64(4)), "uint8"], - fused_constant_1: T.Buffer[ + p0: T.Buffer((T.int64(1), T.int64(1), T.int64(56), T.int64(56), T.int64(4)), "uint8"), + fused_constant_1: T.Buffer( (T.int64(1), T.int64(1), T.int64(3), T.int64(3), T.int64(1), T.int64(32), T.int64(4)), "uint8", - ], - conv2d_NCHWc_int8: T.Buffer[ + ), + conv2d_NCHWc_int8: T.Buffer( (T.int64(1), T.int64(1), T.int64(54), T.int64(54), T.int64(32)), "int32" - ], + ), ): """Non contiguous memory access is used in this conv2d taken from MS.""" # pylint: disable=no-self-argument @@ -538,9 +538,9 @@ class ModulePipelined: # pylint: disable=no-self-argument @T.prim_func def main( - p0_buffer: T.Buffer[(1, 1, 230, 230, 4), "uint8"], - p1_buffer: T.Buffer[(2, 1, 7, 7, 1, 32, 4), "int8"], - t_cast: T.Buffer[(1, 2, 112, 112, 32), "int32"], + p0_buffer: T.Buffer((1, 1, 230, 230, 4), "uint8"), + p1_buffer: T.Buffer((2, 1, 7, 7, 1, 32, 4), "int8"), + t_cast: T.Buffer((1, 2, 112, 112, 32), "int32"), ) -> None: # pylint: disable=missing-function-docstring # function attr dict @@ -690,9 +690,9 @@ class ModuleBase: # pylint: disable=no-self-argument @T.prim_func def main( - p0_buffer: T.Buffer[(1, 1, 230, 230, 4), "uint8"], - p1_buffer: T.Buffer[(2, 1, 7, 7, 1, 32, 4), "int8"], - t_cast: T.Buffer[(1, 2, 112, 112, 32), "int32"], + p0_buffer: T.Buffer((1, 1, 230, 230, 4), "uint8"), + p1_buffer: T.Buffer((2, 1, 7, 7, 1, 32, 4), "int8"), + t_cast: T.Buffer((1, 2, 112, 112, 32), "int32"), ) -> None: # pylint: disable=missing-function-docstring # function attr dict diff --git a/tests/python/contrib/test_hexagon/test_meta_schedule.py b/tests/python/contrib/test_hexagon/test_meta_schedule.py index 1089f0f03589..a64f0fc28653 100644 --- a/tests/python/contrib/test_hexagon/test_meta_schedule.py +++ b/tests/python/contrib/test_hexagon/test_meta_schedule.py @@ -241,9 +241,9 @@ class ModuleVRMPYAutoTensorize: # pylint: disable=no-self-argument @T.prim_func def main( # type: ignore - X: T.Buffer[(128, 768), "uint8"], # type: ignore - packed_width: T.Buffer[(24, 192, 32, 4), "uint8"], # type: ignore - compute: T.Buffer[(128, 768), "int32"], # type: ignore + X: T.Buffer((128, 768), "uint8"), # type: ignore + packed_width: T.Buffer((24, 192, 32, 4), "uint8"), # type: ignore + compute: T.Buffer((128, 768), "int32"), # type: ignore ) -> None: # pylint: disable=missing-function-docstring T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/contrib/test_hexagon/test_software_pipeline_async.py 
b/tests/python/contrib/test_hexagon/test_software_pipeline_async.py index c831472a521d..7c010f363fe1 100644 --- a/tests/python/contrib/test_hexagon/test_software_pipeline_async.py +++ b/tests/python/contrib/test_hexagon/test_software_pipeline_async.py @@ -31,7 +31,7 @@ def compute(comp_type, outer, inner, dtype): @T.prim_func def a_plus_1_primfunc( - a_buffer: T.Buffer[(outer, inner), dtype], out: T.Buffer[(outer, inner), dtype] + a_buffer: T.Buffer((outer, inner), dtype), out: T.Buffer((outer, inner), dtype) ): for i in T.serial(outer): for j in T.serial(inner): @@ -44,9 +44,9 @@ def a_plus_1_primfunc( @T.prim_func def a_plus_b_plus_1_primfunc( - a_buffer: T.Buffer[(outer, inner), dtype], - b_buffer: T.Buffer[(outer, inner), dtype], - out: T.Buffer[(outer, inner), dtype], + a_buffer: T.Buffer((outer, inner), dtype), + b_buffer: T.Buffer((outer, inner), dtype), + out: T.Buffer((outer, inner), dtype), ): for i in T.serial(outer): for j in T.serial(inner): diff --git a/tests/python/contrib/test_hexagon/test_vtcm.py b/tests/python/contrib/test_hexagon/test_vtcm.py index e71f890740c1..a549588e5768 100644 --- a/tests/python/contrib/test_hexagon/test_vtcm.py +++ b/tests/python/contrib/test_hexagon/test_vtcm.py @@ -24,7 +24,7 @@ @T.prim_func -def scale_by_two(buffer_a: T.Buffer[(8192,), "int8"], buffer_c: T.Buffer[(8192,), "int8"]): +def scale_by_two(buffer_a: T.Buffer((8192,), "int8"), buffer_c: T.Buffer((8192,), "int8")): for i in T.serial( 0, 8192, diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 58695e1fd63f..e78e35ff5c7c 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -63,9 +63,9 @@ def astext(program, unify_free_vars=False): """check that program is parsable in text format""" text = program.astext() if isinstance(program, relay.Expr): - roundtrip_program = tvm.parser.parse_expr(text) + roundtrip_program = tvm.relay.parse_expr(text) else: - roundtrip_program = tvm.parser.fromtext(text) + roundtrip_program = tvm.relay.fromtext(text) tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True) diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 1e1bd435d51f..1ca0f3faef47 100755 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -5795,7 +5795,7 @@ def @main(%A: Tensor[(4, 176, 8, 8), float32]) { divide(%528, %533) /* truediv */ } """ - mod_golden = tvm.parser.parse('#[version = "0.0.5"]\n' + program) + mod_golden = tvm.relay.parse('#[version = "0.0.5"]\n' + program) tvm.ir.assert_structural_equal(mod["main"].body, mod_golden["main"].body, map_free_vars=True) diff --git a/tests/python/integration/test_legacy_tuning.py b/tests/python/integration/test_legacy_tuning.py index 04c5f85ce5d4..5dc6aa2106a8 100644 --- a/tests/python/integration/test_legacy_tuning.py +++ b/tests/python/integration/test_legacy_tuning.py @@ -340,7 +340,7 @@ def test_tuning_cpu(): """Test tuning on cpu.""" def runner(): - ir_mod = tvm.parser.fromtext( + ir_mod = tvm.relay.fromtext( textwrap.dedent( """ #[version = "0.0.5"] diff --git a/tests/python/relay/aot/test_aot_create_executor_metadata.py b/tests/python/relay/aot/test_aot_create_executor_metadata.py index 0ef4449541f8..1bc79fe2a607 100644 --- a/tests/python/relay/aot/test_aot_create_executor_metadata.py +++ b/tests/python/relay/aot/test_aot_create_executor_metadata.py @@ -53,7 +53,7 @@ def 
test_create_executor_metadata_single_func(): class Module: @T.prim_func def __tvm_main__( - a: T.handle, output: T.handle, workspace: T.Ptr[T.uint8], constants: T.Ptr[T.uint8] + a: T.handle, output: T.handle, workspace: T.Ptr(T.uint8), constants: T.Ptr(T.uint8) ) -> None: # function attr dict T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]}) diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py index 89c34eaac8b6..0c5931a55d31 100644 --- a/tests/python/relay/aot/test_cpp_aot.py +++ b/tests/python/relay/aot/test_cpp_aot.py @@ -81,7 +81,7 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), } """ ) - ir_mod = tvm.parser.fromtext(relay_model) + ir_mod = tvm.relay.fromtext(relay_model) main_func = ir_mod["main"] shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index 2e7e23ead65f..d99d6173bc5f 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -773,7 +773,7 @@ def test_aot_codegen_backend_alloc_workspace_calls(): # small tensors that would get lowered to stack allocations in the CPU PrimFuncs. # However, the AoT executor codegen should retain them as TVMBAW calls # pylint: disable=line-too-long - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] { diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py index 75613d81e145..12c60a726651 100644 --- a/tests/python/relay/aot/test_crt_aot_usmp.py +++ b/tests/python/relay/aot/test_crt_aot_usmp.py @@ -456,7 +456,7 @@ def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo): def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo): """This checks for inference using workspace and constant pools placed in the application""" - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """\ #[version = "0.0.5"] def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) { diff --git a/tests/python/relay/aot/test_pass_aot_lower_main.py b/tests/python/relay/aot/test_pass_aot_lower_main.py index b523e019299c..f2455e97a051 100644 --- a/tests/python/relay/aot/test_pass_aot_lower_main.py +++ b/tests/python/relay/aot/test_pass_aot_lower_main.py @@ -52,7 +52,7 @@ def _assert_lowered_main(mod, main_func, call_type, print_script=False): def test_single_call_cpacked(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -79,7 +79,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_single_call_packed(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -106,7 +106,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_single_call_unpacked(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -133,7 +133,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_constant(): - mod = tvm.parser.parse( + 
mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } @@ -164,7 +164,7 @@ def func(a: T.handle, output: T.handle) -> None: # TODO(@mbaret) There seems to be a TVMScript round-trip bug causing this to fail @pytest.mark.xfail() def test_copy_to_output(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -178,13 +178,13 @@ def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { def func(a: T.handle, output: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - tmp_read = T.buffer_var("uint8", "") + tmp_read = T.Ptr("uint8", "") # buffer definition tmp_read_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_read) a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) # body - tmp_write: T.Ptr[T.uint8] = output_buffer.data + tmp_write: T.Ptr(T.uint8) = output_buffer.data tmp_write_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_write) for i in T.serial(140): tmp_write_1[i] = T.let(tmp_read, a_buffer.data, tmp_read_1[i]) @@ -194,7 +194,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_two_calls(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -225,7 +225,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_tuple_output(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { (%x, %x) } @@ -253,7 +253,7 @@ def func(a: T.handle, output0: T.handle, output1: T.handle) -> None: def test_tuple_intermediate(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> (Tensor[(5, 7), float32], Tensor[(5, 7), float32]) { (%x, %x) } @@ -286,7 +286,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_multi_input(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } @@ -314,7 +314,7 @@ def func(a: T.handle, b: T.handle, output: T.handle) -> None: def test_let_binding(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } @@ -342,7 +342,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_let_binding_branch(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } @@ -383,7 +383,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_device_hooks(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } diff --git a/tests/python/relay/backend/test_pass_lower_te.py b/tests/python/relay/backend/test_pass_lower_te.py index d439f22b1246..89bd62fe5aa8 100644 --- a/tests/python/relay/backend/test_pass_lower_te.py +++ b/tests/python/relay/backend/test_pass_lower_te.py @@ -50,7 +50,7 @@ def transform(mod): def test_lower_primitive(): - input_mod = 
tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -95,7 +95,7 @@ def test_lower_compiler(): def relay_ext_test_pass_lower_te(func): return None - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -140,7 +140,7 @@ def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { def test_lower_extern(): - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -183,7 +183,7 @@ def @my_add(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32], Extern=1 def test_lower_extern_with_dynamic_shape(): - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(?, ?), float32] { diff --git a/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py b/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py index 2113ae7b5c72..8b1b10d68e16 100644 --- a/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py +++ b/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py @@ -75,7 +75,7 @@ def reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("reshape", type_annot=reshape_ty) mod[reshape_gv] = reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -141,7 +141,7 @@ def reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("reshape", type_annot=reshape_ty) mod[reshape_gv] = reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -221,7 +221,7 @@ def fused_reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("fused_reshape", type_annot=reshape_ty) mod[reshape_gv] = fused_reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], diff --git a/tests/python/relay/collage/menangerie.py b/tests/python/relay/collage/menangerie.py index d5275fbd34c5..e74059282e3e 100644 --- a/tests/python/relay/collage/menangerie.py +++ b/tests/python/relay/collage/menangerie.py @@ -86,7 +86,7 @@ def mnist_consts(dtype): def mnist(): metatable = {"relay.Constant": mnist_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 1, 28, 28), float32]) -> Tensor[(1, 10), float32] { @@ -301,7 +301,7 @@ def gpt2_consts(dtype): def gpt2(): metatable = {"relay.Constant": gpt2_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float32], @@ -1252,7 +1252,7 @@ def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float32], def gpt2_16(): metatable = {"relay.Constant": gpt2_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float16], @@ -2218,7 +2218,7 @@ def gpt2_extract_consts(dtype): def gpt2_extract(): metatable = {"relay.Constant": gpt2_extract_consts("float32")} - mod = 
tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1600, 768), float32]) -> Tensor[(50, 32, 3072), float32] { @@ -2265,7 +2265,7 @@ def @main(%x: Tensor[(1600, 768), float32]) -> Tensor[(50, 32, 3072), float32] { def gpt2_extract_16(): metatable = {"relay.Constant": gpt2_extract_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1600, 768), float16]) -> Tensor[(50, 32, 3072), float16] { @@ -2325,7 +2325,7 @@ def gpt2_16_for_cutlass_extract_consts(dtype): def gpt2_16_for_cutlass_extract(): metatable = {"relay.Constant": gpt2_16_for_cutlass_extract_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0: Tensor[(1600, 768), float16], @@ -2622,7 +2622,7 @@ def resnet50_consts(dtype): def resnet50(): metatable = {"relay.Constant": resnet50_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { @@ -2870,7 +2870,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32 def resnet50_16(): metatable = {"relay.Constant": resnet50_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float16]) -> Tensor[(1, 1000), float16] { @@ -3392,7 +3392,7 @@ def mobilenet_consts(dtype): def mobilenet(): metatable = {"relay.Constant": mobilenet_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { @@ -3622,7 +3622,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32 def mobilenet_16(): metatable = {"relay.Constant": mobilenet_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float16]) -> Tensor[(1, 1000), float16] { @@ -3861,7 +3861,7 @@ def batch_norm_extract(): ], ) metatable = {"relay.Constant": consts} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%FunctionVar_0: Tensor[(1, 32, 112, 112), float32]) -> Tensor[(1, 32, 112, 112), float32] { @@ -3945,7 +3945,7 @@ def resnext50_32x4d_consts(dtype): def resnext50_32x4d(): metatable = {"relay.Constant": resnext50_32x4d_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 64, 56, 56), float32]) { @@ -4083,7 +4083,7 @@ def @main(%x: Tensor[(1, 64, 56, 56), float32]) { def resnext50_32x4d_16(): metatable = {"relay.Constant": resnext50_32x4d_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 64, 56, 56), float16]) { diff --git a/tests/python/relay/collage/test_sub_graph.py b/tests/python/relay/collage/test_sub_graph.py index 21f12c43dccd..785bdf750169 100644 --- a/tests/python/relay/collage/test_sub_graph.py +++ b/tests/python/relay/collage/test_sub_graph.py @@ -53,7 +53,7 @@ def run(in_mod, expected_mod, max_outputs, allow_taps, compiler, map): def test_single_op(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -66,7 +66,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.fromtext( + return 
tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -83,7 +83,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], def test_multi_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -96,7 +96,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -117,7 +117,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], def test_classic_conv2d_add_relu(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32], @@ -131,7 +131,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32], @@ -151,7 +151,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_diamond_single_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -165,7 +165,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -185,7 +185,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_diamond_multi_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -199,7 +199,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -222,7 +222,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_with_tap(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -234,7 +234,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -258,7 +258,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_no_cycles(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { @@ -270,7 +270,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: 
Tensor[(5, 7), float32]) { @@ -291,7 +291,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { def test_labels_direct_connection(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -309,7 +309,7 @@ def @main(%a: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -337,7 +337,7 @@ def @main(%a: Tensor[(5, 7), float32]) { def test_labels_nested_tap(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -355,7 +355,7 @@ def @main(%a: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { diff --git a/tests/python/relay/dyn/test_dynamic_op_level3.py b/tests/python/relay/dyn/test_dynamic_op_level3.py index ab562f0f49f5..afc42c778a72 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level3.py +++ b/tests/python/relay/dyn/test_dynamic_op_level3.py @@ -428,7 +428,7 @@ def verify_sparse_fill_empty_rows( def test_dyn_copy(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int64] { @@ -444,7 +444,7 @@ def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int64] { def test_dyn_copy_scalar(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: int32, %y: Tensor[(?), int32]) -> Tensor[(?), int32] { @@ -464,7 +464,7 @@ def @main(%x: int32, %y: Tensor[(?), int32]) -> Tensor[(?), int32] { def test_dyn_cast(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int32] { diff --git a/tests/python/relay/test_backend_graph_executor.py b/tests/python/relay/test_backend_graph_executor.py index 179077e8742d..fc6ec59a6d51 100644 --- a/tests/python/relay/test_backend_graph_executor.py +++ b/tests/python/relay/test_backend_graph_executor.py @@ -196,7 +196,7 @@ def test_plan_2d_memory(): ] } - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 32, 40, 40), float32], diff --git a/tests/python/relay/test_dataflow_pattern.py b/tests/python/relay/test_dataflow_pattern.py index a11673bf6930..1bd05f5258b1 100644 --- a/tests/python/relay/test_dataflow_pattern.py +++ b/tests/python/relay/test_dataflow_pattern.py @@ -1826,7 +1826,7 @@ def test_matched_outside_but_dominated(): """In this example the pattern matches the nn.conv2d/add/multiply flow. Even though the add output is consumed by the sigmoid, the sigmoid itself is dominated by the multiply. 
So partitioning can proceed, albeit with a duplication of the add.""" - in_mod = tvm.parser.parse( + in_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, 3), float16], %bias: Tensor[(32), float32]) -> Tensor[(16, 32, 32, 32), float32] { @@ -1843,7 +1843,7 @@ def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, } """ ) - expected_mod = tvm.parser.parse( + expected_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, 3), float16], %bias: Tensor[(32), float32]) -> Tensor[(16, 32, 32, 32), float32] { diff --git a/tests/python/relay/test_ir_parser.py b/tests/python/relay/test_ir_parser.py index 08fa01f0b39b..7e8f8c54f486 100644 --- a/tests/python/relay/test_ir_parser.py +++ b/tests/python/relay/test_ir_parser.py @@ -75,18 +75,18 @@ def graph_equal(lhs, rhs): def roundtrip_expr(expr): text = expr.astext() - x = tvm.parser.parse_expr(text) + x = tvm.relay.parse_expr(text) assert_graph_equal(x, expr) # Testing Utilities for expressions. def roundtrip(expr): - x = tvm.parser.fromtext(expr.astext()) + x = tvm.relay.fromtext(expr.astext()) assert_graph_equal(x, expr) def parse_text(code): - expr = tvm.parser.parse_expr(code) + expr = tvm.relay.parse_expr(code) roundtrip_expr(expr) return expr @@ -100,7 +100,7 @@ def parses_as(code, expr): # Testing Utilities for full modules. def parse_module(code): - mod = tvm.parser.parse(SEMVER + code) + mod = tvm.relay.parse(SEMVER + code) roundtrip(mod) return mod @@ -423,7 +423,7 @@ def @main(%x: float32) { ref_read(%0) } """ - tvm.parser.parse(program) + tvm.relay.parse(program) def test_call(): @@ -868,7 +868,7 @@ def test_import_grad(): def test_mlp(): mod, _ = relay.testing.mlp.get_workload(1) text = mod.astext() - parsed_mod = tvm.parser.parse(text) + parsed_mod = tvm.relay.parse(text) tvm.ir.assert_structural_equal(mod, parsed_mod) @@ -893,7 +893,7 @@ def test_mlp_inlined_params(): mod = inline_params(mod, params) mod = relay.transform.InferType()(mod) text = mod.astext() - parsed_mod = tvm.parser.parse(text) + parsed_mod = tvm.relay.parse(text) tvm.ir.assert_structural_equal(mod, parsed_mod) @@ -945,7 +945,7 @@ def test_op_string_attr(): def test_load_prelude(): mod = tvm.IRModule() mod.import_from_std("prelude.rly") - tvm.parser.parse(mod.astext()) + tvm.relay.parse(mod.astext()) def test_call_attrs(): @@ -1006,7 +1006,7 @@ def test_func_attrs(): def test_init_module_and_metatable(): init_metatable = {"relay.Constant": [relay.const(np.random.rand(2, 3), dtype="float32")]} - init_module = tvm.parser.fromtext( + init_module = tvm.relay.fromtext( SEMVER + """ def @f(%y : Tensor[(2, 3), float32]) -> Tensor[(2, 3), float32] { @@ -1014,7 +1014,7 @@ def @f(%y : Tensor[(2, 3), float32]) -> Tensor[(2, 3), float32] { } """, ) - mod = tvm.parser.parse( + mod = tvm.relay.parse( SEMVER + """ def @main(%x: Tensor[(2, 3), float32]) { diff --git a/tests/python/relay/test_ir_text_printer.py b/tests/python/relay/test_ir_text_printer.py index ba3b2b348acc..b1599c1b919f 100644 --- a/tests/python/relay/test_ir_text_printer.py +++ b/tests/python/relay/test_ir_text_printer.py @@ -33,9 +33,9 @@ def astext(program, unify_free_vars=False): text = program.astext() if isinstance(program, Expr): - roundtrip_program = tvm.parser.parse_expr(text) + roundtrip_program = tvm.relay.parse_expr(text) else: - roundtrip_program = tvm.parser.fromtext(text) + roundtrip_program = tvm.relay.fromtext(text)
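The parser migration applied in these hunks is mechanical: every tvm.parser entry point becomes the same-named tvm.relay entry point. A minimal round-trip sketch using only calls that appear in this diff (the module text itself is illustrative):

import tvm

SEMVER = '#[version = "0.0.5"]\n'
mod = tvm.relay.parse(SEMVER + "def @main(%x: Tensor[(2, 3), float32]) { %x }")
# Print and re-parse, then check the round trip preserved the module.
roundtrip = tvm.relay.fromtext(mod.astext())
tvm.ir.assert_structural_equal(roundtrip, mod, map_free_vars=True)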
tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True) @@ -254,7 +254,7 @@ def @main[A]() -> fn (A, List[A]) -> List[A] { Cons } """ - mod = tvm.parser.parse(SEMVER + type_def_str + main_def_str) + mod = tvm.relay.parse(SEMVER + type_def_str + main_def_str) mod_str = str(mod) # ensure constructors are printed correctly in type definitions (with their # signature) and as exprs (without their signature) diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py index 32e7d2ca3867..bbd851dc9c7f 100644 --- a/tests/python/relay/test_op_grad_level2.py +++ b/tests/python/relay/test_op_grad_level2.py @@ -353,7 +353,7 @@ def test_conv2d_backward_weight_infer_type(): SEMVER = '#[version = "0.0.5"]\n' for code in [normal_conv_code, depthwise_conv_code]: - expr = tvm.parser.parse_expr(SEMVER + code) + expr = tvm.relay.parse_expr(SEMVER + code) fmod = tvm.IRModule.from_expr(expr) mod = relay.transform.InferType()(fmod) diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 0549f4f2fbcc..e7def019239b 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -901,7 +901,7 @@ def test_extern_concat_injective_fuse(): # do not have their elem_offset explicitly set as a variable. # fmt: off - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%p0844: Tensor[(1, 384), int64], %p1652: Tensor[(2016, 128), float16]) { diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index ed044989ac18..9db1bcf78b2a 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -73,7 +73,7 @@ def test_checkpoint_alpha_equal(): mod = tvm.transform.Sequential(passes)(tvm.IRModule.from_expr(df)) df = mod["main"] - df_parsed = tvm.parser.parse_expr( + df_parsed = tvm.relay.parse_expr( """ #[version = "0.0.5"] fn (%x: Tensor[(1), float32], %y: Tensor[(1), float32], @@ -137,7 +137,7 @@ def test_checkpoint_alpha_equal_tuple(): mod = tvm.transform.Sequential(passes)(tvm.IRModule.from_expr(df)) df = mod["main"] - df_parsed = tvm.parser.parse_expr( + df_parsed = tvm.relay.parse_expr( """ #[version = "0.0.5"] fn (%x: Tensor[(1), float32], %y: Tensor[(1), float32], diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py index 24cdabd2a6c3..488866ab6ff8 100644 --- a/tests/python/relay/test_pass_auto_quantize.py +++ b/tests/python/relay/test_pass_auto_quantize.py @@ -232,7 +232,7 @@ def _eval_mod(mod): def test_add_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -247,7 +247,7 @@ def @main( def test_conv2d_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -266,7 +266,7 @@ def @main( def test_multiple_arg_conversions_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -295,7 +295,7 @@ def @main( def test_unquantizable_prefix_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -318,7 +318,7 @@ def @main( def test_unquantizable_core_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -351,7 +351,7 @@ def @main( def test_unquantizable_suffix_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( diff --git 
a/tests/python/relay/test_pass_collage_partition.py b/tests/python/relay/test_pass_collage_partition.py index fa7e0a472a49..f40631628ea5 100644 --- a/tests/python/relay/test_pass_collage_partition.py +++ b/tests/python/relay/test_pass_collage_partition.py @@ -91,7 +91,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%x) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -99,7 +99,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { nn.relu(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -122,7 +122,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%x) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -137,7 +137,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook_nn_relu(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -164,7 +164,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_3_txt = """ #[version = "0.0.5"] @@ -218,7 +218,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook(%5) } """ - expected_mod = tvm.parser.fromtext(expected_1_txt if byoc_max_depth == 1 else expected_3_txt) + expected_mod = tvm.relay.fromtext(expected_1_txt if byoc_max_depth == 1 else expected_3_txt) targets = [ tvm.target.Target("llvm"), @@ -246,7 +246,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%1) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txts = { 1: """ @@ -294,7 +294,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { } """, } - expected_mod = tvm.parser.fromtext(expected_txts[tvm_max_depth]) + expected_mod = tvm.relay.fromtext(expected_txts[tvm_max_depth]) targets = [ tvm.target.Target("llvm"), @@ -322,7 +322,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%1) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txts = { 1: """ @@ -373,7 +373,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { } """, } - expected_mod = tvm.parser.fromtext(expected_txts[byoc_max_depth]) + expected_mod = tvm.relay.fromtext(expected_txts[byoc_max_depth]) targets = [ tvm.target.Target("llvm"), @@ -401,7 +401,7 @@ def @main(%x: Tensor[(10, 10), float32]) { (%0, %1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -425,7 +425,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> (Tensor[(10, 10), float32], Tensor[( (%6, %5, %7) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -451,7 +451,7 @@ def @main(%x: Tensor[(10, 10), float32]) { concatenate(%2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -482,7 +482,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(20, 10), float32] { @collage_example_target_hook_concatenate(%8) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ 
tvm.target.Target("llvm"), @@ -511,7 +511,7 @@ def @main(%x: Tensor[(10, 10), float32]) { abs(%5) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -544,7 +544,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { abs(%10) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -570,7 +570,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -598,7 +598,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook_add_add(%5, %4) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -630,7 +630,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -661,7 +661,7 @@ def @main(%x: Tensor[(10, 10), float32] ) -> Tensor[(10, 10), float32] { @collage_example_target_hook_nn_relu_nn_relu_add_add(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), diff --git a/tests/python/relay/test_pass_dead_code_elimination.py b/tests/python/relay/test_pass_dead_code_elimination.py index abd9be99e3d9..68d2919ec38d 100644 --- a/tests/python/relay/test_pass_dead_code_elimination.py +++ b/tests/python/relay/test_pass_dead_code_elimination.py @@ -28,9 +28,9 @@ def optimize_and_check(before_program, after_program, passes): if isinstance(before_program, str): - before_program = tvm.parser.parse(before_program) + before_program = tvm.relay.parse(before_program) if isinstance(after_program, str): - after_program = tvm.parser.parse(after_program) + after_program = tvm.relay.parse(after_program) if not isinstance(passes, list): passes = [passes] optimize = tvm.transform.Sequential(passes) @@ -229,7 +229,7 @@ def @main() { def test_impure_op(): """Don't elide calls to side-effecting operators.""" - before_program = tvm.parser.parse( + before_program = tvm.relay.parse( """ #[version = "0.0.5"] def @main() { @@ -245,7 +245,7 @@ def @main() { metatable, ) - after_program = tvm.parser.parse( + after_program = tvm.relay.parse( """ #[version = "0.0.5"] def @main() { @@ -268,7 +268,7 @@ def @main() { def test_impure_func(): """Don't elide calls to side-effecting functions.""" - before_program = tvm.parser.parse( + before_program = tvm.relay.parse( """ #[version = "0.0.5"] def @f() -> int { @@ -288,7 +288,7 @@ def @main() -> int { metatable, ) - after_program = tvm.parser.parse( + after_program = tvm.relay.parse( """ #[version = "0.0.5"] def @f() -> int { diff --git a/tests/python/relay/test_pass_defunctionalization.py b/tests/python/relay/test_pass_defunctionalization.py index a01c1c7e39d7..96c061bd93b1 100644 --- a/tests/python/relay/test_pass_defunctionalization.py +++ b/tests/python/relay/test_pass_defunctionalization.py @@ -142,7 +142,7 @@ def @main(%l: Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] { @simple(%0, %l) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.rand(5, 5).astype("float32") @@ -174,7 +174,7 @@ def @main(%l: List[float32]) -> List[float32] { @map(@id, %l) } 
""" - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.rand(10).astype("float32") @@ -212,7 +212,7 @@ def @main(%l: List[int32]) -> int32 { @sum(@id, %l) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.randint(1, 100, 10) diff --git a/tests/python/relay/test_pass_eta_expand.py b/tests/python/relay/test_pass_eta_expand.py index 62cc27d9c94b..b1776cb801aa 100644 --- a/tests/python/relay/test_pass_eta_expand.py +++ b/tests/python/relay/test_pass_eta_expand.py @@ -25,7 +25,7 @@ def test_eta_expand_global_var(): - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( r""" #[version = "0.0.5"] def @aux(%x: Tensor[(), int32]) -> Tensor[(), int32] { @@ -39,7 +39,7 @@ def @main() -> fn(Tensor[(), int32]) -> Tensor[(), int32] { seq = tvm.transform.Sequential([_transform.EtaExpand(expand_global_var=True)]) with tvm.transform.PassContext(opt_level=3): mod = seq(mod) - expected = tvm.parser.fromtext( + expected = tvm.relay.fromtext( r""" #[version = "0.0.5"] def @aux(%x: Tensor[(), int32]) -> Tensor[(), int32] { @@ -56,7 +56,7 @@ def @main() -> fn(Tensor[(), int32]) -> Tensor[(), int32] { def test_eta_expand_constructor(): - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( r""" #[version = "0.0.5"] type List[A] { @@ -73,7 +73,7 @@ def @main[A]() -> fn(A, List[A]) -> List[A] { ) with tvm.transform.PassContext(opt_level=3): mod = seq(mod) - expected = tvm.parser.fromtext( + expected = tvm.relay.fromtext( r""" #[version = "0.0.5"] type List[A] { diff --git a/tests/python/relay/test_pass_manifest_lifetimes.py b/tests/python/relay/test_pass_manifest_lifetimes.py index f5b4cab20708..98e203e697be 100644 --- a/tests/python/relay/test_pass_manifest_lifetimes.py +++ b/tests/python/relay/test_pass_manifest_lifetimes.py @@ -24,9 +24,9 @@ def optimize_and_check(before_program, after_program, passes): if isinstance(before_program, str): - before_program = tvm.parser.parse(before_program) + before_program = tvm.relay.parse(before_program) if isinstance(after_program, str): - after_program = tvm.parser.parse(after_program) + after_program = tvm.relay.parse(after_program) if not isinstance(passes, list): passes = [passes] optimize = tvm.transform.Sequential(passes) diff --git a/tests/python/relay/test_pass_plan_devices.py b/tests/python/relay/test_pass_plan_devices.py index 1c48589a51aa..3ff49389cbdc 100644 --- a/tests/python/relay/test_pass_plan_devices.py +++ b/tests/python/relay/test_pass_plan_devices.py @@ -113,7 +113,7 @@ def test_plain(): # Everything defaults to GPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -129,7 +129,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -156,7 +156,7 @@ def test_left_add_on_cpu(): # Force some args to be on CPU, rest default to GPU. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -173,7 +173,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -202,7 +202,7 @@ def test_left_add_on_cpu_via_copy(): # As for test_left_add_on_cpu, but with an explicit device_copy. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -219,7 +219,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -248,7 +248,7 @@ def test_left_add_on_cpu_via_copy_as_map(): # As for test_left_add_on_cpu, but with an explicit device_copy. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -272,7 +272,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], # Same expected result as for test_left_add_on_cpu, but we'll include indexes to help # the test make sense. def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], // index 0 @@ -323,7 +323,7 @@ def test_both_adds_on_cpu(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -341,7 +341,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -372,7 +372,7 @@ def test_sharing(): # The same add sub-expression is annotated twice. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { @@ -388,7 +388,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -418,7 +418,7 @@ def test_let_on_cpu(): # The device for a let-bound expression can flow from uses of the let-bound var. 
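The device-planning tests in this file all share the input()/expected() idiom seen above: parse both modules, run the pass under test, and compare structurally. A hedged sketch of that shape, where run_pass is a hypothetical stand-in (e.g. a tvm.transform.Sequential) for whatever pass a given test exercises:

import tvm

def check(input_mod, expected_mod, run_pass):
    # Accept raw Relay text as well, as optimize_and_check does elsewhere in this diff.
    if isinstance(input_mod, str):
        input_mod = tvm.relay.parse(input_mod)
    if isinstance(expected_mod, str):
        expected_mod = tvm.relay.parse(expected_mod)
    tvm.ir.assert_structural_equal(run_pass(input_mod), expected_mod, map_free_vars=True)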
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -435,7 +435,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -464,7 +464,7 @@ def test_func_param_on_cpu(): # Devices for function parameters flow to call sites. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -484,7 +484,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -515,7 +515,7 @@ def test_func_result_on_cpu(): # Devices for call sites flow to function results. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -535,7 +535,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -568,7 +568,7 @@ def test_higher_order(): # The constraint on %a flows back to %y via %f and %h def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -593,7 +593,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -635,7 +635,7 @@ def test_function_in_tuple(): # Since %f ends up in a tuple, its argument and result are forced to be on the CPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -655,7 +655,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -686,7 +686,7 @@ def test_device_copy(): metatable = {"VirtualDevice": [CPU, GPU], "relay.Constant": [relay.const(const)]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32]) { @@ -700,7 +700,7 @@ def @main(%x: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -727,7 +727,7 @@ def test_shape_of(): # result defaults to the result device for @main which is the CPU, thus forcing a copy.
# TODO(mbs): Perhaps the defaulting heuristics are being too clever? def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, ?), float32]) { @@ -741,7 +741,7 @@ def @main(%x: Tensor[(?, ?), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(?, ?), float32], @@ -764,7 +764,7 @@ def test_alloc_storage(): metatable = {"VirtualDevice": [HOST, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%size: int64, %alignment: int64) { @@ -777,7 +777,7 @@ def @main(%size: int64, %alignment: int64) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%size {virtual_device=meta[VirtualDevice][0]}: int64, %alignment {virtual_device=meta[VirtualDevice][0]}: int64, @@ -802,7 +802,7 @@ def test_alloc_tensor(): } def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%sto: Storage[]) { @@ -816,7 +816,7 @@ def @main(%sto: Storage[]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%sto {virtual_device=meta[VirtualDevice][1]}: Storage[], virtual_device=meta[VirtualDevice][1]) { @@ -842,7 +842,7 @@ def test_reshape_tensor(): } def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(2, 8), float32]) { @@ -855,7 +855,7 @@ def @main(%x: Tensor[(2, 8), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(2, 8), float32], @@ -880,7 +880,7 @@ def test_dynamic_input(): # There's nothing special about inferring devices for partially unknown types. 
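The meta[...] references in these module texts are resolved by handing the parser an initial metatable, as the surrounding (unchanged) call sites do. A hedged sketch using a relay.Constant metatable; the positional arguments "from_string", None mirror the existing call sites and are an assumption, since this diff only renames the entry point:

import numpy as np
import tvm
from tvm import relay

metatable = {"relay.Constant": [relay.const(np.random.rand(2, 3), dtype="float32")]}
mod = tvm.relay.parse(
    '#[version = "0.0.5"]\ndef @main() { meta[relay.Constant][0] }',
    "from_string",
    None,
    metatable,
)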
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32]) { @@ -893,7 +893,7 @@ def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 {virtual_device=meta[VirtualDevice][0]}: Tensor[(?, ?), float32], %x1 {virtual_device=meta[VirtualDevice][0]}: Tensor[(?, ?), float32], @@ -916,7 +916,7 @@ def test_redundant_annotation(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -933,7 +933,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -963,7 +963,7 @@ def test_annotate_expr(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -979,7 +979,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1005,7 +1005,7 @@ def test_annotate_all(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -1021,7 +1021,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1057,7 +1057,7 @@ def test_conv_network(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 56), float32], @@ -1078,7 +1078,7 @@ def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 5 ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1 {virtual_device=meta[VirtualDevice][0]}: Tensor[(1, 64, 56, 56), float32], %data2 {virtual_device=meta[VirtualDevice][0]}: Tensor[(1, 64, 56, 56), float32], @@ -1111,7 +1111,7 @@ def test_tuple_get_item(): # Note that the device copy should be placed after projection rather than before. This is handled by # a heuristic in the pass. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(3, 3, 4), float32]) { @@ -1130,7 +1130,7 @@ def @main(%x: Tensor[(3, 3, 4), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(3, 3, 4), float32], @@ -1175,7 +1175,7 @@ def test_propogation(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32]) { @@ -1198,7 +1198,7 @@ def @main(%x: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1247,7 +1247,7 @@ def test_fusible_network(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -1268,7 +1268,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -1315,7 +1315,7 @@ def test_unpropagatable_graph(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -1334,7 +1334,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1363,7 +1363,7 @@ def test_conditional(): # The conditional is over a function type, thus exercising the first-order/higher-order domain handling. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -1388,7 +1388,7 @@ def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: bool, %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1429,7 +1429,7 @@ def test_global(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @f(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -1447,7 +1447,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) -> Tensor[(5 ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @f(%a {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1479,7 +1479,7 @@ def test_ref(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -1496,7 +1496,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1526,7 +1526,7 @@ def test_adt(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] type List[A] { @@ -1549,7 +1549,7 @@ def @main(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] type List[A] { @@ -1592,7 +1592,7 @@ def test_free_on_device(): # Everything defaults to GPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @on_scope_b(%x {virtual_device=meta[VirtualDevice][2]}: Tensor[(5, 7), float32], @@ -1618,7 +1618,7 @@ def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], % ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @on_scope_b(%x {virtual_device=meta[VirtualDevice][2]}: Tensor[(5, 7), float32], @@ -1702,7 +1702,7 @@ def input(): # - %y on CPU "scopeB", so will flow in to second param of gem. # - %z on CPU "scopeA", so will clash with third param of gem and will need device_copy. # - result on CPU "scopeB", but result of gem on "scopeA" so will need device_copy - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -1724,7 +1724,7 @@ def expected(): # - %y still on CPU "scopeB", no device_copy needed. # - %z still on CPU "scopeA", needs device_copy to "scopeB". # - result still on CPU "scopeB", needs device_copy from "scopeA". 
- return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(128, 128), float32], @@ -1767,7 +1767,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], mod += "\n\t" + "add(%" + str(end - 1) + ", %" + str(end - 2) + ")" mod += "\n\t}" - return tvm.parser.parse( + return tvm.relay.parse( mod, "from_string", None, @@ -1792,7 +1792,7 @@ def test_primitive(): ] } - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 32, 40, 40), float32], diff --git a/tests/python/relay/test_pass_unmatched_cases.py b/tests/python/relay/test_pass_unmatched_cases.py index 885f26025167..528dc4b6826e 100644 --- a/tests/python/relay/test_pass_unmatched_cases.py +++ b/tests/python/relay/test_pass_unmatched_cases.py @@ -416,7 +416,7 @@ def @shallow_opt[A](%a: Arith[A]) -> Arith[A] { } } """ - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) # fromtext parse the module, then checked it (which include strictness checking). @@ -444,7 +444,7 @@ def @expand_on_nil_match(%a: List[(List[()],)]) -> int { # Cons((Nil), Nil) passes the first pattern # Cons((Cons(*, *)), Nil) fails the first pattern, passes _ # Note Nil() is passed to ExpandWildcardsConstructor many times in the above! - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) def test_expanding_empty_tuple(): @@ -463,7 +463,7 @@ def @expand_on_empty_tuple_match(%a: (List[()], ())) -> int { } } """ - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) if __name__ == "__main__": diff --git a/tests/python/relay/test_target_hooks.py b/tests/python/relay/test_target_hooks.py index 3a76fd2fbbf3..1df2b2a919a3 100644 --- a/tests/python/relay/test_target_hooks.py +++ b/tests/python/relay/test_target_hooks.py @@ -73,7 +73,7 @@ def test_tir_external_generation_outline_with_target_instance(check_result): extern_codegen_target = tvm.target.Target( "example_target_hook -example_attribute=42", host=host_target ) - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(8), float32], %y: Tensor[(8), float32]) -> Tensor[(8), float32] { diff --git a/tests/python/relay/test_type_infer.py b/tests/python/relay/test_type_infer.py index 13d164c2caf6..187455570216 100644 --- a/tests/python/relay/test_type_infer.py +++ b/tests/python/relay/test_type_infer.py @@ -399,7 +399,7 @@ def @main(%f: float32) -> float32 { @id(%f) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) mod = transform.InferType()(mod) tvm.ir.assert_structural_equal(mod["main"].body.type_args, [relay.TensorType((), "float32")]) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 45e305c9a195..6443d50f9e98 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -1267,7 +1267,7 @@ def test_storage_size_and_offset_on_cpu(): # CPU = device type 1 # GPU = device type 2 def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], @@ -1303,7 +1303,7 @@ def test_reshape_shape_on_cpu(): # CPU = device type 1 # GPU = device type 2 def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(2, 8), float32], diff --git a/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py b/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py index 16a7bd447992..ab585fb4e011 100644 --- 
a/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py +++ b/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py @@ -42,7 +42,7 @@ def make_consts(dtype, shapes): def input_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { diff --git a/tests/python/relay/transform/test_compiler_function_utils.py b/tests/python/relay/transform/test_compiler_function_utils.py index 1bb07e268439..2e5f3b5ecf0e 100644 --- a/tests/python/relay/transform/test_compiler_function_utils.py +++ b/tests/python/relay/transform/test_compiler_function_utils.py @@ -43,7 +43,7 @@ def make_consts(dtype, shapes): def original_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -76,7 +76,7 @@ def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float1 def original_mod_let_bound(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -109,7 +109,7 @@ def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float1 def expected_outlined_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -143,7 +143,7 @@ def @tvmgen_default_cutlass_main_0(%y_0_i0: Tensor[(1600, 768), float16], %y_0_i def expected_extern_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -177,7 +177,7 @@ def @tvmgen_default_cutlass_main_0(%y_0_i0: Tensor[(1600, 768), float16], %y_0_i def expected_inlined_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { diff --git a/tests/python/relay/utils/assert_diagnostic.py b/tests/python/relay/utils/assert_diagnostic.py index ba73d8755e0c..5fcd1c20a018 100644 --- a/tests/python/relay/utils/assert_diagnostic.py +++ b/tests/python/relay/utils/assert_diagnostic.py @@ -15,14 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm - -from tvm import register_func, get_global_func, IRModule -from tvm import relay -from tvm.parser import SpanCheck +from tvm import IRModule, get_global_func, register_func, relay +from tvm.error import DiagnosticError +from tvm.ir.diagnostics import get_renderer, override_renderer +from tvm.relay import SpanCheck from tvm.relay.transform import AnnotateSpans from tvm.runtime import Object -from tvm.ir.diagnostics import get_renderer, override_renderer -from tvm.error import DiagnosticError DEFAULT_RENDERER = get_renderer() diff --git a/tests/python/unittest/test_aot_legalize_packed_call.py b/tests/python/unittest/test_aot_legalize_packed_call.py index 3f6e3f776cff..ad970d52c082 100644 --- a/tests/python/unittest/test_aot_legalize_packed_call.py +++ b/tests/python/unittest/test_aot_legalize_packed_call.py @@ -26,10 +26,10 @@ class Module: @T.prim_func def tvm_test_cpacked( - A: T.Buffer[(1,), "float32"], - B: T.Buffer[(1,), "float32"], - C: T.Buffer[(1,), "float32"], - device_context: T.Buffer[(1,), "float32"], + A: T.Buffer((1,), "float32"), + B: T.Buffer((1,), "float32"), + C: T.Buffer((1,), "float32"), + device_context: T.Buffer((1,), "float32"), ) -> T.handle: T.evaluate(C.data) @@ -56,10 +56,10 @@ def tir_packed_call() -> None: class Expected: @T.prim_func def tvm_test_cpacked( - A: T.Buffer[(1,), "float32"], - B: T.Buffer[(1,), "float32"], - C: T.Buffer[(1,), "float32"], - device_context: T.Buffer[(1,), "float32"], + A: T.Buffer((1,), "float32"), + B: T.Buffer((1,), "float32"), + C: T.Buffer((1,), "float32"), + device_context: T.Buffer((1,), "float32"), ) -> T.handle: T.evaluate(C.data) diff --git a/tests/python/unittest/test_auto_scheduler_feature.py b/tests/python/unittest/test_auto_scheduler_feature.py index 8be6e0a8f2ed..ddd86347c2ec 100644 --- a/tests/python/unittest/test_auto_scheduler_feature.py +++ b/tests/python/unittest/test_auto_scheduler_feature.py @@ -203,9 +203,9 @@ def test_gpu_feature(): @T.prim_func def tir_matmul( - A: T.Buffer[(256, 256), "float32"], - B: T.Buffer[(256, 256), "float32"], - C: T.Buffer[(256, 256), "float32"], + A: T.Buffer((256, 256), "float32"), + B: T.Buffer((256, 256), "float32"), + C: T.Buffer((256, 256), "float32"), ) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) @@ -263,7 +263,7 @@ def test_dense_lowered(): @T.prim_func -def negative_extent(A: T.Buffer[(1,), "float32"]): +def negative_extent(A: T.Buffer((1,), "float32")): for j in range(0, -1): A[j] = A[j] + 1.0 @@ -275,8 +275,8 @@ def test_negative_extent(): @T.prim_func def zero_dim( - p2: T.Buffer[(), "float32"], - T_cast: T.Buffer[(T.int64(1), T.int64(768)), "int8"], + p2: T.Buffer((), "float32"), + T_cast: T.Buffer((T.int64(1), T.int64(768)), "int8"), ): # function attr dict T.func_attr( diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index 198a5816321e..e51745d08be1 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -118,7 +118,7 @@ def test_graph_executor(): """Test use of the graph executor with microTVM.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { @@ -166,7 +166,7 @@ def test_aot_executor(): """Test use of the AOT executor with microTVM.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = 
"0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { @@ -217,7 +217,7 @@ def test_aot_executor_usmp_const_pool(): """Test the AOT executor with microTVM using USMP to generate a constant data pool.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8], %c : Tensor[(1,2), uint8]) { diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 70caa99c9bca..594567eff3ae 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -176,7 +176,7 @@ def _add_decl(name, dtype): "}", ] - mod = tvm.parser.fromtext("\n".join(mod_lines)) + mod = tvm.relay.fromtext("\n".join(mod_lines)) return mod, param_init diff --git a/tests/python/unittest/test_lower_build.py b/tests/python/unittest/test_lower_build.py index 4c188d2f834b..e94a4f09ec56 100644 --- a/tests/python/unittest/test_lower_build.py +++ b/tests/python/unittest/test_lower_build.py @@ -54,9 +54,9 @@ def matmul(a: T.handle, b: T.handle, c: T.handle) -> None: class LoweredModule: @T.prim_func def main( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "from_legacy_te_schedule": True, "tir.noalias": True}) @@ -76,9 +76,9 @@ def main( class LoweredTIRModule: @T.prim_func def main( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py index 701e1826b38a..88947962d69d 100644 --- a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py +++ b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py @@ -31,9 +31,9 @@ @T.prim_func def matmul( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -56,7 +56,7 @@ def matmul( @tvm.script.ir_module class LayoutTransform: @T.prim_func - def main(placeholder: T.Buffer[(1, 16, 7, 7, 32), "float32"], placeholder_1: T.Buffer[(25088,), "float32"], T_layout_trans: T.Buffer[(1, 1, 7, 7, 512), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 16, 7, 7, 32), "float32"), placeholder_1: T.Buffer((25088,), "float32"), T_layout_trans: T.Buffer((1, 1, 7, 7, 512), "float32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -1595,7 +1595,7 @@ def test_cpu_layout_transform(): @T.prim_func -def negative_extent(A: T.Buffer[(1,), "float32"]): +def negative_extent(A: T.Buffer((1,), "float32")): for j in range(0, -1): A[j] = A[j] + 1.0 diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py 
b/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py index c82bc697c993..9bb550e79e4a 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py @@ -107,9 +107,9 @@ def main(var_A: T.handle, var_B: T.handle, var_C: T.handle) -> None: class WarpExecutionAfterRewrite: @T.prim_func def main( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py index 80ca954cca5c..c03ba83c0229 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py @@ -74,9 +74,9 @@ class TestTIRMatmul(BaseBeforeAfter): """ def before( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0, j, k0, i1, k1 in T.grid(4, 16, 4, 4, 4): @@ -89,9 +89,9 @@ def before( C[vi, vj] = C[vi, vj] + A[vi, vk] * B[vk, vj] def expected( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_reindex = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -114,7 +114,7 @@ class TestRewrittenBuffersMustOccurWithinBlock(BaseBeforeAfter): """Buffers must occur within a Block""" def before( - A: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [0]}) for i, j in T.grid(16, 16): @@ -131,7 +131,7 @@ class TestExtentOne(BaseBeforeAfter): """ def before( - A: T.Buffer[(16, 1), "float32"], + A: T.Buffer((16, 1), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [0]}) for i, j in T.grid(16, 1): @@ -139,7 +139,7 @@ def before( vi, vj = T.axis.remap("SS", [i, j]) T.evaluate(A[vi, vj]) - def expected(A: T.Buffer[(16, 1), "float32"]): + def expected(A: T.Buffer((16, 1), "float32")): T.func_attr({"layout_free_buffers": [0]}) A_global = T.alloc_buffer([16], dtype="float32") @@ -157,9 +157,9 @@ def expected(A: T.Buffer[(16, 1), "float32"]): @T.prim_func def tir_matmul( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0, j, k0, i1, k1 in T.grid(4, 16, 4, 4, 4): @@ -174,9 +174,9 @@ def tir_matmul( @T.prim_func def rewritten_tir_matmul( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_reindex = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -208,7 +208,7 @@ def test_layout_rewrite(): 
@tvm.script.ir_module class Conv2dCacheRead: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -285,7 +285,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), @tvm.script.ir_module class Conv2dCacheReadRewritten: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -370,7 +370,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), @tvm.script.ir_module class Conv2dCacheReadMultipleRewritten: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -482,9 +482,9 @@ def test_layout_rewrite_cache_read_multiple(): class TestLayoutRewriteInt64Index(BaseBeforeAfter): def before( - p0: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - p1: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - T_batch_matmul_NT: T.Buffer[(T.int64(12), T.int64(197), T.int64(197)), "int32"], + p0: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + p1: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + T_batch_matmul_NT: T.Buffer((T.int64(12), T.int64(197), T.int64(197)), "int32"), ): T.func_attr({"layout_free_buffers": [1], "global_symbol": "main", "tir.noalias": True}) for b_0_i_0_fused in T.parallel(T.int64(394)): @@ -542,9 +542,9 @@ def before( ) def expected( - p0: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - p1: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - T_batch_matmul_NT: T.Buffer[(T.int64(12), T.int64(197), T.int64(197)), "int32"], + p0: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + p1: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + T_batch_matmul_NT: T.Buffer((T.int64(12), T.int64(197), T.int64(197)), "int32"), ): T.func_attr({"tir.noalias": True, "global_symbol": "main", "layout_free_buffers": [1]}) p1_global = T.alloc_buffer( diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py index 44b0e79f0cc2..a3b1cc5e0139 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py +++ 
b/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py @@ -73,7 +73,7 @@ def Move_PUV0(a: T.handle, b: T.handle) -> None: @tvm.script.ir_module class Fused_NN_Dense: @T.prim_func - def main(placeholder: T.Buffer[(64, 768), "float32"], placeholder_1: T.Buffer[(768, 768), "float32"], T_matmul_NT: T.Buffer[(64, 768), "float32"]) -> None: + def main(placeholder: T.Buffer((64, 768), "float32"), placeholder_1: T.Buffer((768, 768), "float32"), T_matmul_NT: T.Buffer((64, 768), "float32")) -> None: for i0, i1, i2 in T.grid(64, 768, 768): with T.block("T_matmul_NT"): i, j, k = T.axis.remap("SSR", [i0, i1, i2]) @@ -85,9 +85,9 @@ def main(placeholder: T.Buffer[(64, 768), "float32"], placeholder_1: T.Buffer[(7 @T.prim_func def before_matmul_vectorize( - placeholder: T.Buffer[(64, 768), "float32"], - placeholder_1: T.Buffer[(768, 768), "float32"], - T_matmul_NT: T.Buffer[(64, 768), "float32"], + placeholder: T.Buffer((64, 768), "float32"), + placeholder_1: T.Buffer((768, 768), "float32"), + T_matmul_NT: T.Buffer((64, 768), "float32"), ) -> None: with T.block("root"): T.reads() @@ -115,9 +115,9 @@ def before_matmul_vectorize( @T.prim_func def after_matmul_vectorize( - placeholder: T.Buffer[(64, 768), "float32"], - placeholder_1: T.Buffer[(768, 768), "float32"], - T_matmul_NT: T.Buffer[(64, 768), "float32"], + placeholder: T.Buffer((64, 768), "float32"), + placeholder_1: T.Buffer((768, 768), "float32"), + T_matmul_NT: T.Buffer((64, 768), "float32"), ) -> None: T_matmul_NT_global = T.alloc_buffer([64, 768], dtype="float32") for i0_0, i1_0, i0_1, i1_1 in T.grid(1, 16, 1, 3): diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py index 7e499424058d..347b773b7ed0 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py @@ -158,7 +158,7 @@ def main(var_A: T.handle, var_B: T.handle, var_C: T.handle) -> None: @tvm.script.ir_module class Softmax_cross_thread_reduction: @T.prim_func - def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") for i0 in T.serial(256): diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py index 21755e1338eb..a8ce704bd0ce 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py @@ -25,9 +25,9 @@ class Conv2dNCHWcVNNIModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for ( @@ -145,9 +145,9 @@ def main( class Conv2dNCHWcVNNIModuleTensorized: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - 
placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -248,9 +248,9 @@ def main( class DenseDP4ATiled: @T.prim_func def main( - X: T.Buffer[(128, 128), "int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) compute_local = T.alloc_buffer([128, 128], dtype="int32", scope="local") @@ -334,9 +334,9 @@ def main( class DenseDP4ATensorized: @T.prim_func def main( - X: T.Buffer[(128, 128), "int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py index b01447ad4a9e..963f660ffb67 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py @@ -73,7 +73,7 @@ def main(var_A: T.handle, var_B: T.handle) -> None: @tvm.script.ir_module class Before_norm_bmn: @T.prim_func - def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> None: + def main(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer((1,), "float32")) -> None: C = T.alloc_buffer([1], dtype="float32") for i0, i1, i2 in T.grid(1, 256, 256): with T.block("C"): @@ -90,7 +90,7 @@ def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> @tvm.script.ir_module class After_norm_bmn: @T.prim_func - def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> None: + def main(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer((1,), "float32")) -> None: C = T.alloc_buffer([1], dtype="float32") for i0_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for i0_fused_1 in T.thread_binding(1, thread="threadIdx.x"): @@ -112,7 +112,7 @@ def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> class Bert_fused_reshape_transpose_reshape: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0, i0_i1_fused_1 in T.grid(1536, 32): with T.block("T_reshape_1"): @@ -131,7 +131,7 @@ def main( class Bert_fused_reshape_transpose_reshape_large: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0, i0_i1_fused_1 in T.grid(1536000, 32): with T.block("T_reshape_1"): @@ -150,7 +150,7 @@ def main( class Bert_fused_reshape_transpose_reshape_after_rub: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 
64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0_i0_i1_fused_1_fused_0 in T.thread_binding(48, thread="blockIdx.x"): for i0_i1_fused_0_i0_i1_fused_1_fused_1 in T.thread_binding(1024, thread="threadIdx.x"): @@ -202,7 +202,7 @@ def main( class Bert_fused_reshape_transpose_reshape_after_rub_large: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: # body # with T.block("root") @@ -269,7 +269,7 @@ def main( @T.prim_func def before_unrolled_loop( - placeholder: T.Buffer[(1, 56, 56, 64), "float32"], + placeholder: T.Buffer((1, 56, 56, 64), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -294,7 +294,7 @@ def before_unrolled_loop( @T.prim_func def after_unrolled_loop( - placeholder: T.Buffer[(1, 56, 56, 64), "float32"], + placeholder: T.Buffer((1, 56, 56, 64), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py b/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py index 86a88af40309..59de0b0c570a 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py +++ b/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py @@ -220,7 +220,7 @@ def main(a: T.handle, b: T.handle) -> None: B[blockIdx_z * 131072 + blockIdx_y * 16384 + threadIdx_y * 2048 + ff_inner_inner_inner * 256 + blockIdx_x * 64 + threadIdx_x * 8 + nn_inner_inner_inner] = B_local[ff_inner_inner_inner * 8 + nn_inner_inner_inner]# fmt: on @T.prim_func -def GmmCuda0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -274,7 +274,7 @@ def GmmCuda0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " Z[v0, v1, v2] = Z_local[v0, v1, v2] @T.prim_func -def GmmCuda1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda1(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -333,7 +333,7 @@ def GmmCuda1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " @T.prim_func -def GmmCuda2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda2(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -393,9 +393,9 @@ def 
GmmCuda2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " @T.prim_func def GMMCUDATensorCore( - X: T.Buffer[(1024, 1024), "float16"], - Y: T.Buffer[(1024, 1024), "float16"], - Z: T.Buffer[(1024, 1024), "float32"], + X: T.Buffer((1024, 1024), "float16"), + Y: T.Buffer((1024, 1024), "float16"), + Z: T.Buffer((1024, 1024), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py b/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py index 55ea0a6ed80f..cb4767221915 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py +++ b/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py @@ -42,7 +42,7 @@ def _create_context(mod, target) -> ms.TuneContext: @tvm.script.ir_module class Conv2dNCHWcVTCM: @T.prim_func - def main(p0: T.Buffer[(T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)), "uint8"], p1: T.Buffer[(T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)), "uint8"], conv2d_NCHWc_int8: T.Buffer[(T.int64(1), T.int64(2), T.int64(54), T.int64(54), T.int64(32)), "int32"]): + def main(p0: T.Buffer((T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)), "uint8"), p1: T.Buffer((T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)), "uint8"), conv2d_NCHWc_int8: T.Buffer((T.int64(1), T.int64(2), T.int64(54), T.int64(54), T.int64(32)), "int32")): T.func_attr({"tir.noalias": True, "global_symbol": "main"}) p0_global_vtcm = T.alloc_buffer([T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)], dtype="uint8", scope="global.vtcm") p1_global_vtcm = T.alloc_buffer([T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)], dtype="uint8", scope="global.vtcm") diff --git a/tests/python/unittest/test_meta_schedule_relay_integration.py b/tests/python/unittest/test_meta_schedule_relay_integration.py index 8cd58e5a6f36..90be1ec0a1e9 100644 --- a/tests/python/unittest/test_meta_schedule_relay_integration.py +++ b/tests/python/unittest/test_meta_schedule_relay_integration.py @@ -408,8 +408,8 @@ def test_meta_schedule_te2primfunc_argument_order_and_lowering(): class _fused_layout_transform: @T.prim_func def main( # type: ignore - placeholder: T.Buffer[(T.int64(1), T.int64(3), T.int64(16), T.int64(16)), "float32"], # type: ignore - T_layout_trans: T.Buffer[(T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"], # type: ignore + placeholder: T.Buffer((T.int64(1), T.int64(3), T.int64(16), T.int64(16)), "float32"), # type: ignore + T_layout_trans: T.Buffer((T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"), # type: ignore ) -> None: # type: ignore # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -430,7 +430,7 @@ def main( # type: ignore @tvm.script.ir_module class _fused_layout_transform_1: @T.prim_func - def main(placeholder: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"], T_layout_trans: T.Buffer[(T.int64(1), T.int64(8), T.int64(16), T.int64(16)), "float32"]) -> None: # type: ignore + def main(placeholder: T.Buffer((T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"), T_layout_trans: T.Buffer((T.int64(1), T.int64(8), T.int64(16), T.int64(16)), "float32")) -> None: # type: ignore # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) 
# body @@ -445,7 +445,7 @@ def main(placeholder: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16) @tvm.script.ir_module class _fused_nn_contrib_conv2d_NCHWc: @T.prim_func - def main(placeholder: T.Buffer[(T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"], placeholder_1: T.Buffer[(T.int64(2), T.int64(1), T.int64(5), T.int64(5), T.int64(3), T.int64(4)), "float32"], conv2d_NCHWc: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"]) -> None: # type: ignore + def main(placeholder: T.Buffer((T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"), placeholder_1: T.Buffer((T.int64(2), T.int64(1), T.int64(5), T.int64(5), T.int64(3), T.int64(4)), "float32"), conv2d_NCHWc: T.Buffer((T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32")) -> None: # type: ignore # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py b/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py index 7f56683588ba..b21a4e0f7ec8 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py @@ -29,9 +29,9 @@ def test_cpu_matmul(): @T.prim_func def cpu_matmul_0( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2 in T.grid(4, 4, 512): @@ -45,9 +45,9 @@ def cpu_matmul_0( @T.prim_func def cpu_matmul_1( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_rf = T.alloc_buffer([4, 4, 128], dtype="float32") @@ -73,9 +73,9 @@ def cpu_matmul_1( @T.prim_func def cpu_matmul_2( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_rf = T.alloc_buffer([4, 4, 4], dtype="float32") @@ -124,10 +124,10 @@ def cpu_matmul_2( def test_cpu_argmax(): @T.prim_func def argmax( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -147,10 +147,10 @@ def argmax( @T.prim_func def argmax_0( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -169,10 +169,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - 
argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: argmax_v0_rf = T.alloc_buffer([128, 16], dtype="int32") argmax_v1_rf = T.alloc_buffer([128, 16], dtype="float32") @@ -216,10 +216,10 @@ def argmax_1( @T.prim_func def argmax_2( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py index f0eee4138daa..a8219ca01a68 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py @@ -36,9 +36,9 @@ def element_wise(var_A: T.handle, var_B: T.handle) -> None: @T.prim_func def reduction_loop_only( - A: T.Buffer[2, "float32"], - B: T.Buffer[2, "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer(2, "float32"), + B: T.Buffer(2, "float32"), + C: T.Buffer((), "float32"), ) -> None: for i0 in T.serial(2): with T.block("C"): @@ -52,9 +52,9 @@ def reduction_loop_only( @T.prim_func def zero_dim_add( - A: T.Buffer[(), "float32"], - B: T.Buffer[(), "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer((), "float32"), + B: T.Buffer((), "float32"), + C: T.Buffer((), "float32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -64,8 +64,8 @@ def zero_dim_add( def test_cuda_element_wise(): @T.prim_func def elementwise_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), ) -> None: # body # with T.block("root") @@ -99,9 +99,9 @@ def elementwise_0( def test_cuda_reduction_loop_only(): @T.prim_func def reduction_loop_only_0( - A: T.Buffer[2, "float32"], - B: T.Buffer[2, "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer(2, "float32"), + B: T.Buffer(2, "float32"), + C: T.Buffer((), "float32"), ) -> None: for u_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for u_fused_1 in T.thread_binding(1, thread="threadIdx.x"): @@ -132,9 +132,9 @@ def reduction_loop_only_0( def test_cuda_zero_dim_add(): @T.prim_func def zero_dim_add_0( - A: T.Buffer[(), "float32"], - B: T.Buffer[(), "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer((), "float32"), + B: T.Buffer((), "float32"), + C: T.Buffer((), "float32"), ) -> None: for u_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for u_fused_1 in T.thread_binding(1, thread="threadIdx.x"): diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py index 1baa13793f38..0b2e7fc086be 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py @@ -164,7 +164,7 @@ def main(var_X: T.handle, var_W: T.handle, var_B: T.handle, var_bn_scale: T.hand @tvm.script.ir_module class MultiLevelTiledConv2DAfterInline: @T.prim_func - def main(X: T.Buffer[(1, 512, 56, 56), "float32"], W: T.Buffer[(512, 512, 3, 3), "float32"], B: T.Buffer[(512, 1, 1), "float32"], bn_scale: 
T.Buffer[(512, 1, 1), "float32"], bn_offset: T.Buffer[(512, 1, 1), "float32"], compute: T.Buffer[(1, 512, 56, 56), "float32"]) -> None: + def main(X: T.Buffer((1, 512, 56, 56), "float32"), W: T.Buffer((512, 512, 3, 3), "float32"), B: T.Buffer((512, 1, 1), "float32"), bn_scale: T.Buffer((512, 1, 1), "float32"), bn_offset: T.Buffer((512, 1, 1), "float32"), compute: T.Buffer((1, 512, 56, 56), "float32")) -> None: compute_local = T.alloc_buffer([1, 512, 56, 56], dtype="float32", scope="local") for i0_0_i1_0_i2_0_i3_0_fused in T.thread_binding(224, thread="blockIdx.x"): for i0_1_i1_1_i2_1_i3_1_fused in T.thread_binding(2, thread="vthread.x"): @@ -192,7 +192,7 @@ def main(X: T.Buffer[(1, 512, 56, 56), "float32"], W: T.Buffer[(512, 512, 3, 3), @tvm.script.ir_module class SoftmaxBeforeInline: @T.prim_func - def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_exp = T.alloc_buffer([256, 256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -221,7 +221,7 @@ def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256) @tvm.script.ir_module class SoftmaxAfterInline: @T.prim_func - def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") for i0, i1 in T.grid(256, 256): @@ -246,10 +246,10 @@ def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256) class BeforePureSpatial: @T.prim_func def main( - placeholder: T.Buffer[(1, 384), "int64"], - placeholder_1: T.Buffer[(30522, 768), "float32"], - placeholder_2: T.Buffer[(1, 384, 768), "float32"], - T_add: T.Buffer[(1, 384, 768), "float32"], + placeholder: T.Buffer((1, 384), "int64"), + placeholder_1: T.Buffer((30522, 768), "float32"), + placeholder_2: T.Buffer((1, 384, 768), "float32"), + T_add: T.Buffer((1, 384, 768), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) compile_engine_const = T.alloc_buffer([], dtype="int64") @@ -310,7 +310,7 @@ def main( @tvm.script.ir_module class AfterPureSpatial: @T.prim_func - def main(placeholder: T.Buffer[(1, 384), "int64"], placeholder_1: T.Buffer[(30522, 768), "float32"], placeholder_2: T.Buffer[(1, 384, 768), "float32"], T_add: T.Buffer[(1, 384, 768), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 384), "int64"), placeholder_1: T.Buffer((30522, 768), "float32"), placeholder_2: T.Buffer((1, 384, 768), "float32"), T_add: T.Buffer((1, 384, 768), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -325,7 +325,7 @@ def main(placeholder: T.Buffer[(1, 384), "int64"], placeholder_1: T.Buffer[(3052 @tvm.script.ir_module class ConstConsumer: @T.prim_func - def main(T_full: T.Buffer[(1, 12, 4096), "int64"]) -> None: + def main(T_full: T.Buffer((1, 12, 4096), "int64")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -341,7 +341,7 @@ def main(T_full: T.Buffer[(1, 12, 4096), "int64"]) -> None: @tvm.script.ir_module class Conv2dInt8: @T.prim_func - def main(p0: T.Buffer[(16, 14, 14, 256), "int8"], p1: T.Buffer[(1024, 1, 1, 
256), "int8"], p2: T.Buffer[(1, 1, 1, 1024), "int32"], p3: T.Buffer[(1, 1, 1, 1024), "int32"], p4: T.Buffer[1024, "int32"], p5: T.Buffer[1024, "int32"], p6: T.Buffer[1024, "int32"], p7: T.Buffer[1, "int32"], p8: T.Buffer[(16, 14, 14, 1024), "int32"], compute: T.Buffer[(16, 14, 14, 1024), "int32"]) -> None: + def main(p0: T.Buffer((16, 14, 14, 256), "int8"), p1: T.Buffer((1024, 1, 1, 256), "int8"), p2: T.Buffer((1, 1, 1, 1024), "int32"), p3: T.Buffer((1, 1, 1, 1024), "int32"), p4: T.Buffer(1024, "int32"), p5: T.Buffer(1024, "int32"), p6: T.Buffer(1024, "int32"), p7: T.Buffer(1, "int32"), p8: T.Buffer((16, 14, 14, 1024), "int32"), compute: T.Buffer((16, 14, 14, 1024), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py b/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py index c851c9bec3b5..489b0ddef0e4 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py @@ -32,7 +32,7 @@ class Softmax_mn_after_inline: @T.prim_func def main( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -63,8 +63,8 @@ def main( def test_gpu_softmax_mn(): @T.prim_func def softmax_mn_0( - A: T.Buffer[(256, 256), "float32"], - T_softmax_norm: T.Buffer[(256, 256), "float32"], + A: T.Buffer((256, 256), "float32"), + T_softmax_norm: T.Buffer((256, 256), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -107,7 +107,7 @@ def softmax_mn_0( @T.prim_func def softmax_mn_1( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -159,7 +159,7 @@ def softmax_mn_1( @T.prim_func def softmax_mn_2( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -211,7 +211,7 @@ def softmax_mn_2( @T.prim_func def softmax_mn_3( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -299,7 +299,7 @@ def softmax_mn_3( def test_gpu_softmax_mn_after_inline(): @T.prim_func def softmax_mn_after_inline_0( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -334,7 +334,7 @@ def softmax_mn_after_inline_0( @T.prim_func def softmax_mn_after_inline_1( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), 
T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -371,7 +371,7 @@ def softmax_mn_after_inline_1( @T.prim_func def softmax_mn_after_inline_2( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -415,7 +415,7 @@ def softmax_mn_after_inline_2( @T.prim_func def softmax_mn_after_inline_3( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -498,7 +498,7 @@ def softmax_mn_after_inline_3( def test_gpu_batch_norm_bmn(): @T.prim_func - def batch_norm_bmn_0(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "float32"]) -> None: + def batch_norm_bmn_0(A: T.Buffer((1, 512, 512), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -520,7 +520,7 @@ def batch_norm_bmn_0(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "floa D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def batch_norm_bmn_1(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "float32"]) -> None: + def batch_norm_bmn_1(A: T.Buffer((1, 512, 512), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -568,10 +568,10 @@ def batch_norm_bmn_1(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "floa @T.prim_func def argmax( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -590,10 +590,10 @@ def argmax( @T.prim_func def argmax_32( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: for i0, i1 in T.grid(1, 32): with T.block("argmax"): @@ -613,10 +613,10 @@ def argmax_32( def test_gpu_argmax(): @T.prim_func def argmax_0( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") @@ -637,10 +637,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, 
"int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") @@ -686,10 +686,10 @@ def argmax_1( def test_gpu_argmax_32(): @T.prim_func def argmax_0( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: # body # with T.block("root") @@ -710,10 +710,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py index f40d9427490d..66eb81912293 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py @@ -31,9 +31,9 @@ def test_cpu_matmul(): @T.prim_func def cpu_matmul_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -62,9 +62,9 @@ def cpu_matmul_0( @T.prim_func def cpu_matmul_1( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -93,9 +93,9 @@ def cpu_matmul_1( @T.prim_func def cpu_matmul_2( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -149,9 +149,9 @@ def cpu_matmul_2( def test_cpu_matmul_relu(): @T.prim_func def cpu_matmul_relu_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -180,9 +180,9 @@ def cpu_matmul_relu_0( @T.prim_func def cpu_matmul_relu_1( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -211,9 +211,9 @@ def cpu_matmul_relu_1( @T.prim_func def cpu_matmul_relu_2( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 
512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -273,9 +273,9 @@ def cpu_matmul_relu_2( def test_cuda_matmul(): @T.prim_func def cuda_matmul_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -379,9 +379,9 @@ def cuda_matmul_0( def test_cuda_matmul_relu(): @T.prim_func def cuda_matmul_relu_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -497,8 +497,8 @@ def cuda_matmul_relu_0( def test_cuda_sum_with_trivial_block_iter(): @T.prim_func def sum_with_trivial_block_iter( - A: T.Buffer[(1, 64, 768), "float32"], - B: T.Buffer[(1, 64, 1), "float32"], + A: T.Buffer((1, 64, 768), "float32"), + B: T.Buffer((1, 64, 1), "float32"), ) -> None: for i0, i1, i2, i3 in T.grid(1, 64, 1, 768): with T.block("sum"): @@ -523,9 +523,9 @@ def sum_with_trivial_block_iter( def test_multi_level_tiling_hexagon(): @T.prim_func def cpu_conv2d_nhwc( - inputs: T.Buffer[(1, 56, 56, 64), "float16"], - weight: T.Buffer[(3, 3, 64, 64), "float16"], - conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float16"], + inputs: T.Buffer((1, 56, 56, 64), "float16"), + weight: T.Buffer((3, 3, 64, 64), "float16"), + conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float16"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -738,8 +738,8 @@ def test_max_pool_blocked(): # fmt off @T.prim_func def pool_blocked_cache_read_write( - X: T.Buffer[(1, 2, 8, 8, 8, 8, 32), "uint8"], - pool: T.Buffer[(1, 2, 4, 4, 8, 8, 32), "uint8"], + X: T.Buffer((1, 2, 8, 8, 8, 8, 32), "uint8"), + pool: T.Buffer((1, 2, 4, 4, 8, 8, 32), "uint8"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) pool_global = T.alloc_buffer([1, 2, 4, 4, 8, 8, 32], dtype="uint8") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py index 4667626f1706..a1c024d287ad 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py @@ -32,9 +32,9 @@ def test_x86_conv2d_nchwc(intrin=VNNI_INTRIN, target="llvm -mcpu=cascadelake -num-cores=4"): @T.prim_func def conv2d_nchwc( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): @@ -69,7 +69,7 @@ def conv2d_nchwc( # fmt: off @T.prim_func - def x86_conv2d_nchwc_0(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def 
x86_conv2d_nchwc_0(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_NCHWc_int8_global = T.alloc_buffer([1, 16, 56, 56, 16], dtype="int32") for i0_0, i1_0, i2_0, i3_0, i4_0_0, i0_1, i1_1, i2_1, i3_1, i4_0_1 in T.grid(1, 8, 28, 56, 1, 1, 2, 1, 1, 1): @@ -114,7 +114,7 @@ def x86_conv2d_nchwc_0(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], place conv2d_NCHWc_int8[v0, v1, v2, v3, v4] = conv2d_NCHWc_int8_global[v0, v1, v2, v3, v4] @T.prim_func - def x86_conv2d_nchwc_1(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def x86_conv2d_nchwc_1(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_NCHWc_int8_global = T.alloc_buffer([1, 16, 56, 56, 16], dtype="int32") for i0_0, i1_0, i2_0, i3_0, i4_0_0 in T.grid(1, 8, 28, 56, 1): @@ -159,7 +159,7 @@ def x86_conv2d_nchwc_1(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], place conv2d_NCHWc_int8[v0, v1, v2, v3, v4] = conv2d_NCHWc_int8_global[v0, v1, v2, v3, v4] @T.prim_func - def x86_conv2d_nchwc_2(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def x86_conv2d_nchwc_2(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0_0, i1_0, i2_0, i3_0, i4_0_0, i0_1, i1_1, i2_1, i3_1, i4_0_1, i5_0, i6_0, i7_0, i8_0, i9_0_0, i0_2, i1_2, i2_2, i3_2, i4_0_2, i5_1, i6_1, i7_1, i8_1, i9_0_1, i0_3, i1_3, i2_3, i3_3, i4_0_3 in T.grid(1, 8, 28, 56, 1, 1, 2, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1): with T.block("conv2d_NCHWc_int8_o"): @@ -298,9 +298,9 @@ def _dense(m, n, k, in_dtype, out_dtype): def test_dp4a_dense(): @T.prim_func def dp4a_dense_0( - X: T.Buffer[(128, 128), "int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py index 064769915955..9b869b4436c0 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py @@ -81,7 +81,7 @@ def test_matmul_relu(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - def matmul_relu_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_reindex_shared = T.alloc_buffer([128, 128], dtype="float32", 
scope=shared_scope) C_reindex_shared_wmma_accumulator = T.alloc_buffer([128, 128], dtype="float32", scope="wmma.accumulator") @@ -222,7 +222,7 @@ def matmul_relu_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "f def test_matmul_relu_with_fallback(): # fmt: off @T.prim_func - def matmul_relu_fallback_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_fallback_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -371,7 +371,7 @@ def test_conv2d(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - def conv2d_0(inputs: T.Buffer[(1, 16, 16, 32), "float16"], weight: T.Buffer[(3, 3, 32, 32), "float16"], conv2d_nhwc: T.Buffer[(1, 16, 16, 32), "float32"]) -> None: + def conv2d_0(inputs: T.Buffer((1, 16, 16, 32), "float16"), weight: T.Buffer((3, 3, 32, 32), "float16"), conv2d_nhwc: T.Buffer((1, 16, 16, 32), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) PadInput = T.alloc_buffer([1, 18, 18, 32], dtype="float16") conv2d_nhwc_reindex_shared = T.alloc_buffer([256, 32], dtype="float32", scope=shared_scope) @@ -546,7 +546,7 @@ def test_matmul_relu_pipeline(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - def matmul_relu_pipeline_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_pipeline_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -696,7 +696,7 @@ def matmul_relu_pipeline_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, def test_matmul_relu_global(): # fmt: off @T.prim_func - def matmul_relu_global_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_global_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -851,7 +851,7 @@ def test_matmul_relu_non_tensorizable(): def test_padded_matmul_relu(): # fmt: off @T.prim_func - def padded_matmul_relu_0(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 127), "float16"], compute: T.Buffer[(127, 127), "float32"]) -> None: + def padded_matmul_relu_0(A: T.Buffer((127, 127), "float16"), B: T.Buffer((127, 127), "float16"), compute: T.Buffer((127, 127), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -992,7 +992,7 @@ def padded_matmul_relu_0(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 1 def test_conv_1x1(): # fmt: off @T.prim_func - def conv2d_1x1_0(inputs: T.Buffer[(1, 16, 16, 64), "float16"], weight: T.Buffer[(1, 1, 64, 64), "float16"], conv2d_nhwc: T.Buffer[(1, 16, 16, 64), "float32"]) -> None: + def conv2d_1x1_0(inputs: T.Buffer((1, 16, 16, 64), "float16"), weight: T.Buffer((1, 1, 64, 64), "float16"), conv2d_nhwc: T.Buffer((1, 16, 16, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_nhwc_reindex_shared = T.alloc_buffer([256, 64], dtype="float32", 
scope="shared") conv2d_nhwc_reindex_shared_wmma_accumulator = T.alloc_buffer([256, 64], dtype="float32", scope="wmma.accumulator") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py b/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py index 520dfbfb1cc5..2a0a67d4c786 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py @@ -67,7 +67,7 @@ def main(a: T.handle, b: T.handle, c: T.handle) -> None: @tvm.script.ir_module class PureSpatial: @T.prim_func - def main(placeholder: T.Buffer[(1, 13, 13, 3, 85), "float32"], placeholder_1: T.Buffer[(1, 26, 26, 3, 85), "float32"], placeholder_2: T.Buffer[(1, 52, 52, 3, 85), "float32"], T_expand_dims: T.Buffer[(1, 80, 10647), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 13, 13, 3, 85), "float32"), placeholder_1: T.Buffer((1, 26, 26, 3, 85), "float32"), placeholder_2: T.Buffer((1, 52, 52, 3, 85), "float32"), T_expand_dims: T.Buffer((1, 80, 10647), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) T_strided_slice_with_axes = T.alloc_buffer([1, 52, 52, 3, 1], dtype="float32") T_sigmoid = T.alloc_buffer([1, 52, 52, 3, 1], dtype="float32") @@ -224,9 +224,9 @@ def main(placeholder: T.Buffer[(1, 13, 13, 3, 85), "float32"], placeholder_1: T. def test_parallel_vectorize_unroll(): @T.prim_func def Matmul_0( - A: T.Buffer[(1024, 1024), "float32"], - B: T.Buffer[(1024, 1024), "float32"], - C: T.Buffer[(1024, 1024), "float32"], + A: T.Buffer((1024, 1024), "float32"), + B: T.Buffer((1024, 1024), "float32"), + C: T.Buffer((1024, 1024), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main"}) diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py b/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py index 7c9433cedf50..2e912af18a6a 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py @@ -59,8 +59,8 @@ def main(a: T.handle, b: T.handle) -> None: def test_random_compute_location(): @T.prim_func def add_0( - A: T.Buffer[(2048, 2048, 2048), "float32"], - B: T.Buffer[(2048, 2048, 2048), "float32"], + A: T.Buffer((2048, 2048, 2048), "float32"), + B: T.Buffer((2048, 2048, 2048), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main"}) diff --git a/tests/python/unittest/test_meta_schedule_space_cpu.py b/tests/python/unittest/test_meta_schedule_space_cpu.py index 47f3e6d4cc51..93e1bdad4438 100644 --- a/tests/python/unittest/test_meta_schedule_space_cpu.py +++ b/tests/python/unittest/test_meta_schedule_space_cpu.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cpu_c1d(): # fmt: off @T.prim_func - def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_0(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -80,7 +80,7 @@ def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 T.writes(conv1d_nlc[v0, v1, v2]) conv1d_nlc[v0, v1, v2] = conv1d_nlc_global[v0, v1, v2] 
@T.prim_func - def c1d_1(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_1(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -122,7 +122,7 @@ def c1d_1(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 conv1d_nlc[v0, v1, v2] = conv1d_nlc_global[v0, v1, v2] @T.prim_func - def c1d_2(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_2(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -186,7 +186,7 @@ def c1d_2(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 def test_cpu_c2d(): # fmt: off @T.prim_func - def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -232,7 +232,7 @@ def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def c2d_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_1(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -274,7 +274,7 @@ def c2d_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def c2d_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_2(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -357,7 +357,7 @@ def c2d_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cpu_c3d(): # fmt: off @T.prim_func - def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_0(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -407,7 +407,7 @@ def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 T.writes(conv3d_ndhwc[v0, v1, v2, v3, v4]) 
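Note that the call form accepts either a bare integer or a tuple for the shape: hunks in this patch leave D: T.Buffer(1, "float32") and argmax_v0: T.Buffer((128,), "int32") side by side. A hedged sketch under the assumption that both spellings denote the same 1-D buffer; sum_sq is a hypothetical kernel modeled on the nrm_* reductions in these tests:

    from tvm.script import tir as T

    @T.prim_func
    def sum_sq(
        A: T.Buffer((4, 64), "float32"),
        out: T.Buffer(4, "float32"),  # assumed equivalent to T.Buffer((4,), "float32")
    ) -> None:
        for b, k in T.grid(4, 64):
            with T.block("sum"):
                vb, vk = T.axis.remap("SR", [b, k])
                with T.init():
                    out[vb] = T.float32(0)
                out[vb] = out[vb] + A[vb, vk] * A[vb, vk]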
conv3d_ndhwc[v0, v1, v2, v3, v4] = conv3d_ndhwc_global[v0, v1, v2, v3, v4] @T.prim_func - def c3d_1(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_1(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -457,7 +457,7 @@ def c3d_1(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 T.writes(conv3d_ndhwc[v0, v1, v2, v3, v4]) conv3d_ndhwc[v0, v1, v2, v3, v4] = conv3d_ndhwc_global[v0, v1, v2, v3, v4] @T.prim_func - def c3d_2(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_2(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -549,7 +549,7 @@ def c3d_2(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 def test_cpu_cap(): # fmt: off @T.prim_func - def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_0(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -600,7 +600,7 @@ def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( T.writes(conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5]) conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5] = conv2d_capsule_nhwijc_global[v0, v1, v2, v3, v4, v5] @T.prim_func - def cap_1(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_1(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -648,7 +648,7 @@ def cap_1(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( T.writes(conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5]) conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5] = conv2d_capsule_nhwijc_global[v0, v1, v2, v3, v4, v5] @T.prim_func - def cap_2(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_2(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -737,7 +737,7 @@ def cap_2(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( def test_cpu_dep(): # fmt: off @T.prim_func - def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], 
depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_0(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -778,7 +778,7 @@ def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. T.writes(depth_conv2d_nhwc[v0, v1, v2, v3]) depth_conv2d_nhwc[v0, v1, v2, v3] = depth_conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dep_1(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_1(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -816,7 +816,7 @@ def dep_1(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. T.writes(depth_conv2d_nhwc[v0, v1, v2, v3]) depth_conv2d_nhwc[v0, v1, v2, v3] = depth_conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dep_2(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_2(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -892,7 +892,7 @@ def dep_2(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. 
def test_cpu_dil(): # fmt: off @T.prim_func - def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -937,7 +937,7 @@ def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dil_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_1(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -983,7 +983,7 @@ def dil_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dil_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_2(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1065,7 +1065,7 @@ def dil_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cpu_gmm(): # fmt: off @T.prim_func - def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1096,7 +1096,7 @@ def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo T.writes(Z[v0, v1, v2]) Z[v0, v1, v2] = Z_global[v0, v1, v2] @T.prim_func - def gmm_1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_1(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1127,7 +1127,7 @@ def gmm_1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo T.writes(Z[v0, v1, v2]) Z[v0, v1, v2] = Z_global[v0, v1, v2] @T.prim_func - def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_2(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1182,7 +1182,7 @@ def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo def test_cpu_grp(): # fmt: off @T.prim_func - def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 
16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_0(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1228,7 +1228,7 @@ def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def grp_1(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_1(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1270,7 +1270,7 @@ def grp_1(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def grp_2(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_2(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1351,7 +1351,7 @@ def grp_2(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, def test_cpu_t2d(): # fmt: off @T.prim_func - def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_0(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1393,7 +1393,7 @@ def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 T.writes(conv2d_transpose_nhwc[v0, v1, v2, v3]) conv2d_transpose_nhwc[v0, v1, v2, v3] = conv2d_transpose_nhwc_global[v0, v1, v2, v3] @T.prim_func - def t2d_1(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_1(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1436,7 +1436,7 @@ def t2d_1(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 T.writes(conv2d_transpose_nhwc[v0, v1, v2, v3]) conv2d_transpose_nhwc[v0, v1, v2, v3] = conv2d_transpose_nhwc_global[v0, v1, v2, v3] @T.prim_func - def t2d_2(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_2(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body 
@@ -1507,7 +1507,7 @@ def t2d_2(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 def test_cpu_nrm(): # fmt: off @T.prim_func - def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_0(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1540,7 +1540,7 @@ def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_1(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1573,7 +1573,7 @@ def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_2(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_2(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1626,7 +1626,7 @@ def nrm_2(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N def test_cpu_sfm(): # fmt: off @T.prim_func - def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_0(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1679,7 +1679,7 @@ def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_7, i1_2] = T.exp(A[i0_7, i1_2] - T_softmax_maxelem[i0_7], dtype="float32") / T_softmax_expsum[i0_7] @T.prim_func - def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_1(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1742,7 +1742,7 @@ def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_9, i1_2] = T_softmax_exp[i0_9, i1_2] / T_softmax_expsum[i0_9] @T.prim_func - def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_2(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1785,7 +1785,7 @@ def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_2] = T.exp(A[i0_6, i1_2] - T_softmax_maxelem[i0_6], dtype="float32") / T_softmax_expsum[i0_6] @T.prim_func - def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_3(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1852,7 +1852,7 @@ def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_2] = T_softmax_exp[i0_6, i1_2] / 
T_softmax_expsum[i0_6] @T.prim_func - def sfm_4(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_4(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1914,7 +1914,7 @@ def sfm_4(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_10, i1_4] = T_softmax_exp[i0_10, i1_4] / T_softmax_expsum[i0_10] @T.prim_func - def sfm_5(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_5(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1971,7 +1971,7 @@ def sfm_5(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_5, i1_1] = T_softmax_exp[i0_5, i1_1] / T_softmax_expsum[i0_5] @T.prim_func - def sfm_6(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_6(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2017,7 +2017,7 @@ def sfm_6(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_1] = T.exp(A[i0_6, i1_1] - T_softmax_maxelem[i0_6], dtype="float32") / T_softmax_expsum[i0_6] @T.prim_func - def sfm_7(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_7(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2061,7 +2061,7 @@ def sfm_7(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_5, i1_2] = T.exp(A[i0_5, i1_2] - T_softmax_maxelem[i0_5], dtype="float32") / T_softmax_expsum[i0_5] @T.prim_func - def sfm_8(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_8(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2205,7 +2205,7 @@ def sfm_8(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 def test_cpu_cbr(): # fmt: off @T.prim_func - def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_0(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2236,7 +2236,7 @@ def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 T.writes(compute[i0_4, i1_4, i2_4, i3_4]) compute[i0_4, i1_4, i2_4, i3_4] = T.max((Conv2dOutput[i0_4, i1_4, i2_4, i3_4] + bias[i3_4]) * bn_scale[i3_4] + bn_offset[i3_4], T.float32(0)) 
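The expected kernels in these files are never executed; the tests build a schedule and compare the result against the TVMScript above, which in TVM is typically done through structural equality on the IR. A self-contained analogue of that check, with two hypothetical toy functions standing in for the expected and generated modules:

    import tvm
    from tvm.script import tir as T

    @T.prim_func
    def expected(A: T.Buffer((4,), "float32")) -> None:
        for i in T.serial(4):
            with T.block("inc"):
                vi = T.axis.spatial(4, i)
                A[vi] = A[vi] + T.float32(1)

    @T.prim_func
    def generated(A: T.Buffer((4,), "float32")) -> None:
        for i in T.serial(4):
            with T.block("inc"):
                vi = T.axis.spatial(4, i)
                A[vi] = A[vi] + T.float32(1)

    # Independently parsed functions compare equal up to variable renaming.
    assert tvm.ir.structural_equal(generated, expected, map_free_vars=True)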
@T.prim_func - def cbr_1(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_1(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2282,7 +2282,7 @@ def cbr_1(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 T.writes(compute[i0, i1, i2, i3]) compute[i0, i1, i2, i3] = T.max((Conv2dOutput[i0, i1, i2, i3] + bias[i3]) * bn_scale[i3] + bn_offset[i3], T.float32(0)) @T.prim_func - def cbr_2(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_2(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2374,7 +2374,7 @@ def cbr_2(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 def test_cpu_tbg(): # fmt: off @T.prim_func - def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_0(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2428,7 +2428,7 @@ def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, T.writes(C[v0, v1, v2, v3]) C[v0, v1, v2, v3] = C_global[v0, v1, v2, v3] @T.prim_func - def tbg_1(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_1(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2477,7 +2477,7 @@ def tbg_1(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, T.writes(C[v0, v1, v2, v3]) C[v0, v1, v2, v3] = C_global[v0, v1, v2, v3] @T.prim_func - def tbg_2(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_2(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py b/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py index 135304286b4b..1da2794a4cc6 100644 --- a/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py +++ b/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py @@ 
-41,7 +41,7 @@ def _design_space(mod): def test_cpu_nhwc(): # fmt: off @T.prim_func - def cpu_nhwc_0(X: T.Buffer[(1, 14, 14, 128), "float32"], W: T.Buffer[(6, 6, 128, 128), "float32"], conv2d_winograd: T.Buffer[(1, 12, 12, 128), "float32"]) -> None: + def cpu_nhwc_0(X: T.Buffer((1, 14, 14, 128), "float32"), W: T.Buffer((6, 6, 128, 128), "float32"), conv2d_winograd: T.Buffer((1, 12, 12, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body diff --git a/tests/python/unittest/test_meta_schedule_space_cuda.py b/tests/python/unittest/test_meta_schedule_space_cuda.py index 0a518c840d11..241fe63e1da0 100644 --- a/tests/python/unittest/test_meta_schedule_space_cuda.py +++ b/tests/python/unittest/test_meta_schedule_space_cuda.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cuda_c1d(): # fmt: off @T.prim_func - def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_0(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -122,7 +122,7 @@ def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 def test_cuda_c2d(): # fmt: off @T.prim_func - def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() @@ -206,7 +206,7 @@ def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cuda_c3d(): # fmt: off @T.prim_func - def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_0(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() @@ -296,7 +296,7 @@ def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 def test_cuda_cap(): # fmt: off @T.prim_func - def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_0(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -392,7 +392,7 @@ def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( def test_cuda_dep(): # fmt: off @T.prim_func - def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_0(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 
112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -475,7 +475,7 @@ def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. def test_cuda_dil(): # fmt: off @T.prim_func - def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -558,7 +558,7 @@ def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cuda_gmm(): # fmt: off @T.prim_func - def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -634,7 +634,7 @@ def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo def test_cuda_grp(): # fmt: off @T.prim_func - def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_0(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -718,7 +718,7 @@ def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, def test_cuda_t2d(): # fmt: off @T.prim_func - def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_0(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -804,7 +804,7 @@ def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 def test_cuda_nrm(): # fmt: off @T.prim_func - def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_0(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -832,7 +832,7 @@ def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_1(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -881,7 +881,7 @@ def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N def test_cuda_sfm(): # fmt: off @T.prim_func - def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_0(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr 
dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -923,7 +923,7 @@ def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum[i0] @T.prim_func - def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_1(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -965,7 +965,7 @@ def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum[i0] @T.prim_func - def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_2(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1009,7 +1009,7 @@ def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum_shared[i0] @T.prim_func - def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_3(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1088,7 +1088,7 @@ def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 def test_cuda_cbr(): # fmt: off @T.prim_func - def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_0(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1173,7 +1173,7 @@ def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 def test_cuda_tbg(): # fmt: off @T.prim_func - def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_0(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() diff --git a/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py b/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py index 53a153b90522..87a8fcac9800 100644 --- a/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py +++ b/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cuda_nhwc(): # fmt: off @T.prim_func - def cuda_nhwc_0(data: T.Buffer[(1, 14, 14, 128), "float32"], weight: T.Buffer[(6, 6, 128, 128), "float32"], 
conv2d_winograd: T.Buffer[(1, 12, 12, 128), "float32"]) -> None: + def cuda_nhwc_0(data: T.Buffer((1, 14, 14, 128), "float32"), weight: T.Buffer((6, 6, 128, 128), "float32"), conv2d_winograd: T.Buffer((1, 12, 12, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -200,7 +200,7 @@ def cuda_nhwc_0(data: T.Buffer[(1, 14, 14, 128), "float32"], weight: T.Buffer[(6 def test_cuda_nchw(): # fmt: off @T.prim_func - def cuda_nchw_0(data: T.Buffer[(1, 64, 56, 56), "float32"], weight: T.Buffer[(6, 6, 64, 64), "float32"], conv2d_winograd: T.Buffer[(1, 64, 56, 56), "float32"]) -> None: + def cuda_nchw_0(data: T.Buffer((1, 64, 56, 56), "float32"), weight: T.Buffer((6, 6, 64, 64), "float32"), conv2d_winograd: T.Buffer((1, 64, 56, 56), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -353,7 +353,7 @@ def cuda_nchw_0(data: T.Buffer[(1, 64, 56, 56), "float32"], weight: T.Buffer[(6, def test_cuda_nchw_add_relu(): # fmt: off @T.prim_func - def nchw_add_relu(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4, 2048, 2048), "float32"], p2: T.Buffer[(1, 2048, 1, 1), "float32"], T_relu: T.Buffer[(2, 2048, 50, 75), "float32"]): + def nchw_add_relu(p0: T.Buffer((2, 2048, 50, 75), "float32"), p1: T.Buffer((4, 4, 2048, 2048), "float32"), p2: T.Buffer((1, 2048, 1, 1), "float32"), T_relu: T.Buffer((2, 2048, 50, 75), "float32")): # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -440,7 +440,7 @@ def nchw_add_relu(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4 T_relu[ax0, ax1, ax2, ax3] = T.max(T_add[ax0, ax1, ax2, ax3], T.float32(0)) @T.prim_func - def nchw_add_relu_scheduled(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4, 2048, 2048), "float32"], p2: T.Buffer[(1, 2048, 1, 1), "float32"], T_relu: T.Buffer[(2, 2048, 50, 75), "float32"]): + def nchw_add_relu_scheduled(p0: T.Buffer((2, 2048, 50, 75), "float32"), p1: T.Buffer((4, 4, 2048, 2048), "float32"), p2: T.Buffer((1, 2048, 1, 1), "float32"), T_relu: T.Buffer((2, 2048, 50, 75), "float32")): # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) # body diff --git a/tests/python/unittest/test_meta_schedule_trace_apply.py b/tests/python/unittest/test_meta_schedule_trace_apply.py index c242f63b98ea..ae65cc1a815b 100644 --- a/tests/python/unittest/test_meta_schedule_trace_apply.py +++ b/tests/python/unittest/test_meta_schedule_trace_apply.py @@ -33,9 +33,9 @@ class Dense: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_matmul_NT: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_matmul_NT: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) @@ -56,9 +56,9 @@ def main( class DenseAdd: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -92,9 +92,9 @@ def main( class 
DenseAdd_scheduled_cpu: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -174,7 +174,7 @@ def main( @tvm.script.ir_module class DenseAdd_cpu_no_write_cache: @T.prim_func - def main(p0: T.Buffer[(128, 128), "float32"], p1: T.Buffer[(128, 128), "float32"], T_add: T.Buffer[(128, 128), "float32"]) -> None: + def main(p0: T.Buffer((128, 128), "float32"), p1: T.Buffer((128, 128), "float32"), T_add: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -221,9 +221,9 @@ def main(p0: T.Buffer[(128, 128), "float32"], p1: T.Buffer[(128, 128), "float32" class DenseAdd_scheduled_gpu: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -374,7 +374,7 @@ def main( @tvm.script.ir_module class Conv2dInt8: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -490,7 +490,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_target: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "uint8"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "uint8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -634,7 +634,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], 
p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_tensorcore_scheduled: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "uint8"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "uint8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) A_s0 = T.var("int32") @@ -756,7 +756,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_NCHWc: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "int32"], compute: T.Buffer[(1, 128, 7, 7, 16), "uint8"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), "int32"), compute: T.Buffer((1, 128, 7, 7, 16), "uint8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -919,7 +919,7 @@ def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, @tvm.script.ir_module class Conv2dInt8_NCHWc_target: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "uint8"], T_cast: T.Buffer[(1, 128, 7, 7, 16), "int32"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), "uint8"), T_cast: T.Buffer((1, 128, 7, 7, 16), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1137,7 +1137,7 @@ def get_conv2d_vnni_mod(intrin_id): @tvm.script.ir_module class Conv2dInt8_NCHWc_scheduled: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "uint8"], T_cast: T.Buffer[(1, 128, 7, 7, 16), "int32"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), 
"uint8"), T_cast: T.Buffer((1, 128, 7, 7, 16), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1200,7 +1200,7 @@ def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, @tvm.script.ir_module class Conv2dWinogradAddRelu: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), "float32")) -> None: # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) # body @@ -1292,7 +1292,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dWinogradAddResidualRelu: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], p3: T.Buffer[(1, 56, 56, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), p3: T.Buffer((1, 56, 56, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -1391,7 +1391,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dWinogradAddResidualRelu_scheduled: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], p3: T.Buffer[(1, 56, 56, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), p3: T.Buffer((1, 56, 56, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -1531,7 +1531,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dInt8_with_predicate: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -1605,7 +1605,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_with_predicate_target: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: 
T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1700,7 +1700,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_with_predicate_scheduled: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index 39919f337197..734404fb3450 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -160,7 +160,7 @@ def test_export_model_library_format_c( target = tvm.target.target.micro("host") with utils.TempDirectory.set_keep_for_debug(True): with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { @@ -254,7 +254,7 @@ def test_export_model_library_format_llvm(): assert str(target)[:2] == "c " target = tvm.target.Target("llvm " + str(target)[2:]) with tvm.transform.PassContext(opt_level=3): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { @@ -339,7 +339,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ def test_export_model_library_format_workspace(executor, runtime): target = tvm.target.target.micro("host") with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int16], %p2: Tensor[(1, 1, 1, 128), int32]){ diff --git 
a/tests/python/unittest/test_slice_tir.py b/tests/python/unittest/test_slice_tir.py index 03cd8f67d6b2..fea2ce480e48 100644 --- a/tests/python/unittest/test_slice_tir.py +++ b/tests/python/unittest/test_slice_tir.py @@ -122,7 +122,7 @@ class TestAnnotateAndSliceTIR(tvm.testing.CompareBeforeAfter): # @tvm.script.ir_module # class irmod_before: # @T.prim_func - # def main(A: T.Buffer[(1,), "int8"): + # def main(A: T.Buffer((1,), "int8"): # #A = T.match_buffer(a, (1,), "int8") # A[0] = 0 # with T.block("block_foo"): # optional: give this block a name, perhaps for testing? diff --git a/tests/python/unittest/test_target_codegen_llvm.py b/tests/python/unittest/test_target_codegen_llvm.py index e179d17101a3..d8a853ff5dbf 100644 --- a/tests/python/unittest/test_target_codegen_llvm.py +++ b/tests/python/unittest/test_target_codegen_llvm.py @@ -920,7 +920,7 @@ def test_llvm_scalar_concat(): def test_raise_exception_during_codegen(): @T.prim_func def threadpool_nested_parallel_loop( - A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"] + A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i in T.parallel(4): diff --git a/tests/python/unittest/test_target_codegen_vulkan.py b/tests/python/unittest/test_target_codegen_vulkan.py index 7b71f4d4ab17..bfb10ca85a38 100644 --- a/tests/python/unittest/test_target_codegen_vulkan.py +++ b/tests/python/unittest/test_target_codegen_vulkan.py @@ -578,7 +578,7 @@ def test_negative_operand_divmod(target, dev): divisor = 5 @T.prim_func - def func(A: T.Buffer[(N, 2), "int32"]): + def func(A: T.Buffer((N, 2), "int32")): for i in T.serial(N): with T.block("A"): v_i = T.axis.spatial(N, i) diff --git a/tests/python/unittest/test_target_codegen_x86.py b/tests/python/unittest/test_target_codegen_x86.py index af91ed4520fd..8ff9dbb3ddc8 100644 --- a/tests/python/unittest/test_target_codegen_x86.py +++ b/tests/python/unittest/test_target_codegen_x86.py @@ -81,7 +81,7 @@ def @main(%inp : Tensor[(3), float32], %cst : Tensor[(3), float32]) { """ ) - ir_mod = tvm.parser.fromtext(relay_model) + ir_mod = tvm.relay.fromtext(relay_model) arch = "i386" if machine == "i386" else "x86_64" aot_factory = tvm.relay.build( diff --git a/tests/python/unittest/test_te_create_primfunc.py b/tests/python/unittest/test_te_create_primfunc.py index 4b8d857e8619..0b6f87b833a3 100644 --- a/tests/python/unittest/test_te_create_primfunc.py +++ b/tests/python/unittest/test_te_create_primfunc.py @@ -78,9 +78,9 @@ def tir_matmul(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def tir_matmul_int64( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, j0, k0 in T.grid(T.int64(128), T.int64(128), T.int64(128)): @@ -396,9 +396,9 @@ def test_tensor_attr(): @T.prim_func def expected_layout_attr( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - D: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + D: T.Buffer((128, 128), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) C = T.alloc_buffer([128, 128], 
dtype="float32") @@ -417,9 +417,9 @@ def expected_layout_attr( @T.prim_func def expected_layout_attr_int64( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - D: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + D: T.Buffer((T.int64(128), T.int64(128)), "float32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) C = T.alloc_buffer([T.int64(128), T.int64(128)], dtype="float32") @@ -586,9 +586,9 @@ def te_func(): @T.prim_func def expected( - a: T.Buffer[(), "int32"], - b: T.Buffer[(), "int32"], - c: T.Buffer[(), "int32"], + a: T.Buffer((), "int32"), + b: T.Buffer((), "int32"), + c: T.Buffer((), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -612,8 +612,8 @@ def te_reshape(): @T.prim_func def tir_reshape( - A: T.Buffer[(T.int64(2), T.int64(4)), "float32"], - T_reshape: T.Buffer[(T.int64(4), T.int64(2)), "float32"], + A: T.Buffer((T.int64(2), T.int64(4)), "float32"), + T_reshape: T.Buffer((T.int64(4), T.int64(2)), "float32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1 in T.grid(T.int64(4), T.int64(2)): @@ -638,8 +638,8 @@ def test_reshape(): @T.prim_func def argmax_expected( - p0: T.Buffer[(T.int64(1), T.int64(64), T.int64(56), T.int64(56)), "uint8"], - p0_red: T.Buffer[(T.int64(1), T.int64(56), T.int64(56)), "int32"], + p0: T.Buffer((T.int64(1), T.int64(64), T.int64(56), T.int64(56)), "uint8"), + p0_red: T.Buffer((T.int64(1), T.int64(56), T.int64(56)), "int32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) p0_red_temp_v0 = T.alloc_buffer([T.int64(1), T.int64(56), T.int64(56)], dtype="int32") @@ -707,7 +707,7 @@ def te_resize2d_symbolic(): @T.prim_func def tir_resize2d_symbolic( - A: T.Buffer[(T.int64(2), T.int64(3), T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(2), T.int64(3), T.int64(128), T.int64(128)), "float32"), var_resize: T.handle, ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py b/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py index 1a2d50ef5d7f..2311bfbbef3c 100644 --- a/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py +++ b/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py @@ -22,14 +22,14 @@ @T.prim_func -def scale_by_two(a: T.Buffer[(128,), "int8"], c: T.Buffer[(128,), "int8"]): +def scale_by_two(a: T.Buffer((128,), "int8"), c: T.Buffer((128,), "int8")): for i in T.serial(128): with T.block("C"): c[i] = a[i] * T.int8(2) @T.prim_func -def scale_by_two_three(a: T.Buffer[(128,), "int8"], c: T.Buffer[(128,), "int8"]): +def scale_by_two_three(a: T.Buffer((128,), "int8"), c: T.Buffer((128,), "int8")): B = T.alloc_buffer([128], dtype="int8", scope="global.vtcm") for i in T.serial(128): with T.block("B"): diff --git a/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py b/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py index d438427e1fe1..a1808c841303 100644 --- a/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py +++ b/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py @@ -95,7 +95,7 @@ def match_buffer_func(a: T.handle, b: T.handle) -> None: @T.prim_func def global_buffer_with_blockidx( - a: T.Buffer[(1, 32), "int32"], 
b: T.Buffer[(1, 32), "int32"] + a: T.Buffer((1, 32), "int32"), b: T.Buffer((1, 32), "int32") ) -> None: for i0 in T.thread_binding(0, 1, thread="blockIdx.x"): for i1 in T.thread_binding(0, 32, thread="threadIdx.x"): diff --git a/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py b/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py index 8c16c81388ed..06f6fe31278d 100644 --- a/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py +++ b/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py @@ -51,7 +51,7 @@ def test_te_workload(workload, flops): @T.prim_func -def flops_with_let(a: T.Buffer[16, "float32"]): +def flops_with_let(a: T.Buffer(16, "float32")): for i in range(8): j = i + 8 a[j] = a[i] @@ -63,7 +63,7 @@ def test_flops_with_let(): @T.prim_func -def flops_with_if(a: T.Buffer[16, "float32"], b: T.Buffer[16, "float32"]): +def flops_with_if(a: T.Buffer(16, "float32"), b: T.Buffer(16, "float32")): for i in range(16): if i % 2 == 0: a[i] = b[i] diff --git a/tests/python/unittest/test_tir_analysis_oob.py b/tests/python/unittest/test_tir_analysis_oob.py index f910ca503be2..83c029417624 100644 --- a/tests/python/unittest/test_tir_analysis_oob.py +++ b/tests/python/unittest/test_tir_analysis_oob.py @@ -21,29 +21,29 @@ @T.prim_func -def bad_load(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_load(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): B[0, 0] = A[2, 2] @T.prim_func -def bad_load_loop(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_load_loop(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): for i in range(3): B[i, 0] = A[i, 2] @T.prim_func -def bad_store(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_store(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): B[0, 3] = A[1, 2] @T.prim_func -def bad_store_loop(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_store_loop(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): for i in range(3): B[0, i] = A[1, i] @T.prim_func -def unknown_bounds(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def unknown_bounds(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): N = T.var("int32") for i in range(3): B[0, N] = A[1, i] diff --git a/tests/python/unittest/test_tir_analysis_verify_well_formed.py b/tests/python/unittest/test_tir_analysis_verify_well_formed.py index b3028a0148aa..023d5f5f315c 100644 --- a/tests/python/unittest/test_tir_analysis_verify_well_formed.py +++ b/tests/python/unittest/test_tir_analysis_verify_well_formed.py @@ -22,8 +22,8 @@ def test_pass_simple(): @T.prim_func def element_wise( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ): B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -41,8 +41,8 @@ def element_wise( def test_fail_use_out_loop_var(): @T.prim_func def element_wise( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ): for i, j in T.grid(128, 128): with T.block("B"): diff --git a/tests/python/unittest/test_tir_imm_values.py b/tests/python/unittest/test_tir_imm_values.py index a2a19a09ad87..416943c85da6 100644 --- a/tests/python/unittest/test_tir_imm_values.py +++ b/tests/python/unittest/test_tir_imm_values.py @@ -254,19 +254,19 @@ def test_tir_floatimm_const_fold(): 
"""Behavior check: folding fp32 match platform f32 arithmetic""" @T.prim_func - def float_imm_multiply(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_multiply(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x * y @T.prim_func - def float_imm_add(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_add(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x + y @T.prim_func - def float_imm_sub(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_sub(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x - y @T.prim_func - def float_imm_div(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_div(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x / y def __wrap_build(f): diff --git a/tests/python/unittest/test_tir_ptx_cp_async.py b/tests/python/unittest/test_tir_ptx_cp_async.py index dc521f3c471a..0e61f6d1b4f9 100644 --- a/tests/python/unittest/test_tir_ptx_cp_async.py +++ b/tests/python/unittest/test_tir_ptx_cp_async.py @@ -21,7 +21,7 @@ @T.prim_func -def ptx_cp_async(A: T.Buffer[(32, 128), "float16"], B: T.Buffer[(32, 128), "float16"]) -> None: +def ptx_cp_async(A: T.Buffer((32, 128), "float16"), B: T.Buffer((32, 128), "float16")) -> None: T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") tx = T.env_thread("threadIdx.x") diff --git a/tests/python/unittest/test_tir_ptx_ldmatrix.py b/tests/python/unittest/test_tir_ptx_ldmatrix.py index f652be442133..615d33ae004e 100644 --- a/tests/python/unittest/test_tir_ptx_ldmatrix.py +++ b/tests/python/unittest/test_tir_ptx_ldmatrix.py @@ -23,7 +23,7 @@ @T.prim_func def ptx_ldmatrix( - A: T.Buffer[(16, 16), "float16"], B: T.Buffer[(16, 16), "float16"], num: T.int32, trans: T.uint8 + A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16"), num: T.int32, trans: T.uint8 ) -> None: T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") diff --git a/tests/python/unittest/test_tir_renew_defs.py b/tests/python/unittest/test_tir_renew_defs.py index 65f81499bdfd..e14cd5a89832 100644 --- a/tests/python/unittest/test_tir_renew_defs.py +++ b/tests/python/unittest/test_tir_renew_defs.py @@ -53,7 +53,7 @@ def _check_block_signature_remap(lhs: Block, rhs: Block): def test_simple(): @T.prim_func # Buffer A should be remapped - def elementwise(A: T.Buffer[(128, 128), "float32"]): + def elementwise(A: T.Buffer((128, 128), "float32")): # Buffer B should be remapped B = T.alloc_buffer((128, 128), "float32") # i, j should be remapped @@ -86,7 +86,7 @@ def _get_block(f): def test_match_buffer(): @T.prim_func # A and B should be remapped - def func_match_buffer(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]): + def func_match_buffer(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")): with T.block("root"): s = T.var("int32") e = T.var("int32") diff --git a/tests/python/unittest/test_tir_schedule_analysis.py b/tests/python/unittest/test_tir_schedule_analysis.py index 349c4734c9ee..0002de38794b 100644 --- a/tests/python/unittest/test_tir_schedule_analysis.py +++ b/tests/python/unittest/test_tir_schedule_analysis.py @@ -149,9 +149,9 @@ def test_suggest_index_map_winograd(): class DenseTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: 
T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -173,9 +173,9 @@ def main( class Conv2dNCHWcTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): diff --git a/tests/python/unittest/test_tir_schedule_blockize.py b/tests/python/unittest/test_tir_schedule_blockize.py index a68170009bb5..cd4ce663e58e 100644 --- a/tests/python/unittest/test_tir_schedule_blockize.py +++ b/tests/python/unittest/test_tir_schedule_blockize.py @@ -26,7 +26,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,line-too-long,redefined-outer-name,unexpected-keyword-arg,too-many-nested-blocks @T.prim_func -def single_elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]): +def single_elementwise(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")): for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -39,8 +39,8 @@ def single_elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128 def test_blockize_outer(): @T.prim_func def after_blockize_outer( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ) -> None: with T.block("blockized_B"): vio = T.axis.spatial(1, 0) @@ -61,8 +61,8 @@ def after_blockize_outer( def test_blockize_inner(): @T.prim_func def after_blockize_inner( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ) -> None: for i in T.serial(128): with T.block("blockized_B"): @@ -84,8 +84,8 @@ def after_blockize_inner( def test_two_elementwise_blockize_reverse_compute_at(): @T.prim_func def before_blockize_rca( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(8, 8): @@ -109,8 +109,8 @@ def before_blockize_rca( @T.prim_func def after_blockize_rca( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(8, 8): @@ -146,8 +146,8 @@ def after_blockize_rca( def test_two_elementwise_blockize_compute_at(): @T.prim_func def before_blockize_compute_at( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -175,8 +175,8 @@ def before_blockize_compute_at( @T.prim_func def after_blockize_compute_at( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 
128], dtype="float32") for i_0, j_0 in T.grid(8, 8): @@ -215,7 +215,7 @@ def after_blockize_compute_at( def test_blockize_init_loops(): @T.prim_func - def rowsum(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128,), "float32"]) -> None: + def rowsum(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128,), "float32")) -> None: for k, i in T.grid(128, 128): with T.block("B"): vk, vi = T.axis.remap("RS", [k, i]) @@ -225,8 +225,8 @@ def rowsum(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128,), "float32"]) - @T.prim_func def after_rowsum_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128,), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128,), "float32"), ) -> None: with T.block("blockized_B"): vko = T.axis.R(1, 0) @@ -252,8 +252,8 @@ def after_rowsum_blockize( def test_blockize_outer_int64_shape(preserve_unit_iters): @T.prim_func def single_elementwise_int64( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0, i1, j1 in T.grid(T.int64(1), T.int64(8), T.int64(16), T.int64(16)): with T.block("B"): @@ -263,8 +263,8 @@ def single_elementwise_int64( @T.prim_func def after_single_elementwise_int64_blockize( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0 in T.grid(T.int64(1), T.int64(8)): with T.block("B_o"): @@ -279,8 +279,8 @@ def after_single_elementwise_int64_blockize( @T.prim_func def after_single_elementwise_int64_blockize_preserve_unit_iters( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0 in T.grid(T.int64(1), T.int64(8)): with T.block("B_o"): diff --git a/tests/python/unittest/test_tir_schedule_cache_index.py b/tests/python/unittest/test_tir_schedule_cache_index.py index d446249e018e..a509c02b37f3 100644 --- a/tests/python/unittest/test_tir_schedule_cache_index.py +++ b/tests/python/unittest/test_tir_schedule_cache_index.py @@ -41,7 +41,7 @@ def resize(a: T.handle, b: T.handle) -> None: @T.prim_func def resize_cache_index( - A: T.Buffer[(1, 3, 40, 40), "float32"], B: T.Buffer[(1, 3, 80, 80), "float32"] + A: T.Buffer((1, 3, 40, 40), "float32"), B: T.Buffer((1, 3, 80, 80), "float32") ) -> None: index_var_0 = T.alloc_buffer([80, 80], dtype="int32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) @@ -67,7 +67,7 @@ def resize_cache_index( @T.prim_func def bilinear_resize( - x: T.Buffer[(1, 3, 40, 40), "float16"], resize: T.Buffer[(1, 3, 80, 80), "float16"] + x: T.Buffer((1, 3, 40, 40), "float16"), resize: T.Buffer((1, 3, 80, 80), "float16") ): for i0, i1, i2, i3 in T.grid(1, 3, 80, 80): with T.block("resize"): @@ -336,7 +336,7 @@ def bilinear_resize( @T.prim_func def cached_bilinear_resize( - x: T.Buffer[(1, 3, 40, 40), "float16"], resize: T.Buffer[(1, 3, 80, 80), "float16"] + x: T.Buffer((1, 3, 40, 40), "float16"), resize: T.Buffer((1, 3, 80, 80), "float16") ): index_var_0 = T.alloc_buffer([80], dtype="float32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) diff --git a/tests/python/unittest/test_tir_schedule_cache_read_write.py 
b/tests/python/unittest/test_tir_schedule_cache_read_write.py index bcb214594cb8..be91505f3d15 100644 --- a/tests/python/unittest/test_tir_schedule_cache_read_write.py +++ b/tests/python/unittest/test_tir_schedule_cache_read_write.py @@ -251,7 +251,7 @@ def func_with_block_predicate() -> None: @T.prim_func -def inplace_func(data_io: T.Buffer[(64), "int32"]): +def inplace_func(data_io: T.Buffer((64), "int32")): data_1d = T.alloc_buffer([64], dtype="int32") for i0 in T.serial(64): with T.block("copy_in"): @@ -269,7 +269,7 @@ def inplace_func(data_io: T.Buffer[(64), "int32"]): @T.prim_func -def inplace_call(data_io: T.Buffer[(64), "int32"]): +def inplace_call(data_io: T.Buffer((64), "int32")): for i0 in T.serial(1): with T.block("ext_call"): T.reads(data_io[:64]) @@ -279,7 +279,7 @@ def inplace_call(data_io: T.Buffer[(64), "int32"]): @T.prim_func def cache_read_nested_seq_target( - B: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + B: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: A = T.alloc_buffer([128, 128], dtype="float32") A_global = T.alloc_buffer([128, 128], dtype="float32") @@ -597,7 +597,7 @@ def cache_read_shape_int64(var_A: T.handle, var_C: T.handle) -> None: @T.prim_func -def cache_read_inplace(data_io: T.Buffer[64, "int32"]) -> None: +def cache_read_inplace(data_io: T.Buffer(64, "int32")) -> None: data_1d = T.alloc_buffer([64], dtype="int32") data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") for ax0 in T.serial(64): @@ -626,7 +626,7 @@ def cache_read_inplace(data_io: T.Buffer[64, "int32"]) -> None: @T.prim_func -def cache_inplace_buffer(data_io: T.Buffer[64, "int32"]) -> None: +def cache_inplace_buffer(data_io: T.Buffer(64, "int32")) -> None: data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") data_io_global = T.alloc_buffer([64], dtype="int32") data_io_global_1 = T.alloc_buffer([64], dtype="int32") @@ -1007,7 +1007,7 @@ def block_predicate_cache_write_output_buf() -> None: @T.prim_func def cache_write_allocate_const( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float16"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float16") ): B = T.alloc_buffer([128, 128], dtype="float32") const = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) @@ -1031,7 +1031,7 @@ def cache_write_allocate_const( @T.prim_func def cache_write_allocate_const_output( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float16"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float16") ): B = T.alloc_buffer([128, 128], dtype="float32") A_global = T.alloc_buffer([128, 128], dtype="float32") diff --git a/tests/python/unittest/test_tir_schedule_compute_at.py b/tests/python/unittest/test_tir_schedule_compute_at.py index 34ca937cc2ba..f94347409a6b 100644 --- a/tests/python/unittest/test_tir_schedule_compute_at.py +++ b/tests/python/unittest/test_tir_schedule_compute_at.py @@ -793,7 +793,7 @@ def read_out_of_bound_after_compute_at(a: T.handle, c: T.handle) -> None: @T.prim_func -def multi_reduction(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(), "float32"]): +def multi_reduction(A: T.Buffer((16, 16), "float32"), C: T.Buffer((), "float32")): B = T.alloc_buffer((16, ), dtype="float32") for i, k in T.grid(16, 16): with T.block("B"): @@ -811,8 +811,8 @@ def multi_reduction(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(), "float32"] @T.prim_func def multi_reduction_after_compute_at( - A: T.Buffer[(16, 16), "float32"], - C:T.Buffer[(), "float32"], + 
A: T.Buffer((16, 16), "float32"), + C:T.Buffer((), "float32"), ): B = T.alloc_buffer((16, ), dtype="float32") for k in T.grid(16): @@ -879,9 +879,9 @@ def tiled_pooling_read_cache_after_compute_at(a: T.handle, b: T.handle) -> None: cache[h + kh - 1, w + kw - 1], 0.0, dtype="float32")) @T.prim_func -def non_uniform_tiled_conv(x: T.Buffer[(1, 3, 100, 100), "float32"], - w: T.Buffer[(16, 3, 3, 3), "float32"], - y: T.Buffer[(1, 16, 98, 98), "float32"]) -> None: +def non_uniform_tiled_conv(x: T.Buffer((1, 3, 100, 100), "float32"), + w: T.Buffer((16, 3, 3, 3), "float32"), + y: T.Buffer((1, 16, 98, 98), "float32")) -> None: x_global = T.alloc_buffer([1, 3, 100, 100], dtype="float32") for ax0, ax1, ax2, ax3 in T.grid(1, 3, 100, 100): with T.block("cache"): @@ -901,9 +901,9 @@ def non_uniform_tiled_conv(x: T.Buffer[(1, 3, 100, 100), "float32"], x_global[nn, cc // 16 * 3 + rc, hh + rh, ww + rw] * w[cc, rc, rh, rw] @T.prim_func -def non_uniform_tiled_conv_after_compute_at(x: T.Buffer[(1, 3, 100, 100), "float32"], - w: T.Buffer[(16, 3, 3, 3), "float32"], - y: T.Buffer[(1, 16, 98, 98), "float32"]) -> None: +def non_uniform_tiled_conv_after_compute_at(x: T.Buffer((1, 3, 100, 100), "float32"), + w: T.Buffer((16, 3, 3, 3), "float32"), + y: T.Buffer((1, 16, 98, 98), "float32")) -> None: x_global = T.alloc_buffer([1, 3, 100, 100], dtype="float32") for h_o, w_o in T.grid(7, 7): for ax0, ax1, ax2 in T.grid(3, 17, 17): @@ -928,9 +928,9 @@ def non_uniform_tiled_conv_after_compute_at(x: T.Buffer[(1, 3, 100, 100), "float x_global[nn, cc // 16 * 3 + rc, hh + rh, ww + rw] * w[cc, rc, rh, rw] @T.prim_func -def concat_two_elemwise(x: T.Buffer[(16,), "float32"], - y: T.Buffer[(8,), "float32"], - T_concat: T.Buffer[(24,), "float32"]) -> None: +def concat_two_elemwise(x: T.Buffer((16,), "float32"), + y: T.Buffer((8,), "float32"), + T_concat: T.Buffer((24,), "float32")) -> None: T_add_1 = T.alloc_buffer([16], dtype="float32") T_add_2 = T.alloc_buffer([8], dtype="float32") for i in T.serial(16): @@ -947,9 +947,9 @@ def concat_two_elemwise(x: T.Buffer[(16,), "float32"], T_concat[ax] = T.if_then_else(16 <= ax, T_add_2[ax - 16], T_add_1[ax], dtype="float32") @T.prim_func -def concat_two_elemwise_after_compute_at(x: T.Buffer[(16,), "float32"], - y: T.Buffer[(8,), "float32"], - T_concat: T.Buffer[(24,), "float32"]) -> None: +def concat_two_elemwise_after_compute_at(x: T.Buffer((16,), "float32"), + y: T.Buffer((8,), "float32"), + T_concat: T.Buffer((24,), "float32")) -> None: T_add_1 = T.alloc_buffer([16], dtype="float32") T_add_2 = T.alloc_buffer([8], dtype="float32") for i in T.serial(24): @@ -996,7 +996,7 @@ def floordiv_and_floormod_indices_after_reverse_compute_at(a: T.handle, b: T.han @T.prim_func -def tiled_repeat_op(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "float32"]) -> None: +def tiled_repeat_op(x: T.Buffer((4,), "float32"), T_repeat: T.Buffer((64,), "float32")) -> None: T_add = T.alloc_buffer([4], dtype="float32") for i0 in T.serial(4): with T.block("T_add"): @@ -1008,7 +1008,7 @@ def tiled_repeat_op(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "flo T_repeat[ax0] = T_add[ax0 // 16] @T.prim_func -def tiled_repeat_op_after_compute_at(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "float32"]) -> None: +def tiled_repeat_op_after_compute_at(x: T.Buffer((4,), "float32"), T_repeat: T.Buffer((64,), "float32")) -> None: T_add = T.alloc_buffer([4], dtype="float32") for i0_0 in T.serial(8): with T.block("T_add"): @@ -1020,7 +1020,7 @@ def tiled_repeat_op_after_compute_at(x: T.Buffer[(4,), 
"float32"], T_repeat: T.B T_repeat[ax0] = T_add[ax0 // 16] @T.prim_func -def static_bound(A: T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32"]) -> None: +def static_bound(A: T.Buffer((32, 1), "float32"), C: T.Buffer((32, 1), "float32")) -> None: B = T.alloc_buffer((32, 1), "float32") for i, j in T.grid(32, 1): with T.block("B"): @@ -1035,7 +1035,7 @@ def static_bound(A: T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32" C[vi, vj] = B[vi, vj] + 1.0 @T.prim_func -def static_bound_after_compute_at(A: T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32"]) -> None: +def static_bound_after_compute_at(A: T.Buffer((32, 1), "float32"), C: T.Buffer((32, 1), "float32")) -> None: B = T.alloc_buffer((32, 1), "float32") for i in range(32): for ax0, ax1 in T.grid(1, 1): @@ -1251,7 +1251,7 @@ def test_compute_at_simplify_static_bound(use_block_name): def test_compute_at_non_perfect_channel_group(use_block_name): @T.prim_func def grouped_channel_bias( - X: T.Buffer[(720, 8, 8), "float32"], Y: T.Buffer[(720, 8, 8), "float32"] + X: T.Buffer((720, 8, 8), "float32"), Y: T.Buffer((720, 8, 8), "float32") ): B = T.alloc_buffer([45], dtype="float32", scope="") for i in T.grid(45): @@ -1266,7 +1266,7 @@ def grouped_channel_bias( @T.prim_func def grouped_channel_bias_non_perfect_tiled( - X: T.Buffer[(720, 8, 8), "float32"], Y: T.Buffer[(720, 8, 8), "float32"] + X: T.Buffer((720, 8, 8), "float32"), Y: T.Buffer((720, 8, 8), "float32") ): B = T.alloc_buffer([45], dtype="float32") for c_o in range(2): @@ -1356,9 +1356,9 @@ def _create_prim_func(): def test_compute_at_to_index(): @T.prim_func def multi_producers_conv( - data: T.Buffer[(1, 3, 224, 224), "int8"], - w: T.Buffer[(16, 3, 7, 7), "int8"], - conv: T.Buffer[(1, 16, 112, 112), "int32"], + data: T.Buffer((1, 3, 224, 224), "int8"), + w: T.Buffer((16, 3, 7, 7), "int8"), + conv: T.Buffer((1, 16, 112, 112), "int32"), ) -> None: pad = T.alloc_buffer([1, 3, 230, 230], dtype="int8") wbuf = T.alloc_buffer([16, 3, 7, 7], dtype="int8") @@ -1395,9 +1395,9 @@ def multi_producers_conv( @T.prim_func def multi_producers_after_compute_at( - data: T.Buffer[(1, 3, 224, 224), "int8"], - w: T.Buffer[(16, 3, 7, 7), "int8"], - conv: T.Buffer[(1, 16, 112, 112), "int32"], + data: T.Buffer((1, 3, 224, 224), "int8"), + w: T.Buffer((16, 3, 7, 7), "int8"), + conv: T.Buffer((1, 16, 112, 112), "int32"), ) -> None: pad = T.alloc_buffer([1, 3, 230, 230], dtype="int8") wbuf = T.alloc_buffer([16, 3, 7, 7], dtype="int8") @@ -1444,7 +1444,7 @@ def multi_producers_after_compute_at( def test_reverse_compute_at_to_index(): @T.prim_func - def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"]) -> None: + def main(A: T.Buffer((128, 128), "float32"), D: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") C = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0, i_1 in T.grid(8, 8, 16): @@ -1471,7 +1471,7 @@ def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"]) @T.prim_func def main_reverse_compute_at( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") C = T.alloc_buffer([128, 128], dtype="float32") @@ -1507,7 +1507,7 @@ def main_reverse_compute_at( def test_reverse_compute_at_with_unit_loop(): @T.prim_func - def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"]) -> None: + def main(A: T.Buffer((128, 128), 
"float32"), D: T.Buffer((1, 2, 1), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0, i_1 in T.grid(T.int64(8), T.int64(8), T.int64(16)): for j_1 in T.serial(T.int64(16)): @@ -1526,7 +1526,7 @@ def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"]) @T.prim_func def main_reverse_compute_at( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 2, 1), "float32") ): B = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0, i_1 in T.grid(T.int64(8), T.int64(8), T.int64(16)): diff --git a/tests/python/unittest/test_tir_schedule_compute_inline.py b/tests/python/unittest/test_tir_schedule_compute_inline.py index bd46e10efaea..ee5e85e4f05b 100644 --- a/tests/python/unittest/test_tir_schedule_compute_inline.py +++ b/tests/python/unittest/test_tir_schedule_compute_inline.py @@ -172,7 +172,7 @@ def elementwise_multi_reverse_loads_inlined(a: T.handle, c: T.handle) -> None: @T.prim_func def elementwise_reverse_affine_load( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 32, 8, 8), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 32, 8, 8), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -190,7 +190,7 @@ def elementwise_reverse_affine_load( @T.prim_func def elementwise_reverse_affine_load_inlined( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 32, 8, 8), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 32, 8, 8), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -207,9 +207,9 @@ def elementwise_reverse_affine_load_inlined( @T.prim_func def elementwise_reverse_affine_load_unit_iter( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: C = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -224,9 +224,9 @@ def elementwise_reverse_affine_load_unit_iter( @T.prim_func def elementwise_reverse_affine_load_unit_iter_inlined( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -236,9 +236,9 @@ def elementwise_reverse_affine_load_unit_iter_inlined( @T.prim_func def elementwise_reverse_affine_load_unit_iter_simplified( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: C = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -253,9 +253,9 @@ def elementwise_reverse_affine_load_unit_iter_simplified( @T.prim_func def elementwise_reverse_affine_load_unit_iter_simplified_inlined( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -265,7 +265,7 @@ def elementwise_reverse_affine_load_unit_iter_simplified_inlined( @T.prim_func def elementwise_reverse_affine_chain( - A: T.Buffer[(128, 128), 
"float32"], D: T.Buffer[(1, 8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 8, 16, 128), "float32") ): B = T.alloc_buffer((128, 128)) C = T.alloc_buffer((8, 16, 128)) @@ -285,7 +285,7 @@ def elementwise_reverse_affine_chain( @T.prim_func def elementwise_reverse_affine_chain_inlined( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 8, 16, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -295,8 +295,8 @@ def elementwise_reverse_affine_chain_inlined( @T.prim_func def elementwise_multi_reverse_affine_load( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((8, 16, 128), "float32"), ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -311,8 +311,8 @@ def elementwise_multi_reverse_affine_load( @T.prim_func def elementwise_multi_reverse_affine_load_inlined( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -322,7 +322,7 @@ def elementwise_multi_reverse_affine_load_inlined( @T.prim_func def elementwise_reverse_non_affine_load( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 16, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -505,8 +505,8 @@ def matmul_relu(var_A: T.handle, var_B: T.handle, var_compute: T.handle) -> None @T.prim_func def inline_block_with_init( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: B_rf = T.alloc_buffer([1, 512, 1, 1, 49], dtype="float32") for i0, i1, i2, i3, i4, i5 in T.grid(1, 512, 1, 1, 49, 1): @@ -542,9 +542,9 @@ def inline_block_with_init( @T.prim_func def exp_exp_opaque_access_with_tvm_access_ptr( - lookup_table: T.Buffer[(1024,), "int8"], - x: T.Buffer[(16,), "float16"], - compute: T.Buffer[(16,), "float16"], + lookup_table: T.Buffer((1024,), "int8"), + x: T.Buffer((16,), "float16"), + compute: T.Buffer((16,), "float16"), ) -> None: compute_1 = T.alloc_buffer([16], dtype="float16") for i0 in T.serial(16): @@ -567,9 +567,9 @@ def exp_exp_opaque_access_with_tvm_access_ptr( @T.prim_func def exp_exp_opaque_access_with_tvm_access_ptr_inlined( - lookup_table: T.Buffer[(1024,), "int8"], - x: T.Buffer[(16,), "float16"], - compute: T.Buffer[(16,), "float16"], + lookup_table: T.Buffer((1024,), "int8"), + x: T.Buffer((16,), "float16"), + compute: T.Buffer((16,), "float16"), ) -> None: for i0 in T.serial(16): with T.block("compute_1"): @@ -587,7 +587,7 @@ def exp_exp_opaque_access_with_tvm_access_ptr_inlined( @T.prim_func def elementwise_overcomputed_producer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(127, 127), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((127, 127), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -602,7 +602,7 @@ def elementwise_overcomputed_producer( @T.prim_func def elementwise_overcomputed_producer_reverse_inlined( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(127, 127), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((127, 127), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -613,7 +613,7 
@@ def elementwise_overcomputed_producer_reverse_inlined( @T.prim_func def elementwise_producer_not_cover_consumer( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(256, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((256, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -659,7 +659,7 @@ def elementwise_predicate_producer_inlined(a: T.handle, c: T.handle) -> None: @tvm.script.ir_module class Conv2dInt8_TensorCore_with_predicate: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]): + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")): # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_schedule_decompose_padding.py b/tests/python/unittest/test_tir_schedule_decompose_padding.py index ead8b0b33262..e33cfdbd3481 100644 --- a/tests/python/unittest/test_tir_schedule_decompose_padding.py +++ b/tests/python/unittest/test_tir_schedule_decompose_padding.py @@ -43,14 +43,14 @@ def check_decompose_padding(origin, scheduled, expected, check_run=False): def test_1d_decompose_padding(): @T.prim_func - def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def before_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in range(140): with T.block("block"): vi = T.axis.remap("S", [i]) y[vi] = T.if_then_else(vi >= 6 and vi < 134, x[vi - 6], 0, dtype="int32") @T.prim_func - def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def after_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in T.serial(140): with T.block("block_pad_const"): vi = T.axis.spatial(140, i) @@ -72,7 +72,7 @@ def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): @T.prim_func def sum_pool_2d( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ): pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): @@ -99,7 +99,7 @@ def test_decompose_hw_padding_direct(): @T.prim_func def pooling_decompose_0( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ): pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): @@ -130,7 +130,7 @@ def test_decompose_hw_padding_tiled(): @T.prim_func def pooling_decompose_1( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0 in T.grid(1, 3, 3): 
@@ -190,7 +190,7 @@ def test_decompose_hw_padding_tiled_and_lift_pad(): @T.prim_func def pooling_decompose_2( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0, ax0, ax1, ax2 in T.grid(1, 3, 3, 16, 81, 81): @@ -250,7 +250,7 @@ def test_decompose_hw_padding_non_perfect_tiled(): @T.prim_func def pooling_decompose_3( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0 in T.grid(1, 3, 3): @@ -314,7 +314,8 @@ def test_decompose_wrt_single_child_subtree(): @T.prim_func def pad_op( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer([1, 16, 231, 231], dtype="int8") + x: T.Buffer((1, 16, 225, 225), "int8"), + y: T.Buffer((1, 16, 231, 231), dtype="int8"), ): for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): with T.block("pad_temp"): @@ -328,7 +329,7 @@ def pad_op( @T.prim_func def pad_op_after( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer[(1, 16, 231, 231), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), y: T.Buffer((1, 16, 231, 231), "int8") ): for i0, i1 in T.grid(1, 16): for i2, i3 in T.grid(231, 231): @@ -354,7 +355,7 @@ def test_not_to_decompose_trivial_predicate(): @T.prim_func def trivial_pad( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer([1, 16, 225, 225], dtype="int8") + x: T.Buffer((1, 16, 225, 225), "int8"), y: T.Buffer([1, 16, 225, 225], dtype="int8") ): for i0, i1, i2, i3 in T.grid(1, 16, 225, 225): with T.block("pad_temp"): diff --git a/tests/python/unittest/test_tir_schedule_for_kind.py b/tests/python/unittest/test_tir_schedule_for_kind.py index 132e8b8b3fa5..8994f9de0ed4 100644 --- a/tests/python/unittest/test_tir_schedule_for_kind.py +++ b/tests/python/unittest/test_tir_schedule_for_kind.py @@ -279,9 +279,9 @@ def thread_bound_block_inside_init(a: T.handle, b: T.handle) -> None: @T.prim_func def decomposed_gemm( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): local = T.alloc_buffer((16, 16), "float32") for i, j in T.grid(4, 4): @@ -305,9 +305,9 @@ def decomposed_gemm( @T.prim_func def decomposed_gemm_after_vectorize( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): local = T.alloc_buffer((16, 16), "float32") for i, j in T.grid(4, 4): @@ -332,7 +332,7 @@ def decomposed_gemm_after_vectorize( @T.prim_func def nested_block_bind( - A: T.Buffer[(16, 16, 16, 16), "float32"], B: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16, 16), "float32"), B: T.Buffer((16, 16, 16), "float32") ): for i, j in T.grid(16, 16): with T.block("outer"): @@ -347,7 +347,7 @@ def nested_block_bind( @T.prim_func def thread_bound_nested_block( - A: T.Buffer[(16, 16, 16, 16), "float32"], B: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16, 16), "float32"), B: T.Buffer((16, 16, 16), "float32") ) -> None: for i in T.serial(16): for j in T.thread_binding(16, thread="blockIdx.x"): @@ -364,7 +364,7 @@ def thread_bound_nested_block( 
@T.prim_func def nested_block_bind_after_cache_read( - A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 16), "float32"), B: T.Buffer((16,), "float32") ) -> None: for i in T.serial(16): with T.block("outer"): @@ -385,7 +385,7 @@ def nested_block_bind_after_cache_read( @T.prim_func def thread_bound_nested_block_after_cache_read( - A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 16), "float32"), B: T.Buffer((16,), "float32") ) -> None: for i in T.thread_binding(16, thread="blockIdx.x"): with T.block("outer"): @@ -406,9 +406,9 @@ def thread_bound_nested_block_after_cache_read( @T.prim_func def decomposed_gemm_parallelize_init( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: local = T.alloc_buffer([16, 16], dtype="float32") for i, j in T.grid(4, 4): @@ -438,7 +438,7 @@ def decomposed_gemm_parallelize_init( @T.prim_func -def scatter_compute(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]): +def scatter_compute(A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32")): for i in T.grid(8): with T.block("first_half"): vi = T.axis.spatial(16, 8 + i) @@ -452,7 +452,7 @@ def scatter_compute(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"] @T.prim_func def scatter_compute_parallelize( - A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32") ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_tir_schedule_pad_einsum.py b/tests/python/unittest/test_tir_schedule_pad_einsum.py index 89628db4ff74..ec4d000655ab 100644 --- a/tests/python/unittest/test_tir_schedule_pad_einsum.py +++ b/tests/python/unittest/test_tir_schedule_pad_einsum.py @@ -31,9 +31,9 @@ @T.prim_func def matmul_before( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared = T.alloc_buffer((128, 127), "float32", scope="shared") B_shared = T.alloc_buffer((127, 127), "float32", scope="shared") @@ -60,9 +60,9 @@ def matmul_before( @T.prim_func def matmul_expected( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") B_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") diff --git a/tests/python/unittest/test_tir_schedule_reindex.py b/tests/python/unittest/test_tir_schedule_reindex.py index b5e66943019f..60e3f004f59c 100644 --- a/tests/python/unittest/test_tir_schedule_reindex.py +++ b/tests/python/unittest/test_tir_schedule_reindex.py @@ -26,7 +26,7 @@ @T.prim_func def transpose_elementwise( - A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -36,7 +36,7 @@ def transpose_elementwise( @T.prim_func def transpose_elementwise_reindex_read( - A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), B: 
T.Buffer((128, 128), "float32") ) -> None: A_reindex = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -51,9 +51,9 @@ def transpose_elementwise_reindex_read( @T.prim_func def conv2d_nhwc( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -78,9 +78,9 @@ def conv2d_nhwc( @T.prim_func def conv2d_nhwc_reindex_data( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") ReindexInput = T.alloc_buffer([1, 112, 112, 7, 7, 3], dtype="float32") @@ -152,9 +152,9 @@ def conv2d_nhwc_reindex_weight( @T.prim_func def matmul( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: for i0, i1, i2 in T.grid(512, 512, 512): with T.block("matmul"): @@ -168,9 +168,9 @@ def matmul( @T.prim_func def matmul_reindex_write( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: C_reindex = T.alloc_buffer([512, 512], dtype="float32") for i0, i1, i2 in T.grid(512, 512, 512): @@ -190,7 +190,7 @@ def matmul_reindex_write( @T.prim_func -def multiple_read(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def multiple_read(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -199,9 +199,9 @@ def multiple_read(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "f @T.prim_func def mixed_dtype( - p0: T.Buffer[(T.int64(2), 1280), "float16"], - p1: T.Buffer[(1280, 1280), "float16"], - T_matmul_NT: T.Buffer[(T.int64(2), 1280), "float16"], + p0: T.Buffer((T.int64(2), 1280), "float16"), + p1: T.Buffer((1280, 1280), "float16"), + T_matmul_NT: T.Buffer((T.int64(2), 1280), "float16"), ) -> None: for i0, i1, i2 in T.grid(T.int64(2), 1280, 1280): with T.block("T_matmul_NT"): @@ -216,9 +216,9 @@ def mixed_dtype( @T.prim_func def mixed_dtype_reindex_write( - p0: T.Buffer[(T.int64(2), 1280), "float16"], - p1: T.Buffer[(1280, 1280), "float16"], - T_matmul_NT: T.Buffer[(T.int64(2), 1280), "float16"], + p0: T.Buffer((T.int64(2), 1280), "float16"), + p1: T.Buffer((1280, 1280), "float16"), + T_matmul_NT: T.Buffer((T.int64(2), 1280), "float16"), ) -> None: T_matmul_NT_reindex = T.alloc_buffer([T.int64(2), 1280], dtype="float16") for i0, i1, i2 in T.grid(T.int64(2), 1280, 1280): @@ -241,9 +241,9 @@ def mixed_dtype_reindex_write( @T.prim_func def matmul_unit_dim( - A: T.Buffer[(1, 512), "float32"], - B: T.Buffer[(512, 1), "float32"], - C: T.Buffer[(1, 1), "float32"], + A: T.Buffer((1, 512), "float32"), + B: T.Buffer((512, 1), "float32"), 
+ C: T.Buffer((1, 1), "float32"), ) -> None: for i0, i1, i2 in T.grid(1, 1, 512): with T.block("matmul"): @@ -257,9 +257,9 @@ def matmul_unit_dim( @T.prim_func def matmul_unit_dim_reindex_write( - A: T.Buffer[(1, 512), "float32"], - B: T.Buffer[(512, 1), "float32"], - C: T.Buffer[(1, 1), "float32"], + A: T.Buffer((1, 512), "float32"), + B: T.Buffer((512, 1), "float32"), + C: T.Buffer((1, 1), "float32"), ) -> None: C_reindex = T.alloc_buffer([1, 1], dtype="float32") for i0, i1, i2 in T.grid(1, 1, 512): diff --git a/tests/python/unittest/test_tir_schedule_reorder.py b/tests/python/unittest/test_tir_schedule_reorder.py index b859b655efc8..763ce8c36ef0 100644 --- a/tests/python/unittest/test_tir_schedule_reorder.py +++ b/tests/python/unittest/test_tir_schedule_reorder.py @@ -216,7 +216,7 @@ def test_reorder_with_opaque_access(): def test_reorder_overlapped_access(): @T.prim_func - def overlapped_access(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def overlapped_access(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): # example to write first axis multiple times for v0, v1, v2 in T.grid(6, 4, 4): with T.block("block"): @@ -225,7 +225,7 @@ def overlapped_access(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "flo B[i, j] = A[i, j] + 1.0 @T.prim_func - def overlapped_access_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def overlapped_access_reorder(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): # example to write first axis multiple times for v0, v2, v1 in T.grid(6, 4, 4): with T.block("block"): @@ -242,7 +242,7 @@ def overlapped_access_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, def test_reorder_with_partial_affineness(): @T.prim_func - def non_affine_func(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def non_affine_func(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): for v0, v1, v2 in T.grid(6, 4, 4): with T.block("block"): i = T.axis.spatial(14, v0 * v0 + v1) @@ -250,7 +250,7 @@ def non_affine_func(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float B[i, j] = A[i, j] + 1.0 @T.prim_func - def non_affine_func_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def non_affine_func_reorder(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): for v0, v2, v1 in T.grid(6, 4, 4): with T.block("block"): i = T.axis.spatial(14, v0 * v0 + v1) @@ -270,7 +270,7 @@ def non_affine_func_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4) def test_reorder_with_cascade_tiled_ops(): @T.prim_func def cascade_pool_ops( - x: T.Buffer[(1, 16, 112, 112), "float32"], y2: T.Buffer[(1, 16, 108, 108), "float32"] + x: T.Buffer((1, 16, 112, 112), "float32"), y2: T.Buffer((1, 16, 108, 108), "float32") ) -> None: y1 = T.alloc_buffer([1, 16, 110, 110], dtype="float32") for n, c, h, w, kh, kw in T.grid(1, 16, 110, 110, 3, 3): @@ -288,7 +288,7 @@ def cascade_pool_ops( @T.prim_func def cascade_pool_ops_tile_reordered( - x: T.Buffer[(1, 16, 112, 112), "float32"], y2: T.Buffer[(1, 16, 108, 108), "float32"] + x: T.Buffer((1, 16, 112, 112), "float32"), y2: T.Buffer((1, 16, 108, 108), "float32") ) -> None: y1 = T.alloc_buffer([1, 16, 110, 110], dtype="float32") for n, c, h_o in T.grid(1, 16, 27): diff --git a/tests/python/unittest/test_tir_schedule_rfactor.py b/tests/python/unittest/test_tir_schedule_rfactor.py index 964fe772d8af..766cc3f8671c 100644 --- a/tests/python/unittest/test_tir_schedule_rfactor.py +++ 
b/tests/python/unittest/test_tir_schedule_rfactor.py @@ -252,7 +252,7 @@ def transformed_square_sum_square_root_factor_one_1(a: T.handle, d: T.handle) -> @T.prim_func def square_sum_square_root_factor_one_1_rfactor( - A: T.Buffer[(16, 256, 256), "float32"], D: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 256, 256), "float32"), D: T.Buffer((16,), "float32") ) -> None: C = T.alloc_buffer([16], dtype="float32") C_rf = T.alloc_buffer([1, 16], dtype="float32") @@ -299,7 +299,7 @@ def transformed_square_sum_square_root_factor_one_2(a: T.handle, d: T.handle) -> @T.prim_func def square_sum_square_root_factor_one_2_rfactor( - A: T.Buffer[(16, 256, 256), "float32"], D: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 256, 256), "float32"), D: T.Buffer((16,), "float32") ) -> None: C = T.alloc_buffer([16], dtype="float32") C_rf = T.alloc_buffer([16, 1], dtype="float32") @@ -636,8 +636,8 @@ def multiple_reduction_blocks_rfactor(a: T.handle, f: T.handle) -> None: @T.prim_func def rfactor_spatial_only( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: for _i0, i1, _i2, _i3, i4, _i5 in T.grid(1, 512, 1, 1, 49, 1): with T.block("acc"): @@ -658,8 +658,8 @@ def rfactor_spatial_only( @T.prim_func def rfactor_spatial_only_after( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: # body # with T.block("root") @@ -686,10 +686,10 @@ def rfactor_spatial_only_after( @T.prim_func def argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -708,10 +708,10 @@ def argmax_split( @T.prim_func def argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmin"): @@ -730,10 +730,10 @@ def argmin_split_init_update_reordered( @T.prim_func def argmax_split_different_shape( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(256,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((256,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -752,10 +752,10 @@ def argmax_split_different_shape( @T.prim_func def argmax_split_different_indices( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -774,10 
+774,10 @@ def argmax_split_different_indices( @T.prim_func def argmax_split_init_not_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -797,10 +797,10 @@ def argmax_split_init_not_bufferstore( @T.prim_func def argmax_split_init_buffer_duplicate( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -819,10 +819,10 @@ def argmax_split_init_buffer_duplicate( @T.prim_func def argmax_split_letstmt_fewer_than_init( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -840,10 +840,10 @@ def argmax_split_letstmt_fewer_than_init( @T.prim_func def argmax_split_letstmt_more_than_init( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -861,10 +861,10 @@ def argmax_split_letstmt_more_than_init( @T.prim_func def argmax_split_let_body_neither_seqstmt_nor_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -882,10 +882,10 @@ def argmax_split_let_body_neither_seqstmt_nor_bufferstore( @T.prim_func def argmax_split_init_update_inconsistent_bufferstore_number( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -905,10 +905,10 @@ def argmax_split_init_update_inconsistent_bufferstore_number( @T.prim_func def argmax_split_body_seq_not_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: 
T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -927,10 +927,10 @@ def argmax_split_body_seq_not_bufferstore( @T.prim_func def argmax_split_body_bufferstore_value_not_var( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -949,10 +949,10 @@ def argmax_split_body_bufferstore_value_not_var( @T.prim_func def argmax_split_body_bufferstore_value_unbound_var( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: v_unbound = T.var("int32") for i0, i1_0, i1_1 in T.grid(128, 4, 32): @@ -972,10 +972,10 @@ def argmax_split_body_bufferstore_value_unbound_var( @T.prim_func def argmax_split_one_let_var_used_multi_times( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "int32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "int32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "int32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "int32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -994,10 +994,10 @@ def argmax_split_one_let_var_used_multi_times( @T.prim_func def argmax_split_body_one_buffer_updated_multi_times( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "int32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "int32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "int32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "int32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -1016,11 +1016,11 @@ def argmax_split_body_one_buffer_updated_multi_times( @T.prim_func def argmax_split_init_buffer_not_match( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v0_1: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v0_1: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -1039,10 +1039,10 @@ def argmax_split_init_buffer_not_match( @T.prim_func def argmax_split_rfactor( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: argmax_v0_rf = T.alloc_buffer([128, 32], dtype="int32") argmax_v1_rf = T.alloc_buffer([128, 32], dtype="float32") @@ -1086,10 +1086,10 @@ def argmax_split_rfactor( @T.prim_func def argmin_split_rfactor( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - 
argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: argmin_v0_rf = T.alloc_buffer([128, 32], dtype="int32") argmin_v1_rf = T.alloc_buffer([128, 32], dtype="float32") @@ -1133,7 +1133,7 @@ def argmin_split_rfactor( @T.prim_func def argmax_topi_rfactor( - placeholder: T.Buffer[(1, 32), "int32"], placeholder_red: T.Buffer[1, "int32"] + placeholder: T.Buffer((1, 32), "int32"), placeholder_red: T.Buffer(1, "int32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_red_temp_v0 = T.alloc_buffer([1], dtype="int32") @@ -1194,7 +1194,7 @@ def argmax_topi_rfactor( @T.prim_func def argmin_topi_rfactor( - placeholder: T.Buffer[(1, 32), "int32"], placeholder_red: T.Buffer[1, "int32"] + placeholder: T.Buffer((1, 32), "int32"), placeholder_red: T.Buffer(1, "int32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_red_temp_v0 = T.alloc_buffer([1], dtype="int32") diff --git a/tests/python/unittest/test_tir_schedule_rolling_buffer.py b/tests/python/unittest/test_tir_schedule_rolling_buffer.py index c55c41e451cc..9597a5db72fc 100644 --- a/tests/python/unittest/test_tir_schedule_rolling_buffer.py +++ b/tests/python/unittest/test_tir_schedule_rolling_buffer.py @@ -61,7 +61,7 @@ def _tile_nd(s, tile, block_name): def test_1d_rolling_buffer(): @T.prim_func - def before(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): + def before(A: T.Buffer((4, 12), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer((4, 10), "int32") for c in T.serial(4): for i in T.serial(0, 10): @@ -80,7 +80,7 @@ def before(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): C[cc, vi] = C[cc, vi] + B[cc, vi + vk] @T.prim_func - def expected(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): + def expected(A: T.Buffer((4, 12), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer([4, 6], dtype="int32") for c, i_0 in T.grid(4, 2): for ax0, ax1 in T.grid(6, 3): @@ -114,7 +114,7 @@ def expected(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): @T.prim_func -def cascade_2_max_pool2d(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): +def cascade_2_max_pool2d(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 10, 10, 16], dtype="int8") for i0, i1, i2, i3, i4, i5 in T.grid(1, 10, 10, 16, 3, 3): with T.block("B"): @@ -132,7 +132,7 @@ def cascade_2_max_pool2d(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8 @T.prim_func def cascade_3_max_pool2d_with_stride( - A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"] + A: T.Buffer((1, 24, 24, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8") ): B_0 = T.alloc_buffer([1, 22, 22, 16], dtype="int8") B_1 = T.alloc_buffer([1, 10, 10, 16], dtype="int8") @@ -164,7 +164,7 @@ def cascade_3_max_pool2d_with_stride( def test_cascade_max_pool2d_w_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 10, 6, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 1, 2, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(10, 6, 16, 3, 3): @@ -205,7 +205,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def 
test_cascade_max_pool2d_h_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 1, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(6, 10, 16, 3, 3): @@ -246,7 +246,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_max_pool2d_h_w_c_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 2): for ax0, ax1, ax2, ax3, ax4 in T.grid(6, 6, 8, 3, 3): @@ -288,7 +288,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_max_pool2d_non_perfect_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")) -> None: B = T.alloc_buffer([1, 8, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(8, 8, 16, 3, 3): @@ -335,7 +335,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_3_max_pool2d_with_stride(): @T.prim_func - def expected(A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def expected(A: T.Buffer((1, 24, 24, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")) -> None: B_0 = T.alloc_buffer([1, 13, 22, 16], dtype="int8") B_1 = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 1): @@ -396,7 +396,7 @@ def expected(A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_upscale(): @T.prim_func - def before(A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "int8"]) -> None: + def before(A: T.Buffer((1, 16, 16, 16), "int8"), C: T.Buffer((1, 24, 24, 16), "int8")) -> None: B = T.alloc_buffer([1, 14, 14, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 5, 5, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(5, 5, 16, 3, 3): @@ -432,7 +432,7 @@ def before(A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "i @T.prim_func def expected( - A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "int8"] + A: T.Buffer((1, 16, 16, 16), "int8"), C: T.Buffer((1, 24, 24, 16), "int8") ) -> None: B = T.alloc_buffer([1, 5, 14, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 5, 5, 1): @@ -480,7 +480,7 @@ def expected( def test_fail_rolling_buffer_multi_writers(): @T.prim_func def func_multi_writers( - A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 12, 12, 16), "int8"] + A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 12, 12, 16), "int8") ): B = T.alloc_buffer([1, 12, 12, 16], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 3, 3, 1): @@ -525,7 +525,7 @@ def func_multi_writers( def test_fail_rolling_buffer_not_match(): @T.prim_func def func_non_overlap( - A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 12, 12, 16), "int8"] + A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 12, 12, 16), "int8") ): B = T.alloc_buffer([1, 12, 12, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 3, 3, 1): diff --git 
a/tests/python/unittest/test_tir_schedule_sampling.py b/tests/python/unittest/test_tir_schedule_sampling.py index 0c2a3d27ffdb..c2f3f89e6e12 100644 --- a/tests/python/unittest/test_tir_schedule_sampling.py +++ b/tests/python/unittest/test_tir_schedule_sampling.py @@ -41,9 +41,9 @@ def elementwise(a: T.handle, b: T.handle) -> None: @T.prim_func def tiled_conv2d_with_padding( - inputs: T.Buffer[(1, 224, 224, 3), "float32"], - weight: T.Buffer[(7, 7, 3, 64), "float32"], - conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + inputs: T.Buffer((1, 224, 224, 3), "float32"), + weight: T.Buffer((7, 7, 3, 64), "float32"), + conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): diff --git a/tests/python/unittest/test_tir_schedule_set_axis_separator.py b/tests/python/unittest/test_tir_schedule_set_axis_separator.py index 327df33408f2..75c650733ae0 100644 --- a/tests/python/unittest/test_tir_schedule_set_axis_separator.py +++ b/tests/python/unittest/test_tir_schedule_set_axis_separator.py @@ -27,7 +27,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,unexpected-keyword-arg @T.prim_func -def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -41,7 +41,7 @@ def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "fl @T.prim_func -def element_wise_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_axis_separator(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) for i, j in T.grid(128, 128): @@ -55,7 +55,7 @@ def element_wise_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buf @T.prim_func -def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), dtype="float32", axis_separators=(1,)), C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), dtype="float32", axis_separators=(1,)), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(128, 128): @@ -69,7 +69,7 @@ def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), d @T.prim_func -def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -85,7 +85,7 @@ def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer @T.prim_func -def element_wise_subregion_match_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match_set_axis_separator(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) for i, j in T.grid(128, 128): diff --git a/tests/python/unittest/test_tir_schedule_set_scope.py b/tests/python/unittest/test_tir_schedule_set_scope.py index adac81e62946..e5fa25fbc362 100644 --- 
a/tests/python/unittest/test_tir_schedule_set_scope.py +++ b/tests/python/unittest/test_tir_schedule_set_scope.py @@ -26,7 +26,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,unexpected-keyword-arg @T.prim_func -def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -40,7 +40,7 @@ def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "fl @T.prim_func -def element_wise_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_scope(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") for i, j in T.grid(128, 128): @@ -54,7 +54,7 @@ def element_wise_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, @T.prim_func -def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -70,7 +70,7 @@ def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer @T.prim_func -def element_wise_subregion_match_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match_set_scope(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") for i, j in T.grid(128, 128): diff --git a/tests/python/unittest/test_tir_schedule_split_fuse.py b/tests/python/unittest/test_tir_schedule_split_fuse.py index 3ae88e0abba5..f6373fa727a1 100644 --- a/tests/python/unittest/test_tir_schedule_split_fuse.py +++ b/tests/python/unittest/test_tir_schedule_split_fuse.py @@ -526,9 +526,9 @@ def test_fuse_not_affine(): def test_add_unit_loop_above_block(): @T.prim_func def zero_dim( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -536,9 +536,9 @@ def zero_dim( @T.prim_func def zero_dim_added( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in range(1): with T.block("C"): @@ -554,9 +554,9 @@ def zero_dim_added( def test_add_unit_loop_above_loop(): @T.prim_func def zero_dim( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in range(1): with T.block("C"): @@ -565,9 +565,9 @@ def zero_dim( @T.prim_func def zero_dim_added( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u1, u2 in T.grid(1, 1): with T.block("C"): diff --git a/tests/python/unittest/test_tir_schedule_state_cached_flags.py b/tests/python/unittest/test_tir_schedule_state_cached_flags.py index 70935814ba40..8120aa2aea31 100644 --- 
a/tests/python/unittest/test_tir_schedule_state_cached_flags.py +++ b/tests/python/unittest/test_tir_schedule_state_cached_flags.py @@ -355,7 +355,7 @@ def non_perfect_tiling_cache(a: T.handle, b: T.handle) -> None: @T.prim_func -def uncovered_producer_region(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): +def uncovered_producer_region(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")): for i in range(120): with T.block("producer"): vi = T.axis.S((0, 120), i) @@ -367,7 +367,7 @@ def uncovered_producer_region(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,) @T.prim_func -def matmul_relu_padding(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 127), "float16"], compute: T.Buffer[(127, 127), "float32"]) -> None: +def matmul_relu_padding(A: T.Buffer((127, 127), "float16"), B: T.Buffer((127, 127), "float16"), compute: T.Buffer((127, 127), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -440,7 +440,7 @@ def matmul_relu_padding(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 12 @T.prim_func def splitted_square_sum_with_predicate( - A: T.Buffer[(1, 7, 7, 512), "float32"], B: T.Buffer[(1, 1, 1, 512), "float32"] + A: T.Buffer((1, 7, 7, 512), "float32"), B: T.Buffer((1, 1, 1, 512), "float32") ) -> None: for i0_i1_i2_i3_0_fused, ax0, ax1, ax2, ax3 in T.grid(2, 1, 1, 1, 256): for ax4_ax5_fused_0, ax4_ax5_fused_1 in T.grid(1, 256): diff --git a/tests/python/unittest/test_tir_schedule_tensorize.py b/tests/python/unittest/test_tir_schedule_tensorize.py index 4847f261a32c..143cf87d04e1 100644 --- a/tests/python/unittest/test_tir_schedule_tensorize.py +++ b/tests/python/unittest/test_tir_schedule_tensorize.py @@ -160,9 +160,9 @@ def outer_product_intrin(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def matmul( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i, j, k in T.grid(128, 128, 128): with T.block("update"): @@ -230,9 +230,9 @@ def tensorized_matmul(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def batch_matmul( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -247,9 +247,9 @@ def batch_matmul( @T.prim_func def tensorized_batch_matmul_mma( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -302,9 +302,9 @@ def tensorized_batch_matmul_mma( @T.prim_func def tensorized_batch_matmul_dot_product( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -342,9 +342,9 @@ def tensorized_batch_matmul_dot_product( @T.prim_func def tensorized_batch_matmul_outer_product( - A: 
T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -392,9 +392,9 @@ def annotated_mma_desc(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def annotated_matmul( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i, j, k in T.grid(128, 128, 128): with T.block("update"): @@ -705,9 +705,9 @@ def test_tensorize_matmul_mixed_dtype(): # fmt: off @T.prim_func def matmul_int64_shape( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"] + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32") ) -> None: for i_0, j_0 in T.grid(T.int64(8), T.int64(8)): for i_1_init, j_1_init in T.grid(T.int64(16), T.int64(16)): @@ -724,9 +724,9 @@ def matmul_int64_shape( @T.prim_func def tensorized_matmul_int64_shape( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"] + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32") ) -> None: for i_outer, j_outer in T.grid(T.int64(8), T.int64(8)): for i_inner_init, j_inner_init in T.grid(T.int64(16), T.int64(16)): diff --git a/tests/python/unittest/test_tir_schedule_transform.py b/tests/python/unittest/test_tir_schedule_transform.py index c068385f0a46..b189d3c39e5b 100644 --- a/tests/python/unittest/test_tir_schedule_transform.py +++ b/tests/python/unittest/test_tir_schedule_transform.py @@ -25,9 +25,9 @@ class DenseTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -49,9 +49,9 @@ def main( class DenseTIRModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -75,9 +75,9 @@ def main( class Conv2dNCHWcTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) 
for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): @@ -116,9 +116,9 @@ def main( class Conv2dNCHWcTIRModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_tir_schedule_transform_layout.py b/tests/python/unittest/test_tir_schedule_transform_layout.py index 5031ff5d6105..ace2b58acb0b 100644 --- a/tests/python/unittest/test_tir_schedule_transform_layout.py +++ b/tests/python/unittest/test_tir_schedule_transform_layout.py @@ -33,7 +33,7 @@ def packed_index_map_func(m, n): @T.prim_func -def two_elementwise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def two_elementwise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): with T.block("B"): @@ -47,7 +47,7 @@ def two_elementwise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), @T.prim_func def two_elementwise_transformed_intermediate_buffer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((8, 8, 16, 16), "float32") for i, j in T.grid(128, 128): @@ -62,7 +62,7 @@ def two_elementwise_transformed_intermediate_buffer( @T.prim_func def two_elementwise_transformed_input_buffer( - A: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((8, 8, 16, 16), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -77,7 +77,7 @@ def two_elementwise_transformed_input_buffer( @T.prim_func def two_elementwise_transformed_output_buffer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 8, 16, 16), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 8, 16, 16), "float32") ) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -91,7 +91,7 @@ def two_elementwise_transformed_output_buffer( @T.prim_func -def elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def elementwise(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -99,7 +99,7 @@ def elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "flo @T.prim_func -def elementwise_transformed(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def elementwise_transformed(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i in range(16384): with T.block("B"): vi = T.axis.remap("S", [i]) @@ -108,9 +108,9 @@ def elementwise_transformed(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128 @T.prim_func def conv2d_nhwc( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + 
Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -135,9 +135,9 @@ def conv2d_nhwc( @T.prim_func def conv2d_nhwc_transformed( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -162,7 +162,7 @@ def conv2d_nhwc_transformed( @T.prim_func -def two_elementwise_unit_dim(A: T.Buffer[(1, 128), "float32"], C: T.Buffer[(1, 128), "float32"]) -> None: +def two_elementwise_unit_dim(A: T.Buffer((1, 128), "float32"), C: T.Buffer((1, 128), "float32")) -> None: B = T.alloc_buffer((1, 128), "float32") for i, j in T.grid(1, 128): with T.block("B"): @@ -277,7 +277,7 @@ def test_simplify(): sch.transform_layout(B, ("write", 0), lambda i, j: (i // 16, j // 16, i % 16, j % 16)) @T.prim_func - def ref(B: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32"]): + def ref(B: T.Buffer((8, 8, 16, 16), "float32"), C: T.Buffer((128, 128), "float32")): for i_0, j_0 in T.grid(8, 8): with T.block("C_o"): vi_o, vj_o = T.axis.remap("SS", [i_0, j_0]) @@ -300,7 +300,7 @@ def ref(B: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32 def test_var_args_sugar(): @T.prim_func def summation_3d( - A: T.Buffer[(1024, 1024, 32), "float32"], B: T.Buffer[(1,), "float32"] + A: T.Buffer((1024, 1024, 32), "float32"), B: T.Buffer((1,), "float32") ) -> None: B[0] = 0 for i, j, k in T.grid(1024, 1024, 32): @@ -310,7 +310,7 @@ def summation_3d( @T.prim_func def summation_3d_split( - A: T.Buffer[(1024, 1024, 8, 4), "float32"], B: T.Buffer[(1,), "float32"] + A: T.Buffer((1024, 1024, 8, 4), "float32"), B: T.Buffer((1,), "float32") ) -> None: B[0] = 0 for i, j, k in T.grid(1024, 1024, 32): @@ -351,7 +351,7 @@ def test_transform_block_layout_unit_dim(use_block_name): @T.prim_func def two_elementwise_unit_dim_transformed( - A: T.Buffer[(1, 128), "float32"], C: T.Buffer[(1, 128), "float32"] + A: T.Buffer((1, 128), "float32"), C: T.Buffer((1, 128), "float32") ) -> None: B = T.alloc_buffer((1, 128), "float32") for j, i in T.grid(128, 1): @@ -387,8 +387,8 @@ def test_transform_block_layout_fail_mixed_iter_type(use_block_name): def test_transform_block_layout_int64_extent(use_block_name): @T.prim_func def elementwise_int64_extent( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: for i, j in T.grid(T.int64(128), T.int64(128)): with T.block("B"): @@ -397,8 +397,8 @@ def elementwise_int64_extent( @T.prim_func def elementwise_int64_extent_transformed( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: for i in range(T.int64(16384)): with T.block("B"): @@ -575,7 +575,7 @@ class TestPaddedTransformIfThenElse(BasePaddingCompare): @tvm.testing.fixture def before(self, dtype): @T.prim_func - def func(A: T.Buffer[14, dtype]): + def func(A: T.Buffer(14, dtype)): B = 
T.alloc_buffer(14, dtype) for i in T.serial(14): with T.block("block"): @@ -589,7 +589,7 @@ def expected(self, dtype, pad_value): pad_value = tir.IntImm(dtype, pad_value) @T.prim_func - def func(A: T.Buffer[14, dtype]): + def func(A: T.Buffer(14, dtype)): B = T.alloc_buffer([4, 4], dtype) for i, j in T.grid(4, 4): with T.block("block"): @@ -610,14 +610,14 @@ class TestPaddedTransformWithoutLoop(BasePaddingCompare): pad_value = tvm.testing.parameter(0) - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): with T.block("root"): T.reads() T.writes() with T.block("block"): A[0] = 0 - def expected(A: T.Buffer[(4, 4), "int32"]): + def expected(A: T.Buffer((4, 4), "int32")): with T.block("block"): A[0, 0] = 0 @@ -634,7 +634,7 @@ class TestPaddedTransformIfThenElseReduction(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[(14, 32), "int32"]): + def before(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer(14, "int32") for i, k in T.grid(14, 32): with T.block("block"): @@ -643,7 +643,7 @@ def before(A: T.Buffer[(14, 32), "int32"]): B[vi] = 0 B[vi] = B[vi] + A[vi, vk] - def expected(A: T.Buffer[(14, 32), "int32"]): + def expected(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j, k in T.grid(4, 4, 32): with T.block("block"): @@ -661,7 +661,7 @@ class TestPaddedTransformIfThenElseReductionOpaque(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[(14, 32), "int32"]): + def before(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): B[i] = 0 @@ -669,7 +669,7 @@ def before(A: T.Buffer[(14, 32), "int32"]): with T.block("block"): B[i] = B[i] + A[i, k] - def expected(A: T.Buffer[(14, 32), "int32"]): + def expected(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j in T.grid(4, 4): B[i, j] = T.if_then_else(i == 3 and 2 <= j, 0, 0, dtype="int32") @@ -690,7 +690,7 @@ class TestPaddedTransformPostProcIfRequiredDueToSideEffects(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") C = T.alloc_buffer(14, "int32") for i in T.serial(14): @@ -699,7 +699,7 @@ def before(A: T.Buffer[14, "int32"]): B[vi] = A[vi] C[vi] = 0 - def expected(A: T.Buffer[14, "int32"]): + def expected(A: T.Buffer(14, "int32")): B = T.alloc_buffer([4, 4], "int32") C = T.alloc_buffer(14, "int32") for i in T.serial(14): @@ -720,13 +720,13 @@ class TestPaddedTransformOfInputCreatesAssumption(BasePaddingCompare): pad_value = tvm.testing.parameter(42) - def before(A: T.Buffer[14, "int32"], B: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32"), B: T.Buffer(14, "int32")): for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[(4, 4), "int32"], B: T.Buffer[14, "int32"]): + def expected(A: T.Buffer((4, 4), "int32"), B: T.Buffer(14, "int32")): for i, j in T.grid(4, 4): with T.block("buffer_A_assumption"): vi, vj = T.axis.remap("SS", [i, j]) @@ -759,14 +759,14 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[14, "int32"]): + def 
expected(A: T.Buffer(14, "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j in T.grid(4, 4): with T.block("block"): @@ -802,14 +802,14 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[(4, 4), "int32"]): + def expected(A: T.Buffer((4, 4), "int32")): for i, j in T.grid(4, 4): with T.block("buffer_A_assumption"): vi, vj = T.axis.remap("SS", [i, j]) @@ -851,7 +851,7 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): @@ -881,14 +881,14 @@ def transform(mod): return transform - def before(A: T.Buffer[16, "int32"], n: T.int32): + def before(A: T.Buffer(16, "int32"), n: T.int32): B = T.alloc_buffer(16, "int32") for i in T.serial(16): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[16, "int32"], n: T.int32): + def expected(A: T.Buffer(16, "int32"), n: T.int32): B = T.alloc_buffer([(-16 % n + 16) // n, n], dtype="int32") for i, j in T.grid((-16 % n + 16) // n, n): with T.block("block"): @@ -951,7 +951,7 @@ def test_index_map_dtype_legalize(): """Test dtype legalization of the index map indices.""" @T.prim_func - def func(A: T.Buffer[T.int64(58), "int32"]): + def func(A: T.Buffer(T.int64(58), "int32")): for i in T.serial(T.int64(58)): with T.block("block"): vi = T.axis.remap("S", [i]) diff --git a/tests/python/unittest/test_tir_schedule_utilities.py b/tests/python/unittest/test_tir_schedule_utilities.py index 2f6c2f6a5120..53ee6a58cd9a 100644 --- a/tests/python/unittest/test_tir_schedule_utilities.py +++ b/tests/python/unittest/test_tir_schedule_utilities.py @@ -105,8 +105,8 @@ def matmul_relu_ann2(a: T.handle, b: T.handle, d: T.handle) -> None: class ModuleWithMultipleFuncs: @T.prim_func def vector_add( - A: T.Buffer[128, "float32"], - B: T.Buffer[128, "float32"], + A: T.Buffer(128, "float32"), + B: T.Buffer(128, "float32"), ) -> None: for i in range(128): with T.block("init"): @@ -115,8 +115,8 @@ def vector_add( @T.prim_func def vector_add_2( - A: T.Buffer[128, "float32"], - B: T.Buffer[128, "float32"], + A: T.Buffer(128, "float32"), + B: T.Buffer(128, "float32"), ) -> None: for i in range(128): with T.block("init"): @@ -125,7 +125,7 @@ def vector_add_2( @T.prim_func -def tuple_reduction(data: T.Buffer[(4, 32), "float32"], T_add: T.Buffer[(4,), "float32"]) -> None: +def tuple_reduction(data: T.Buffer((4, 32), "float32"), T_add: T.Buffer((4,), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_te_extern_primfunc.py b/tests/python/unittest/test_tir_te_extern_primfunc.py index f6eb2e8a9b86..45ca7a1c7256 100644 --- a/tests/python/unittest/test_tir_te_extern_primfunc.py +++ b/tests/python/unittest/test_tir_te_extern_primfunc.py @@ -31,7 +31,7 @@ @T.prim_func -def func_1(A: T.Buffer[(16,), "float32"], C: T.Buffer[(1,), "float32"]): +def func_1(A: T.Buffer((16,), "float32"), C: T.Buffer((1,), "float32")): for i in T.serial( 0, 16, @@ -59,7 +59,7 @@ def verify_func_1(module): @T.prim_func def func_2( - C: T.Buffer[(1,), "float32"], A: T.Buffer[(16,), "float32"], D: T.Buffer[(2,), "float32"] + C: T.Buffer((1,), "float32"), A: T.Buffer((16,), "float32"), D: T.Buffer((2,), "float32") ): for i in 
T.serial( 0, @@ -89,11 +89,11 @@ def verify_func_2(module): @T.prim_func def func_3( - C: T.Buffer[(1,), "float32"], - A: T.Buffer[(16,), "float32"], - D: T.Buffer[(2,), "float32"], - E: T.Buffer[(16,), "float32"], - F: T.Buffer[(16,), "float32"], + C: T.Buffer((1,), "float32"), + A: T.Buffer((16,), "float32"), + D: T.Buffer((2,), "float32"), + E: T.Buffer((16,), "float32"), + F: T.Buffer((16,), "float32"), ): for i in T.serial( 0, @@ -131,11 +131,11 @@ def verify_func_3(module): @T.prim_func def func_4( - C: T.Buffer[(1,), "float32"], - A: T.Buffer[(16,), "float32"], - F: T.Buffer[(16,), "float32"], - D: T.Buffer[(2,), "float32"], - E: T.Buffer[(16,), "float32"], + C: T.Buffer((1,), "float32"), + A: T.Buffer((16,), "float32"), + F: T.Buffer((16,), "float32"), + D: T.Buffer((2,), "float32"), + E: T.Buffer((16,), "float32"), ): for i in T.serial( 0, diff --git a/tests/python/unittest/test_tir_transform_compact_buffer_region.py b/tests/python/unittest/test_tir_transform_compact_buffer_region.py index 34b3190b9aa2..1a2a47a17043 100644 --- a/tests/python/unittest/test_tir_transform_compact_buffer_region.py +++ b/tests/python/unittest/test_tir_transform_compact_buffer_region.py @@ -447,7 +447,7 @@ def padding_pattern_inlined(a: T.handle, b: T.handle) -> None: @T.prim_func def compacted_padding_pattern_inlined( - X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"] + X: T.Buffer((224, 224), "float32"), Y: T.Buffer((224, 224), "float32") ) -> None: cache = T.alloc_buffer([224, 224], dtype="float32") for h, w in T.grid(224, 224): @@ -561,10 +561,10 @@ def compacted_opaque_access_annotated_func(a: T.handle) -> None: @T.prim_func def sparse_read_cache( - A_data: T.Buffer[(819,), "float32"], - B: T.Buffer[(128,), "float32"], - A_indptr: T.Buffer[(129,), "int32"], - A_indices: T.Buffer[(819,), "int32"], + A_data: T.Buffer((819,), "float32"), + B: T.Buffer((128,), "float32"), + A_indptr: T.Buffer((129,), "int32"), + A_indices: T.Buffer((819,), "int32"), ) -> None: for i in T.serial(128): with T.block("rowsum_outer"): @@ -594,10 +594,10 @@ def sparse_read_cache( @T.prim_func def compacted_sparse_read_cache( - A_data: T.Buffer[(819,), "float32"], - B: T.Buffer[(128,), "float32"], - A_indptr: T.Buffer[(129,), "int32"], - A_indices: T.Buffer[(819,), "int32"], + A_data: T.Buffer((819,), "float32"), + B: T.Buffer((128,), "float32"), + A_indptr: T.Buffer((129,), "int32"), + A_indices: T.Buffer((819,), "int32"), ) -> None: for i in T.serial(128): with T.block("rowsum_outer"): @@ -626,7 +626,7 @@ def compacted_sparse_read_cache( @T.prim_func -def narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) -> None: +def narrow_shape(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")) -> None: B_cache = T.alloc_buffer(10, "float32") for j in T.serial(3): for k in T.serial(4): @@ -638,7 +638,7 @@ def narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) - @T.prim_func -def compacted_narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) -> None: +def compacted_narrow_shape(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")) -> None: # body # with T.block("root") B_cache = T.alloc_buffer([10], dtype="float32") @@ -756,7 +756,7 @@ def func_with_non_index_let_binding(): def test_compact_spatial_tiled_pad_and_pooling(): @T.prim_func def spatial_tiled_pad_and_pooling( - X: T.Buffer[(64, 112, 112), "int32"], Y: T.Buffer[(64, 56, 56), "int32"] + X: T.Buffer((64, 112, 112), "int32"), Y: T.Buffer((64, 56, 56), "int32") ) -> None: 
for h_o, w_o in T.grid(14, 14): with T.block(): @@ -796,7 +796,7 @@ def spatial_tiled_pad_and_pooling( @T.prim_func def compacted_spatial_tiled_pad_and_pooling( - X: T.Buffer[(64, 112, 112), "int32"], Y: T.Buffer[(64, 56, 56), "int32"] + X: T.Buffer((64, 112, 112), "int32"), Y: T.Buffer((64, 56, 56), "int32") ) -> None: for h_o, w_o in T.grid(14, 14): with T.block(): @@ -854,7 +854,7 @@ def test_complex_case_1(): # fmt: off @T.prim_func - def func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"], C: T.Buffer[(960, 2304), "float32"]) -> None: + def func(A: T.Buffer((960, 770), "float32"), B: T.Buffer((770, 2304), "float32"), C: T.Buffer((960, 2304), "float32")) -> None: for bx in T.thread_binding(144, thread="blockIdx.x"): for vx in T.thread_binding(2, thread="vthread.x"): for tx_p in T.thread_binding(256, thread="threadIdx.x"): @@ -880,7 +880,7 @@ def func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"] C[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] = C[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] + A_shared[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, (k_0 + k_1) * 4 + k_2] * B_shared[(k_0 + k_1) * 4 + k_2, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] @T.prim_func - def compacted_func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"], C: T.Buffer[(960, 2304), "float32"]) -> None: + def compacted_func(A: T.Buffer((960, 770), "float32"), B: T.Buffer((770, 2304), "float32"), C: T.Buffer((960, 2304), "float32")) -> None: for bx in T.thread_binding(144, thread="blockIdx.x"): for vx in T.thread_binding(2, thread="vthread.x"): for tx_p in T.thread_binding(256, thread="threadIdx.x"): @@ -940,9 +940,9 @@ def test_compact_dependent_buffer_indices_of_packed_matmul(): @T.prim_func def nonuniform_packed_matmul_write_cache( - A: T.Buffer[(1020, 64), "float32"], - B: T.Buffer[(1000, 64), "float32"], - C: T.Buffer[(1020, 1000), "float32"], + A: T.Buffer((1020, 64), "float32"), + B: T.Buffer((1000, 64), "float32"), + C: T.Buffer((1020, 1000), "float32"), ): for i0, i1 in T.grid(4, 1): with T.block(): @@ -977,9 +977,9 @@ def nonuniform_packed_matmul_write_cache( @T.prim_func def nonuniform_packed_matmul_write_cache_compacted( - A: T.Buffer[(1020, 64), "float32"], - B: T.Buffer[(1000, 64), "float32"], - C: T.Buffer[(1020, 1000), "float32"], + A: T.Buffer((1020, 64), "float32"), + B: T.Buffer((1000, 64), "float32"), + C: T.Buffer((1020, 1000), "float32"), ) -> None: for i0, i1 in T.grid(4, 1): with T.block(): diff --git a/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py b/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py index 297943bc1381..73b5203b56f0 100644 --- a/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py +++ b/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py @@ -85,7 +85,7 @@ def test_lower_te(): class TestErrorIfPredicateUsesBlockVariables(tvm.testing.CompareBeforeAfter): transform = tvm.tir.transform.ConvertBlocksToOpaque() - def before(A: T.Buffer[8, "int32"]): + def before(A: T.Buffer(8, "int32")): for i in T.serial(8): with T.block(): vi = T.axis.remap("S", [i]) diff --git a/tests/python/unittest/test_tir_transform_flatten_buffer.py b/tests/python/unittest/test_tir_transform_flatten_buffer.py index 12523fbdb2ae..c68dbd9ada6d 100644 --- 
a/tests/python/unittest/test_tir_transform_flatten_buffer.py +++ b/tests/python/unittest/test_tir_transform_flatten_buffer.py @@ -32,7 +32,7 @@ class BaseCompare(tvm.testing.CompareBeforeAfter): class TestElementwise(BaseCompare): """2-d buffers are flattened to 1-d""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i in T.serial(0, 16): B_new = T.decl_buffer([1, 16], "float32") for j in T.serial(0, 16): @@ -40,7 +40,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for j in T.serial(0, 16): C[i, j] = B_new[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): @@ -62,7 +62,7 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare): memory, and should be flattened to a 1-d allocation. """ - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i in T.serial(0, 16): B_new_data = T.allocate([1, 16], "float32", "global") B_new = T.Buffer([1, 16], "float32", data=B_new_data) for j in T.serial(0, 16): @@ -71,7 +71,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for j in T.serial(0, 16): C[i, j] = B_new[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): @@ -86,7 +86,7 @@ def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), class TestGPU(BaseCompare): """Buffer flattening may have indices based on GPU thread vars""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): i0 = T.env_thread("blockIdx.x") i1 = T.env_thread("threadIdx.x") i2 = T.env_thread("vthread") @@ -100,7 +100,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for j in range(0, 16): C[i0 * 4 + i1 * 2 + i2, j] = B[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) @@ -151,7 +151,7 @@ def expected(a: T.handle, c: T.handle, n: T.int32, m: T.int32) -> None: class TestMultiAlloc(BaseCompare): """If multiple allocations occur, all are flattened.""" - def before(A: T.Buffer[(4, 32), "float32"], D: T.Buffer[(4, 32), "float32"]): + def before(A: T.Buffer((4, 32), "float32"), D: T.Buffer((4, 32), "float32")): for i, j in T.grid(4, 32): B = T.decl_buffer((4, 32), "float32", scope="global") C = T.decl_buffer((4, 32), "float32", scope="global") @@ -159,7 +159,7 @@ def before(A: T.Buffer[(4, 32), "float32"], D: T.Buffer[(4, 32), "float32"]): C[i, j] = A[i, j] + B[i, j] D[i, j] = C[i, j] * 2.0 - def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), "float32"]): + def
expected(input_A: T.Buffer((4, 32), "float32"), input_D: T.Buffer((4, 32), "float32")): A = T.Buffer(128, "float32", data=input_A.data) D = T.Buffer(128, "float32", data=input_D.data) @@ -176,7 +176,7 @@ def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), " class TestStrided(BaseCompare): """Indices for flattened buffers use the specified striding.""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i0 in T.serial(4): B = T.decl_buffer([4, 17], "float32") B_1 = T.Buffer([4, 16], dtype="float32", data=B.data, strides=[17, 1]) @@ -185,7 +185,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for i1, j in T.grid(4, 16): C[i0 * 4 + i1, j] = B_1[i1, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i0 in T.serial(0, 4): @@ -202,11 +202,11 @@ def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), class TestBoolean(BaseCompare): """Boolean buffers should be replaced by a backing int8 array""" - def before(A: T.Buffer[10, "bool"], B: T.Buffer[10, "bool"]) -> None: + def before(A: T.Buffer(10, "bool"), B: T.Buffer(10, "bool")) -> None: for i0 in T.serial(10): B[i0] = A[i0] - def expected(input_A: T.Buffer[10, "bool"], input_B: T.Buffer[10, "bool"]) -> None: + def expected(input_A: T.Buffer(10, "bool"), input_B: T.Buffer(10, "bool")) -> None: A = T.Buffer(10, dtype="int8", data=input_A.data) B = T.Buffer(10, dtype="int8", data=input_B.data) # body diff --git a/tests/python/unittest/test_tir_transform_helpers.py b/tests/python/unittest/test_tir_transform_helpers.py index 01496e0e0fc1..f8dc0f682d06 100644 --- a/tests/python/unittest/test_tir_transform_helpers.py +++ b/tests/python/unittest/test_tir_transform_helpers.py @@ -25,7 +25,7 @@ def test_annotate_entry_func_single_primfunc(): @tvm.script.ir_module class MockModule: @T.prim_func - def func1(A: T.Buffer[(16,), "float32"]): + def func1(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: @@ -46,14 +46,14 @@ def func1(A: T.Buffer[(16,), "float32"]): @tvm.script.ir_module class MockModule: @T.prim_func - def func1(A: T.Buffer[(16,), "float32"]): + def func1(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: A[i] = 0.0 @T.prim_func - def func2(A: T.Buffer[(32,), "float32"]): + def func2(A: T.Buffer((32,), "float32")): for i in T.serial(32): if i == 15: if i == 15: diff --git a/tests/python/unittest/test_tir_transform_hoist_expression.py b/tests/python/unittest/test_tir_transform_hoist_expression.py index 8b7fc98bfdcf..77862f64d629 100644 --- a/tests/python/unittest/test_tir_transform_hoist_expression.py +++ b/tests/python/unittest/test_tir_transform_hoist_expression.py @@ -59,13 +59,13 @@ class TestHoistToTop(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if n != 0: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[(16,), "float32"], n: T.int32): + def expected(A: T.Buffer((16,), "float32"), n: T.int32): if n != 0: for i in T.serial(16): A[i] = 0.0 @@ -78,7 +78,7 @@ class TestSuppressHoistIfElse(BaseBeforeAfter): ) 
@T.prim_func - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if n != 0: A[i] = 0.0 @@ -88,7 +88,7 @@ def before(A: T.Buffer[(16,), "float32"], n: T.int32): class TestHoistBlockVar(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): i = T.env_thread("threadIdx.x") T.launch_thread(i, 128) @@ -97,7 +97,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): i = T.env_thread("threadIdx.x") T.launch_thread(i, 128) @@ -112,7 +112,7 @@ class TestSuppressHoistBlockVar(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -126,7 +126,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): class TestHoistAcrossBlockVar(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -136,7 +136,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") if n == 0: @@ -152,7 +152,7 @@ class TestSuppressHoistAcrossBlockVar(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -162,7 +162,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -174,14 +174,14 @@ def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): class TestHoistToMiddle(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -190,7 +190,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistWithLet(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): condition = i < 3 @@ -198,7 +198,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): condition = i < 3 if condition: @@ -216,7 +216,7 @@ class TestHoistDisableLet(BaseBeforeAfter): hoisted_let_bindings = tvm.testing.parameter(HoistedLetBindings.Never) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): condition = i < 3 @@ -228,7 +228,7 @@ def before(A: 
T.Buffer[(4, 4), "float32"]): class TestHoistIfElse(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: @@ -237,7 +237,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 1.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -249,7 +249,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistSequentialAssign(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: @@ -260,7 +260,7 @@ def before(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): B[i, j] = 1.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -274,7 +274,7 @@ def expected(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIf(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -283,7 +283,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -294,13 +294,13 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistComplexConditional(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j, k in T.grid(4, 4, 4): if j < 3 and i < 2: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -315,13 +315,13 @@ class TestSuppressSplittingConditional(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j, k in T.grid(4, 4, 4): if j < 3 and i < 2: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): if j < 3 and i < 2: for k in T.serial(4): @@ -330,7 +330,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIfElse(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -346,7 +346,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 3.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -368,7 +368,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIfElseDifferentBranches(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -384,7 +384,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 3.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def 
expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: if i < 1: @@ -415,12 +415,12 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistIfElseExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): A[i, j] = T.if_then_else(i < 2, 1.0, 2.0, dtype="float32") @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -436,7 +436,7 @@ class TestSuppressHoistIfElseExpr(TestHoistIfElseExpr): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): A[i, j] = T.if_then_else(i < 2, 1.0, 2.0, dtype="float32") @@ -445,13 +445,13 @@ def before(A: T.Buffer[(4, 4), "float32"]): class TestHoistLetExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): x = T.var("float32") A[i, j] = T.Let(x, T.cast(i + 1, "float32"), 5.0 * x + T.cast(j, "float32")) @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): x = T.cast(i + 1, "float32") for j in T.serial(4): @@ -464,7 +464,7 @@ class TestSuppressHoistLetExpr(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): x = T.var("float32") A[i, j] = T.Let(x, T.cast(i + 1, "float32"), 5.0 * x + T.cast(j, "float32")) diff --git a/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py b/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py index adf3d9da05ce..fca88594c0e0 100644 --- a/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py +++ b/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py @@ -37,7 +37,7 @@ def generate_global_to_shared_vectorized_copy(dtype, vector_size): @T.prim_func def ptx_global_to_shared_copy( - A: T.Buffer[(32, 128), dtype], B: T.Buffer[(32, 128), dtype] + A: T.Buffer((32, 128), dtype), B: T.Buffer((32, 128), dtype) ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") @@ -65,7 +65,7 @@ def ptx_global_to_shared_copy( @T.prim_func def ptx_global_to_shared_copy_fp32x1( - A: T.Buffer[(32, 128), "float32"], B: T.Buffer[(32, 128), "float32"] + A: T.Buffer((32, 128), "float32"), B: T.Buffer((32, 128), "float32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") @@ -90,9 +90,9 @@ def ptx_global_to_shared_copy_fp32x1( @T.prim_func def ptx_global_to_shared_dyn_copy_fp16x8( - A: T.Buffer[(32, 128), "float16"], - B: T.Buffer[(32, 128), "float16"], - C: T.Buffer[(32, 128), "float16"], + A: T.Buffer((32, 128), "float16"), + B: T.Buffer((32, 128), "float16"), + C: T.Buffer((32, 128), "float16"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") diff --git a/tests/python/unittest/test_tir_transform_inject_software_pipeline.py b/tests/python/unittest/test_tir_transform_inject_software_pipeline.py index cf01d7700725..1e5fd8843ba3 100644 --- a/tests/python/unittest/test_tir_transform_inject_software_pipeline.py +++ b/tests/python/unittest/test_tir_transform_inject_software_pipeline.py @@ -50,7 +50,7 @@ def _check_error(func): @T.prim_func -def trivial_pipeline(A: T.Buffer[(16, 1), 
"float32"], C: T.Buffer[(16, 1), "float32"]): +def trivial_pipeline(A: T.Buffer((16, 1), "float32"), C: T.Buffer((16, 1), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, 1, annotations={"software_pipeline_stage": [0, 1], "software_pipeline_order": [0, 1]} @@ -71,7 +71,7 @@ def trivial_pipeline(A: T.Buffer[(16, 1), "float32"], C: T.Buffer[(16, 1), "floa @T.prim_func def transformed_trivial_pipeline( - A: T.Buffer[(16, 1), "float32"], C: T.Buffer[(16, 1), "float32"] + A: T.Buffer((16, 1), "float32"), C: T.Buffer((16, 1), "float32") ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -94,7 +94,7 @@ def transformed_trivial_pipeline( def gen_simple_compute(num_stages): @T.prim_func - def simple_compute(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def simple_compute(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, @@ -122,7 +122,7 @@ def simple_compute(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "floa @T.prim_func def transformed_simple_compute( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -153,7 +153,7 @@ def transformed_simple_compute( @T.prim_func def simple_compute_with_other_annotation( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -181,7 +181,7 @@ def simple_compute_with_other_annotation( @T.prim_func def transformed_simple_compute_with_other_annotation( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -215,7 +215,7 @@ def transformed_simple_compute_with_other_annotation( @T.prim_func -def three_stage_compute(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): +def three_stage_compute(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, @@ -246,7 +246,7 @@ def three_stage_compute(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), @T.prim_func def transformed_three_stage_compute( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -300,9 +300,9 @@ def transformed_three_stage_compute( @T.prim_func def dag_interleaving( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -344,9 +344,9 @@ def dag_interleaving( @T.prim_func def transformed_dag_interleaving( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): 
with T.block(): @@ -409,7 +409,7 @@ def transformed_dag_interleaving( @T.prim_func def nested_pipeline_simple( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -453,7 +453,7 @@ def nested_pipeline_simple( @T.prim_func def transformed_nested_pipeline_simple( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -530,7 +530,7 @@ def transformed_nested_pipeline_simple( @T.prim_func def nested_pipeline_prefetch_inner( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -574,7 +574,7 @@ def nested_pipeline_prefetch_inner( @T.prim_func def transformed_nested_pipeline_prefetch_inner( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -654,7 +654,7 @@ def transformed_nested_pipeline_prefetch_inner( @T.prim_func def nested_pipeline_interleaving( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -704,7 +704,7 @@ def nested_pipeline_interleaving( @T.prim_func def transformed_nested_pipeline_interleaving( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -813,7 +813,7 @@ def transformed_nested_pipeline_interleaving( @T.prim_func def nested_pipeline_double_buffer( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -864,7 +864,7 @@ def nested_pipeline_double_buffer( @T.prim_func def transformed_nested_pipeline_double_buffer( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -977,7 +977,7 @@ def transformed_nested_pipeline_double_buffer( @T.prim_func def simple_compute_incorrect_reorder( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -1009,7 +1009,7 @@ def simple_compute_incorrect_reorder( @T.prim_func def simple_compute_conflicting_order( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -1041,7 +1041,7 @@ def simple_compute_conflicting_order( @T.prim_func def simple_compute_missing_annotation( 
- A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial(0, 16, annotations={"software_pipeline_stage": [0, 1]}): @@ -1116,7 +1116,7 @@ def test_simple_compute_async(): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def ref(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) @@ -1163,7 +1163,7 @@ def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + def ref(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) @@ -1216,9 +1216,9 @@ def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> N def test_async_producer_interleaving(): @T.prim_func def simple_compute( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in range(16): @@ -1251,9 +1251,9 @@ def simple_compute( @T.prim_func def ref( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -1330,7 +1330,7 @@ def test_three_stage_compute_two_stage_async(): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]) -> None: + def ref(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) diff --git a/tests/python/unittest/test_tir_transform_loop_partition.py b/tests/python/unittest/test_tir_transform_loop_partition.py index 1a40f52140ee..b88f8d1e3e72 100644 --- a/tests/python/unittest/test_tir_transform_loop_partition.py +++ b/tests/python/unittest/test_tir_transform_loop_partition.py @@ -541,7 +541,7 @@ def test_simple_rfactor(): @T.prim_func def partitioned_concat( - A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"], C: T.Buffer[(32,), "float32"] + A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32"), C: T.Buffer((32,), "float32") ) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) for i in T.serial(0, 16): @@ -578,10 +578,10 @@ def partition_from_scheduled_tir(prim_func, pass_cfg): @T.prim_func def partitioned_concat_3( - placeholder: T.Buffer[(1, 64, 28, 28), "int8"], - placeholder_1: T.Buffer[(1, 32, 28, 28), "int8"], - placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], - T_concat: T.Buffer[(1, 128, 28, 28), "int8"], + placeholder: T.Buffer((1, 64, 28, 28), "int8"), + placeholder_1: T.Buffer((1, 32, 28, 28), "int8"), + placeholder_2: T.Buffer((1, 32, 28, 28), "int8"), + T_concat: T.Buffer((1, 128, 28, 28), "int8"), ) -> 
None: placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) @@ -597,10 +597,10 @@ def partitioned_concat_3( @T.prim_func def concat_func_3( - placeholder: T.Buffer[(1, 64, 28, 28), "int8"], - placeholder_1: T.Buffer[(1, 32, 28, 28), "int8"], - placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], - T_concat: T.Buffer[(1, 128, 28, 28), "int8"], + placeholder: T.Buffer((1, 64, 28, 28), "int8"), + placeholder_1: T.Buffer((1, 32, 28, 28), "int8"), + placeholder_2: T.Buffer((1, 32, 28, 28), "int8"), + T_concat: T.Buffer((1, 128, 28, 28), "int8"), ) -> None: placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) @@ -630,7 +630,7 @@ def test_condition_mutually_exclusive(): def test_loop_partition_unroll_hint(): @T.prim_func def main( - A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] + A_arg: T.Buffer((1, 3, 224, 224), "int8"), B_arg: T.Buffer((1, 224, 7, 16), "int8") ) -> None: A = T.Buffer(150528, "int8", data=A_arg.data) B = T.Buffer(25088, "int8", data=B_arg.data) @@ -644,7 +644,7 @@ def main( @T.prim_func def partitioned_main( - A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] + A_arg: T.Buffer((1, 3, 224, 224), "int8"), B_arg: T.Buffer((1, 224, 7, 16), "int8") ) -> None: A = T.Buffer(150528, dtype="int8", data=A_arg.data) B = T.Buffer(25088, dtype="int8", data=B_arg.data) @@ -748,7 +748,7 @@ def partitioned_main(): def test_loop_partition_keep_loop_annotations(): @T.prim_func - def before(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: + def before(A: T.Buffer(160, "int32"), B: T.Buffer(160, "int32")) -> None: for i in T.serial( 160, annotations={"pragma_loop_partition_hint": True, "key": "value"}, @@ -761,7 +761,7 @@ def before(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: B[i] = A[i] + 3 @T.prim_func - def after(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: + def after(A: T.Buffer(160, "int32"), B: T.Buffer(160, "int32")) -> None: for i in T.serial(10, annotations={"key": "value"}): B[i] = A[i] + 1 for i in T.serial(140, annotations={"key": "value"}): @@ -783,10 +783,10 @@ def after(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: def test_loop_partition_with_unit_loop_in_condition(): @T.prim_func def before( - placeholder: T.Buffer[(50176,), "int8"], - placeholder_1: T.Buffer[(25088,), "int8"], - placeholder_2: T.Buffer[(25088,), "int8"], - T_concat: T.Buffer[(100352,), "int8"], + placeholder: T.Buffer((50176,), "int8"), + placeholder_1: T.Buffer((25088,), "int8"), + placeholder_2: T.Buffer((25088,), "int8"), + T_concat: T.Buffer((100352,), "int8"), ) -> None: for k in range(1, annotations={"preserve_unit_loop": True}): for i1 in T.serial(128, annotations={"pragma_loop_partition_hint": 1}): @@ -804,10 +804,10 @@ def before( @T.prim_func def after( - placeholder: T.Buffer[50176, "int8"], - placeholder_1: T.Buffer[25088, "int8"], - placeholder_2: T.Buffer[25088, "int8"], - T_concat: T.Buffer[100352, "int8"], + placeholder: T.Buffer(50176, "int8"), + placeholder_1: T.Buffer(25088, "int8"), + placeholder_2: T.Buffer(25088, "int8"), + T_concat: T.Buffer(100352, "int8"), ) -> None: for _ in T.serial(1, annotations={"preserve_unit_loop": True}): for i1, i2, i3 in T.grid(64, 28, 28): diff --git a/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py 
b/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py index 2bf898e66b08..dc0a6ce62a66 100644 --- a/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py +++ b/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py @@ -333,7 +333,7 @@ def lowered_with_block_predicate(a: T.handle, b: T.handle) -> None: @T.prim_func def single_reduction_loop_with_block_predicate( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -383,7 +383,7 @@ def single_reduction_loop_with_block_predicate( @T.prim_func def lowered_single_reduction_loop_with_block_predicate( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -489,9 +489,9 @@ def lowered_single_reduction_loop_with_block_predicate( @T.prim_func def single_reduction_loop_with_tensorize( - input_A: T.Buffer[(1, 64, 7, 7, 32), "uint8"], - input_B: T.Buffer[(16, 64, 1, 1, 8, 32, 4), "int8"], - output: T.Buffer[(1, 16, 7, 7, 32), "int32"], + input_A: T.Buffer((1, 64, 7, 7, 32), "uint8"), + input_B: T.Buffer((16, 64, 1, 1, 8, 32, 4), "int8"), + output: T.Buffer((1, 16, 7, 7, 32), "int32"), ) -> None: # body # with T.block("root") @@ -550,9 +550,9 @@ def single_reduction_loop_with_tensorize( @T.prim_func def nested_reduction_loop_with_inner_match_buffers( - in0: T.Buffer[(4, 16), "int8"], - in1: T.Buffer[(4, 16), "int8"], - out: T.Buffer[(4, 4), "int32"], + in0: T.Buffer((4, 16), "int8"), + in1: T.Buffer((4, 16), "int8"), + out: T.Buffer((4, 4), "int32"), ) -> None: # body # with T.block("root") @@ -935,10 +935,10 @@ def lowered_softmax(var_A: T.handle, var_T_softmax_norm: T.handle) -> None: @T.prim_func def argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0 in T.grid(128, 4): for i1_1 in T.thread_binding(32, thread="threadIdx.x"): @@ -960,10 +960,10 @@ def argmax_split( @T.prim_func def lowered_argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: cross_thread_argmax_v0 = T.alloc_buffer([1], dtype="int32", strides=[1], scope="local") cross_thread_argmax_v1 = T.alloc_buffer([1], dtype="float32", strides=[1], scope="local") @@ -1026,10 +1026,10 @@ def lowered_argmax_split( @T.prim_func def argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: 
T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val:
T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0 in T.grid(128, 4): for i1_1 in T.thread_binding(32, thread="threadIdx.x"): @@ -1051,10 +1051,10 @@ def argmin_split_init_update_reordered( @T.prim_func def lowered_argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: cross_thread_argmin_v0 = T.alloc_buffer([1], dtype="int32", strides=[1], scope="local") cross_thread_argmin_v1 = T.alloc_buffer([1], dtype="float32", strides=[1], scope="local") @@ -1117,10 +1117,10 @@ def lowered_argmin_split_init_update_reordered( @T.prim_func def layer_norm_tuple_sum( - data: T.Buffer[(128, 768), "float32"], - gamma: T.Buffer[768, "float32"], - bias: T.Buffer[768, "float32"], - T_layer_norm: T.Buffer[(128, 768), "float32"], + data: T.Buffer((128, 768), "float32"), + gamma: T.Buffer(768, "float32"), + bias: T.Buffer(768, "float32"), + T_layer_norm: T.Buffer((128, 768), "float32"), ) -> None: data_red_temp_v0 = T.alloc_buffer([128], dtype="float32") data_red_temp_v1 = T.alloc_buffer([128], dtype="float32") @@ -1172,10 +1172,10 @@ def layer_norm_tuple_sum( @T.prim_func def lowered_layer_norm_tuple_sum( - data: T.Buffer[(128, 768), "float32"], - gamma: T.Buffer[768, "float32"], - bias: T.Buffer[768, "float32"], - T_layer_norm: T.Buffer[(128, 768), "float32"], + data: T.Buffer((128, 768), "float32"), + gamma: T.Buffer(768, "float32"), + bias: T.Buffer(768, "float32"), + T_layer_norm: T.Buffer((128, 768), "float32"), ) -> None: # with T.block("root") data_red_temp_v0 = T.alloc_buffer([128], dtype="float32") diff --git a/tests/python/unittest/test_tir_transform_lower_opaque_block.py b/tests/python/unittest/test_tir_transform_lower_opaque_block.py index 824cef174055..a7502edd31ab 100644 --- a/tests/python/unittest/test_tir_transform_lower_opaque_block.py +++ b/tests/python/unittest/test_tir_transform_lower_opaque_block.py @@ -236,7 +236,7 @@ def compacted_strided_buffer_func(a: T.handle, c: T.handle) -> None: @T.prim_func def transformed_strided_buffer_func( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: # body for i0 in T.serial(4): @@ -256,7 +256,7 @@ def annotated_loops(a: T.handle) -> None: @T.prim_func -def boolean_handling_before(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> None: +def boolean_handling_before(a: T.Buffer(10, "bool"), b: T.Buffer(10, "bool")) -> None: for i0 in T.serial(10): with T.block("b"): T.reads(a[i0]) @@ -265,7 +265,7 @@ def boolean_handling_before(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> @T.prim_func -def boolean_handling_after(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> None: +def boolean_handling_after(a: T.Buffer(10, "bool"), b: T.Buffer(10, "bool")) -> None: # body for i0 in T.serial(10): b[i0] = a[i0] @@ -342,14 +342,14 @@ def annotated_block() -> None: def test_preserved_annotations(): @T.prim_func - def before(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]): + def before(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")): for i in T.serial(8, annotations={"k_0": 1, "k_1": [2, 3], "k_2": 3.14}): with T.block("block"): T.block_attr({"k_3": "oops"}) 
B[i] = A[i] + 1.0 @T.prim_func - def after(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]): + def after(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")): for i in T.serial(8, annotations={"k_0": 1, "k_1": [2, 3], "k_2": 3.14}): B[i] = A[i] + 1.0 diff --git a/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py b/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py index 111b91d5fd54..15d7118fb8a9 100644 --- a/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py +++ b/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py @@ -26,7 +26,7 @@ @tvm.script.ir_module class MatmulBefore: @T.prim_func - def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float32"], C: T.Buffer[(1024, 1024), "float32"]) -> None: + def main(A: T.Buffer((1024, 1024), "float32"), B: T.Buffer((1024, 1024), "float32"), C: T.Buffer((1024, 1024), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) # body @@ -67,7 +67,7 @@ def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float3 @tvm.script.ir_module class MatmulAfter: @T.prim_func - def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float32"], C: T.Buffer[(1024, 1024), "float32"]) -> None: + def main(A: T.Buffer((1024, 1024), "float32"), B: T.Buffer((1024, 1024), "float32"), C: T.Buffer((1024, 1024), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_transform_narrow_datatype.py b/tests/python/unittest/test_tir_transform_narrow_datatype.py index c9c513378595..56b63c889335 100644 --- a/tests/python/unittest/test_tir_transform_narrow_datatype.py +++ b/tests/python/unittest/test_tir_transform_narrow_datatype.py @@ -117,7 +117,7 @@ def test_thread_axis_2(): @tvm.script.ir_module class Before: @T.prim_func - def main(T_reshape: T.Buffer[(1, 12, 384, 384), "float32"], placeholder_1: T.Buffer[(T.int64(1), T.int64(12), T.int64(384), 384), "bool"], T_where: T.Buffer[(T.int64(1), T.int64(12), T.int64(384), 384), "float32"]) -> None: + def main(T_reshape: T.Buffer((1, 12, 384, 384), "float32"), placeholder_1: T.Buffer((T.int64(1), T.int64(12), T.int64(384), 384), "bool"), T_where: T.Buffer((T.int64(1), T.int64(12), T.int64(384), 384), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -299,7 +299,7 @@ def test_ramp_dtype_consistency(): def test_condition(): @T.prim_func - def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(130,), "float32"]): + def before(A: T.Buffer((128,), "float32"), B: T.Buffer((130,), "float32")): for i, j in T.grid(T.int64(2), T.int64(65)): if i * T.int64(65) + j >= T.int64(0) and i * T.int64(65) + j < T.int64(128): A[i * T.int64(65) + j] = 0.0 @@ -312,7 +312,7 @@ def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(130,), "float32"]): ) @T.prim_func - def expected_after(A: T.Buffer[128, "float32"], B: T.Buffer[130, "float32"]): + def expected_after(A: T.Buffer(128, "float32"), B: T.Buffer(130, "float32")): for i, j in T.grid(2, 65): if i * 65 + j >= 0 and i * 65 + j < 128: A[i * 65 + j] = T.float32(0) @@ -327,7 +327,7 @@ def expected_after(A: T.Buffer[128, "float32"], B: T.Buffer[130, "float32"]): def test_block(): @T.prim_func - def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): + def before(A: T.Buffer((128,), 
"float32"), B: T.Buffer((128,), "float32")): for i in T.serial(0, T.int64(16)): for j in T.serial(0, T.int64(8)): with T.block(): @@ -335,7 +335,7 @@ def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): B[vi] = A[vi] + T.float32(1) @T.prim_func - def expected_after(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): + def expected_after(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")): for i in T.serial(0, T.int32(16)): for j in T.serial(0, T.int32(8)): with T.block(): diff --git a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py index e1750e73b43e..05d71de5bca6 100644 --- a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py +++ b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py @@ -254,7 +254,7 @@ def test_loop_carried_dependency(): and the allocate buffer should keep the order.""" @T.prim_func - def before(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]): + def before(A: T.Buffer((8, 8, 8), "int32"), B: T.Buffer((8, 8, 8), "int32")): C = T.alloc_buffer([8, 8, 8], dtype="int32") D = T.alloc_buffer([8, 8, 8], dtype="int32") for i in T.serial(8): @@ -278,7 +278,7 @@ def before(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]): ) @T.prim_func - def after(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]) -> None: + def after(A: T.Buffer((8, 8, 8), "int32"), B: T.Buffer((8, 8, 8), "int32")) -> None: for i in T.serial(8): with T.block(): T.reads(A[i, 0:8, 0:8]) @@ -312,7 +312,7 @@ def test_1D_cascade_op_rolling_buffer(): which is marked as opaque in consumer block's iter mappings.""" @T.prim_func - def before(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): + def before(A: T.Buffer((4, 16), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer((4, 6), "int32") for c in T.serial(4): for i in T.serial(0, 2): @@ -338,7 +338,7 @@ def before(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): ) @T.prim_func - def after(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): + def after(A: T.Buffer((4, 16), "int32"), C: T.Buffer((4, 8), "int32")): for c in T.serial(4): with T.block(): T.reads(A[c, 0:12], C[c, 0:8]) diff --git a/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py b/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py index 13fbcc7594ec..ae82afa650e3 100644 --- a/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py +++ b/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py @@ -46,7 +46,7 @@ class TestIntroduceNoOp(BaseBeforeAfter): to the then_case, then the conditional can be removed. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -54,7 +54,7 @@ def before(A: T.Buffer[16, "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 T.evaluate(0) @@ -70,12 +70,12 @@ class TestIntroduceAdditionOfZero(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): for i in T.serial(16): if i > 0: A[0] = A[0] + i * i - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): for i in T.serial(16): A[0] = A[0] + i * i @@ -89,7 +89,7 @@ class TestIntroduceAdditionOfKnownZeroInBuffer(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -98,7 +98,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): if i < 14: B[0] = B[0] + A[i] - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -118,7 +118,7 @@ class TestIntroduceOverwrittenWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -127,7 +127,7 @@ def before(A: T.Buffer[16, "int32"]): if i >= 14: A[i] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -145,7 +145,7 @@ class TestMaintainValuesUsedLater(BaseBeforeAfter): not be valid. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -167,7 +167,7 @@ class TestIdentifyOverwrittenWriteFromEquivalentExpressions(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -176,7 +176,7 @@ def before(A: T.Buffer[16, "int32"]): if io == 3 and ii >= 2: A[4 * io + ii] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -197,7 +197,7 @@ class TestIntroduceSupersetOverwrittenWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -206,7 +206,7 @@ def before(A: T.Buffer[16, "int32"]): if i >= 14: A[i] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 diff --git a/tests/python/unittest/test_tir_transform_remove_assume.py b/tests/python/unittest/test_tir_transform_remove_assume.py index a2d68a075790..542d5c7a1020 100644 --- a/tests/python/unittest/test_tir_transform_remove_assume.py +++ b/tests/python/unittest/test_tir_transform_remove_assume.py @@ -30,25 +30,25 @@ def transform(self): class TestRemoveAssume(BaseBeforeAfter): """Remove any instance of T.assume""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 5)) A[0] = 10 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 10 class TestRemoveAssumeLoop(BaseBeforeAfter): """Loops containing only T.assume should be removed""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) for i in T.serial(16): A[i] = 10 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 10 diff --git a/tests/python/unittest/test_tir_transform_remove_no_op.py b/tests/python/unittest/test_tir_transform_remove_no_op.py index 06d9289aa795..15c5a577f9f5 100644 --- a/tests/python/unittest/test_tir_transform_remove_no_op.py +++ b/tests/python/unittest/test_tir_transform_remove_no_op.py @@ -74,7 +74,7 @@ def test_remove_no_op(): def test_remove_no_op_with_invalid_extent(): @T.prim_func - def main(A: T.Buffer[(16), "int32"], B: T.Buffer[(16), "int32"]) -> None: + def main(A: T.Buffer((16), "int32"), B: T.Buffer((16), "int32")) -> None: for i in T.serial(16): for j in T.serial(i - 20): B[i] = A[i] + j @@ -115,23 +115,23 @@ def expected(): class TestRemoveZeroExtentLoop(BaseBeforeAfter): """A for-loop with no extent is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(0): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) class TestRemoveUnusedLet(BaseBeforeAfter): """A let statement that is never used is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): x = 5 for i in T.serial(16): A[i] = 0 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 0 @@ -143,12 +143,12 @@ class TestRemoveLetUsedOnlyInNoOp(BaseBeforeAfter): may have been removed by an earlier removal of another no-op. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): x = 5 for i in T.serial(0): A[i] = x - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) @@ -166,14 +166,14 @@ def expected(): class TestRemoveEmptyThenCase(BaseBeforeAfter): """A no-op then_case can be removed.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: T.evaluate(0) else: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if not (i < 8): A[i] = 42 @@ -182,14 +182,14 @@ def expected(A: T.Buffer[16, "int32"]): class TestRemoveEmptyElseCase(BaseBeforeAfter): """A no-op else_case can be removed.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: A[i] = 42 else: T.evaluate(0) - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: A[i] = 42 @@ -200,12 +200,12 @@ class TestRemoveUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -218,7 +218,7 @@ class TestSuppressRemovalOfUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = False - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = 42 @@ -231,12 +231,12 @@ class TestKeepSideEffectsOfUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = T.call_extern("extern_func", dtype="int32") A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.call_extern("extern_func", dtype="int32")) A[i] = 42 @@ -245,7 +245,7 @@ def expected(A: T.Buffer[16, "int32"]): class TestKeepFirstWriteWhenUsed(BaseBeforeAfter): """For two sequential writes, keep the first if it is used""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = A[i] + 1 @@ -261,14 +261,14 @@ class TestRemoveOverwrittenLoop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -283,14 +283,14 @@ class TestRemoveOverwrittenSubloop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(4, 12): A[i] = 100 for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -302,7 +302,7 @@ class TestKeepPartiallyOverwrittenLoop(BaseBeforeAfter): may not be removed be kept. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 @@ -323,7 +323,7 @@ class TestRemoveOverwrittenPredicatedLoopWithIdenticalCondition(BaseBeforeAfter) use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 100 @@ -332,7 +332,7 @@ def before(A: T.Buffer[16, "int32"]): if i < 12: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 42 @@ -351,7 +351,7 @@ class TestRemoveOverwrittenPredicatedLoopWithProvableCondition(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 10: A[i] = 100 @@ -360,7 +360,7 @@ def before(A: T.Buffer[16, "int32"]): if i // 4 < 3: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i // 4 < 3: A[i] = 42 @@ -375,7 +375,7 @@ class TestRemoveSeparatedOverwrites(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 @@ -385,7 +385,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -404,7 +404,7 @@ class TestRemoveSeparatedOverwriteOfPredicatedLoop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 100 @@ -417,7 +417,7 @@ def before(A: T.Buffer[16, "int32"]): if i < 12: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i > 12: A[i] = 15 @@ -430,17 +430,17 @@ def expected(A: T.Buffer[16, "int32"]): class TestRemoveReadWrite(BaseBeforeAfter): """Writing a value to the same location as was just read is a no-op.""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = A[0] - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestKeepReadWriteToDifferentIndices(BaseBeforeAfter): """Writing a value to a different index should not be removed""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(15): A[i] = A[i + 1] @@ -455,12 +455,12 @@ class TestRemoveReadWriteSameIndexDifferentExpression(BaseBeforeAfter): expression. """ - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for io, ii in T.grid(4, 4): i = 4 * io + ii A[4 * io + ii] = A[i] - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) @@ -472,14 +472,14 @@ class TestRemoveReadWriteSameIndexUsingConstraint(BaseBeforeAfter): that is known from a conditional containing the read/write. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i != 0: A[i] = A[i - 1] else: A[i] = A[0] - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i != 0: A[i] = A[i - 1] @@ -490,13 +490,13 @@ class TestRemoveWritingOfKnownValue(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i A[4] = 4 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -513,14 +513,14 @@ class TestKeepOneOfDuplicateLoops(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i for i in T.serial(16): A[i] = i - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -540,12 +540,12 @@ def expected(): class TestRemoveUnusedTemporary(BaseBeforeAfter): """An unused allocation is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): B = T.allocate([16], "int32", "local") for i in T.serial(16): A[i] = 1 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -566,7 +566,7 @@ def expected(): class TestKeepUsedWriteIntoTemporary(BaseBeforeAfter): """A write into a temporary that is used later must be kept.""" - def before(B: T.Buffer[16, "int32"]): + def before(B: T.Buffer(16, "int32")): A = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): A[i] = 0 @@ -581,7 +581,7 @@ def before(B: T.Buffer[16, "int32"]): class TestRemoveWriteIntoTemporary(BaseBeforeAfter): """A write that only impacts a temporary allocation is a no-op.""" - def before(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), C: T.Buffer(1, "int32")): B = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): B[i] = A[i] @@ -593,7 +593,7 @@ def before(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): for i in T.serial(16): B[i] = 0 - def expected(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(16, "int32"), C: T.Buffer(1, "int32")): B = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): B[i] = A[i] diff --git a/tests/python/unittest/test_tir_transform_remove_undef.py b/tests/python/unittest/test_tir_transform_remove_undef.py index c634bf5e9da8..66a6198723c1 100644 --- a/tests/python/unittest/test_tir_transform_remove_undef.py +++ b/tests/python/unittest/test_tir_transform_remove_undef.py @@ -30,27 +30,27 @@ def transform(self): class TestRemoveStoreUndef(BaseBeforeAfter): """Remove a store whose value is T.undef()""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = T.undef(dtype="int32") - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestRemoveStoreUndefExpression(BaseBeforeAfter): """Expressions containing T.undef() are removed""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 1 + T.undef(dtype="int32") - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestKeepOtherCallNodes(BaseBeforeAfter): """Expressions containing other CallNodes are not removed""" - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: T.Buffer(1, 
"int32"), n: T.int32): A[0] = T.shift_left(n, 1, dtype="int32") expected = before @@ -59,18 +59,18 @@ def before(A: T.Buffer[1, "int32"], n: T.int32): class TestRemoveLetUndef(BaseBeforeAfter): """Remove a store whose value is bound to T.undef()""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): val = T.undef(dtype="int32") A[0] = val - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestRaiseErrorForUndefAsStoreIndices(BaseBeforeAfter): """Use of T.undef() as buffer indices is an error""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): val = T.undef(dtype="int32") A[val] = 5 @@ -84,7 +84,7 @@ class TestRaiseErrorForUndefAsLoadIndices(BaseBeforeAfter): T.undef() may not appear in a buffer's indices. """ - def before(A: T.Buffer[1, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32"), B: T.Buffer(1, "int32")): B[0] = A[T.undef(dtype="int32")] expected = TVMError diff --git a/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py b/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py index 7a014283816f..6d6e0da71cc5 100644 --- a/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py +++ b/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py @@ -35,9 +35,9 @@ def _check(before, expect): def test_matmul(): @T.prim_func def before( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_ = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -61,9 +61,9 @@ def before( @T.prim_func def after( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 4, 4), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 4, 4), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0_o, i1_o in T.grid(16, 16): diff --git a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py index 5cdc272440e7..057cfc42e4ec 100644 --- a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py +++ b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py @@ -25,7 +25,7 @@ @tvm.script.ir_module class Before: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) @@ -56,7 +56,7 @@ def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 51 @tvm.script.ir_module class After: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: 
# function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) @@ -87,7 +87,7 @@ def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 51 @tvm.script.ir_module class After_simplified: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # var definition diff --git a/tests/python/unittest/test_tir_transform_simplify.py b/tests/python/unittest/test_tir_transform_simplify.py index 1ddc0e50d98f..b50035aa69d4 100644 --- a/tests/python/unittest/test_tir_transform_simplify.py +++ b/tests/python/unittest/test_tir_transform_simplify.py @@ -164,10 +164,10 @@ def inner(mod): class TestLoadStoreNoop(BaseBeforeAfter): """Store of a value that was just read from the same location is a no-op.""" - def before(A: T.Buffer[(1,), "float32"]): + def before(A: T.Buffer((1,), "float32")): A[0] = A[0] - def expected(A: T.Buffer[(1,), "float32"]): + def expected(A: T.Buffer((1,), "float32")): T.evaluate(0) @@ -180,10 +180,10 @@ class TestLoadStoreNoopAfterSimplify(BaseBeforeAfter): regression. """ - def before(A: T.Buffer[(1,), "float32"]): + def before(A: T.Buffer((1,), "float32")): A[0] = A[0] + (5.0 - 5.0) - def expected(A: T.Buffer[(1,), "float32"]): + def expected(A: T.Buffer((1,), "float32")): T.evaluate(0) @@ -195,13 +195,13 @@ class TestNestedCondition(BaseBeforeAfter): constraint. """ - def before(A: T.Buffer[(16,), "float32"]): + def before(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"]): + def expected(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: A[i] = 0.0 @@ -214,13 +214,13 @@ class TestNestedProvableCondition(BaseBeforeAfter): conditional. """ - def before(A: T.Buffer[(16,), "float32"]): + def before(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i < 7: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"]): + def expected(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: A[i] = 0.0 @@ -233,13 +233,13 @@ class TestNestedVarCondition(BaseBeforeAfter): constraint. """ - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if i == n: if i == n: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"], n: T.int32): + def expected(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if i == n: A[i] = 0.0 @@ -254,7 +254,7 @@ class TestAlteredBufferContents(BaseBeforeAfter): may not. """ - def before(A: T.Buffer[(1,), "int32"], n: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 if A[0] == n: @@ -270,7 +270,7 @@ class TestNegationOfCondition(BaseBeforeAfter): condition is known to be false. 
""" - def before(A: T.Buffer[(16,), "int32"]): + def before(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i == 5: if i != 5: @@ -278,7 +278,7 @@ def before(A: T.Buffer[(16,), "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"]): + def expected(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i == 5: A[i] = 1 @@ -293,7 +293,7 @@ class TestNegationOfNotEqual(BaseBeforeAfter): ``i==5`` as the negation of a literal constraint. """ - def before(A: T.Buffer[(16,), "int32"]): + def before(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i != 5: if i == 5: @@ -301,7 +301,7 @@ def before(A: T.Buffer[(16,), "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"]): + def expected(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i != 5: A[i] = 1 @@ -314,7 +314,7 @@ class TestNegationOfVarCondition(BaseBeforeAfter): must rely on RewriteSimplifier recognizing the repeated literal. """ - def before(A: T.Buffer[(16,), "int32"], n: T.int32): + def before(A: T.Buffer((16,), "int32"), n: T.int32): for i in T.serial(16): if i == n: if i != n: @@ -322,7 +322,7 @@ def before(A: T.Buffer[(16,), "int32"], n: T.int32): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"], n: T.int32): + def expected(A: T.Buffer((16,), "int32"), n: T.int32): for i in T.serial(16): if i == n: A[i] = 1 @@ -337,13 +337,13 @@ class TestLiteralConstraintSplitBooleanAnd(BaseBeforeAfter): the condition is to ensure we exercise RewriteSimplifier. """ - def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def before(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n and j == n: if i == n: A[i, j] = 0 - def expected(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def expected(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n and j == n: A[i, j] = 0 @@ -360,7 +360,7 @@ class TestLiteralConstraintSplitBooleanOr(BaseBeforeAfter): RewriteSimplifier. 
""" - def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def before(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n or j == n: A[i, j] = 0 @@ -370,7 +370,7 @@ def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): else: A[i, j] = 2 - def expected(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def expected(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n or j == n: A[i, j] = 0 @@ -387,14 +387,14 @@ class TestProveConditionUsingLet(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition or i >= 3: A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 A[i] = condition @@ -409,7 +409,7 @@ class TestProveLetCondition(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if i < 3: @@ -417,7 +417,7 @@ def before(A: T.Buffer[4, "bool"]): A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if i < 3: @@ -432,7 +432,7 @@ class TestProveRepeatedLetCondition(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition: @@ -440,7 +440,7 @@ def before(A: T.Buffer[4, "bool"]): A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition: @@ -449,13 +449,13 @@ def expected(A: T.Buffer[4, "bool"]): class TestIfThenElseExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if i < 12: A[i] = T.if_then_else(i < 12, 1.0, 2.0, dtype="float32") @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): if i < 12: A[i] = 1.0 @@ -465,13 +465,13 @@ class TestCeilLog2Int(BaseBeforeAfter): """Simplify expressions resulting from topi.math.ceil_log2""" @T.prim_func - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = T.cast( T.ceil(T.log2(T.cast(14, "float64"), dtype="float64"), dtype="float64"), dtype="int32" ) @T.prim_func - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 4 @@ -479,7 +479,7 @@ class TestLeftCeilLog2LowerBound(BaseBeforeAfter): """Integer bounds are propagated through topi.math.ceil_log2""" @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): x = T.cast( T.ceil(T.log2(T.cast(i + 1024 + 1, "float64"), dtype="float64"), dtype="float64"), @@ -489,7 +489,7 @@ def before(A: T.Buffer[16, "float32"]): A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -503,13 +503,13 @@ class TestLeftShiftLowerBound(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(1, i, dtype="int32") >= 1: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -523,13 +523,13 @@ class 
TestLeftShiftUpperBound(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(31, i, dtype="int32") <= 1015808: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -543,7 +543,7 @@ class TestLeftShiftOfNegativeValue(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if -64 <= T.shift_left(-i, 4, dtype="int32"): A[i] = 0.0 @@ -560,7 +560,7 @@ class TestLeftShiftByNegativeValue(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(16, -i, dtype="int32") <= 16: A[i] = 0.0 @@ -651,7 +651,7 @@ def before(self, test_case): priors, postulate, _ = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = postulate @@ -667,7 +667,7 @@ def expected(self, test_case): if provable: @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = True @@ -677,7 +677,7 @@ def func(A: T.Buffer[1, "bool"]): postulate = analyzer.canonical_simplify(postulate) @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = postulate @@ -687,7 +687,7 @@ def func(A: T.Buffer[1, "bool"]): class TestSuppressTransitivelyProvableCondition(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): if i < j and j < k: A[0] = i < k @@ -699,10 +699,10 @@ class TestRewriteAsAndOfOrs(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[3, "bool"]): + def before(A: T.Buffer(3, "bool")): T.evaluate(A[0] or (A[1] and A[2])) - def expected(A: T.Buffer[3, "bool"]): + def expected(A: T.Buffer(3, "bool")): T.evaluate((A[0] or A[1]) and (A[0] or A[2])) @@ -711,7 +711,7 @@ class TestSuppressRewriteAsAndOfOrs(BaseBeforeAfter): convert_boolean_to_and_of_ors = False - def before(A: T.Buffer[3, "bool"]): + def before(A: T.Buffer(3, "bool")): T.evaluate(A[0] or (A[1] and A[2])) expected = before @@ -729,10 +729,10 @@ class TestRewriteAsAndOfOrsWithTopLevelAnd(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): T.evaluate((A[0] or A[1]) and (A[1] or (A[0] and A[2] and A[3]))) - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): # If the simplification is applied to the OrNode, then a # redundant `(A[1] or A[0])` would't be canceled out. 
When # applying SimplifyAsAndOfOrs to the top-level AndNode, the @@ -760,10 +760,10 @@ class TestRewriteAsAndOfOrsWithSimplificationBetweenGroups(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10 or k == 20) and (i == 0 or j == 10 or k != 30) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = i == 0 or j == 10 or k == 20 @@ -777,10 +777,10 @@ class TestRewriteAsAndOfOrsWithSimplificationBetweenReorderedGroups(BaseBeforeAf convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10 or k == 20) and (j == 10 or k != 30 or i == 0) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = j == 10 or k == 20 or i == 0 @@ -794,10 +794,10 @@ class TestRewriteAsAndOfOrUsingSimplificationAcrossAnd(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (k == 20) and ((i == 0 or j == 10) and (k != 30)) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10) and (k == 20) @@ -815,10 +815,10 @@ class TestRewriteAsAndOfOrUsingSimplificationWithinOr(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 20) or (j == 0) or (i != 30) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (j == 0) or (i != 30) @@ -842,11 +842,11 @@ class TestConditionalFloorMod(BaseBeforeAfter): `canonical_simplify`. 
""" - def before(A: T.Buffer[1, "bool"], i: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32): if T.floormod(0 - i, 2) == 0: A[0] = T.floormod(i, 2) == 0 - def expected(A: T.Buffer[1, "bool"], i: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32): if T.floormod(i, -2) == 0: A[0] = True @@ -861,10 +861,10 @@ class TestSimplifyRHSOfBooleanAndUsingLHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 and n < 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 @@ -877,10 +877,10 @@ class TestSimplifyLHSOfBooleanAndUsingRHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 and n < 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 @@ -894,10 +894,10 @@ class TestSimplifyRHSOfBooleanOrUsingLHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 or n < 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 @@ -910,10 +910,10 @@ class TestSimplifyLHSOfBooleanOrUsingRHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 or n < 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 @@ -929,10 +929,10 @@ class TestSimplifyRHSOfBooleanAndUsingLHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 and n < m + 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 @@ -948,10 +948,10 @@ class TestSimplifyLHSOfBooleanAndUsingRHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 and n < m + 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 @@ -967,10 +967,10 @@ class TestSimplifyRHSOfBooleanOrUsingLHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 or n < m + 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 @@ -986,10 +986,10 @@ class TestSimplifyLHSOfBooleanOrUsingRHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 or n < m + 10 - def 
expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 @@ -998,11 +998,11 @@ class TestProvableConditionWithOffset(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = i < j + 1 - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = True @@ -1035,7 +1035,7 @@ def before(self, test_case): priors, expr_before, _ = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = expr_before @@ -1046,7 +1046,7 @@ def expected(self, test_case): priors, _, expr_after = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = expr_after @@ -1058,11 +1058,11 @@ class TestProvableConditionWithOffset(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = i < j + 1 - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = True @@ -1078,7 +1078,7 @@ class TestAlteredBufferContents(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[(1,), "int32"], n: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 # If the simplifier incorrectly uses the invalidated @@ -1091,7 +1091,7 @@ def before(A: T.Buffer[(1,), "int32"], n: T.int32): else: A[0] = 10 - def expected(A: T.Buffer[(1,), "int32"], n: T.int32): + def expected(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 A[0] = 10 @@ -1108,7 +1108,7 @@ class TestPossiblyAlteredBufferContents(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[(1,), "int32"], n: T.int32, m: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32, m: T.int32): if A[0] == n: if m == 0: A[0] = A[0] + 1 @@ -1126,12 +1126,12 @@ class TestSimplifyInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) if n == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"], n: T.int32): + def expected(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) A[0] = 42 @@ -1141,12 +1141,12 @@ class TestSimplifyInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) if n == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"], n: T.int32): + def expected(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) A[0] = 42 @@ -1156,7 +1156,7 @@ class TestNoSimplifyFromScopedInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32, m: T.int32): if m == 0: T.evaluate(T.assume(n == 0)) @@ -1171,13 +1171,13 @@ class TestSimplifyConditionalUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: 
T.Buffer(1, "int32")): A[0] = 0 if A[0] == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = 42 @@ -1192,7 +1192,7 @@ class TestKeepExpressionSimplifyUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32"), B: T.Buffer(1, "int32")): A[0] = 0 B[0] = A[0] @@ -1208,7 +1208,7 @@ class TestSimplifyConditionalInLoopUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -1218,7 +1218,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): else: B[j] = 100 - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -1231,13 +1231,13 @@ class TestSimplifyUsingBufferAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) if A[0] == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = 42 @@ -1247,7 +1247,7 @@ class TestSimplifyUsingBufferAssumptionInLoop(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == i)) @@ -1255,7 +1255,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] < 100: A[i] = 0 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == i)) @@ -1269,7 +1269,7 @@ class TestSimplifyUsingPartiallyKnownBufferConditional(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1283,7 +1283,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 100 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1307,7 +1307,7 @@ class TestSimplifyUsingPartiallyKnownBufferExpression(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -1316,7 +1316,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -1336,7 +1336,7 @@ class TestNoSimplificationIfPredicateNotMet(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1354,7 +1354,7 @@ class TestNoSimplifyUsingInvalidatedScopedConstraint(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i == 0: A[i] = 0 @@ -1374,7 +1374,7 @@ class 
TestNoSimplifyUsingOverwrittenValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1398,7 +1398,7 @@ class TestNoSimplifyUsingLoopDependentBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): B[0] = 0 for i in T.serial(16): if B[0] < 10: @@ -1421,7 +1421,7 @@ class TestSimplifyPriorToOverwrittenValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1435,7 +1435,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1459,7 +1459,7 @@ class TestSimplifyElementWiseUsingPreLoopBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -1469,7 +1469,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): else: B[i] = A[i] + B[i] - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -1482,11 +1482,11 @@ class TestSimplifyNonConditional(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = A[0] + 1 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = 1 @@ -1499,7 +1499,7 @@ class TestSuppressSimplifyNonConditional(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = False - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = A[0] + 1 @@ -1515,7 +1515,7 @@ class TestSimplifyUsingTransitiveKnownBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = A[0] + 1 @@ -1525,7 +1525,7 @@ def before(A: T.Buffer[1, "int32"]): if A[0] == 3: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = A[0] + 1 @@ -1540,7 +1540,7 @@ class TestSimplifyRampIndexBroadcastValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[4, "int32"]): + def before(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.broadcast(0, 4) if A[0] == 0: @@ -1549,7 +1549,7 @@ def before(A: T.Buffer[4, "int32"]): if A[1] == 0: A[1] = 60 - def expected(A: T.Buffer[4, "int32"]): + def expected(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.broadcast(0, 4) A[0] = 42 @@ -1561,7 +1561,7 @@ class TestSimplifyRampIndexRampValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[4, "int32"]): + def before(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.ramp(11, 1, 4) if A[0] == 11: @@ -1570,7 +1570,7 @@ def before(A: T.Buffer[4, "int32"]): if A[1] == 12: A[1] = 60 - def expected(A: T.Buffer[4, "int32"]): + def expected(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] 
= T.ramp(11, 1, 4) A[0] = 42 @@ -1589,7 +1589,7 @@ class TestSimplifyUsingPartiallyProvenBufferValueGather(BaseBeforeAfter): transitively_prove_inequalities = True propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def before(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): # A has non-zero values only in the range 3 <= i < 17 for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1610,7 +1610,7 @@ def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "i if B[i] != 0: B[i] = 0 - def expected(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def expected(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1635,7 +1635,7 @@ class TestSimplifyUsingPartiallyProvenBufferValueScatter(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def before(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): # A has non-zero values only in the range 3 <= i < 17 for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1658,7 +1658,7 @@ def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "i if B[i] != 0: B[i] = 0 - def expected(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def expected(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1680,11 +1680,11 @@ class TestSimplifyBufferStore(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 5 A[0] = A[0] + 7 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 5 A[0] = 12 diff --git a/tests/python/unittest/test_tir_transform_storage_flatten.py b/tests/python/unittest/test_tir_transform_storage_flatten.py index 95e2eaed55fa..29623b498f43 100644 --- a/tests/python/unittest/test_tir_transform_storage_flatten.py +++ b/tests/python/unittest/test_tir_transform_storage_flatten.py @@ -139,7 +139,7 @@ def main(): T.func_attr({"from_legacy_te_schedule": True}) # If a pointer defined using a LetStmt, - A_data: T.Ptr[T.int32] = T.call_extern("dummy_extern_function", dtype="handle") + A_data: T.Ptr("int32") = T.call_extern("dummy_extern_function", dtype="handle") # and a buffer is backed by that pointer, A = T.decl_buffer([1], dtype="float32", data=A_data) diff --git a/tests/python/unittest/test_tir_transform_storage_rewrite.py b/tests/python/unittest/test_tir_transform_storage_rewrite.py index 2ed2e6ec6d71..4766022121df 100644 --- a/tests/python/unittest/test_tir_transform_storage_rewrite.py +++ b/tests/python/unittest/test_tir_transform_storage_rewrite.py @@ -652,7 +652,7 @@ def verify(n): def test_access_in_let_value(): @T.prim_func - def func(A: T.Buffer[(8,), "float32"]): + def func(A: T.Buffer((8,), "float32")): for i in range(8): B_data = T.allocate((1,), "float32", "global") B = T.Buffer(shape=[1], dtype="float32", data=B_data) @@ -661,7 +661,7 @@ def func(A: T.Buffer[(8,), "float32"]): A[i] = (x + 1.0) / (x - 1.0) @T.prim_func - def func_rewritten(A: T.Buffer[(8,), "float32"]) -> None: + def 
func_rewritten(A: T.Buffer((8,), "float32")) -> None: B_data = T.allocate((1,), "float32", "global") B = T.Buffer(shape=[1], dtype="float32", data=B_data) for i in range(8): @@ -689,12 +689,12 @@ class TestLetBufferRewrite(BaseCompare): """ def before() -> None: - A_data: T.Ptr[T.int32] = T.call_extern("dummy_func", dtype="handle") + A_data: T.Ptr("int32") = T.call_extern("dummy_func", dtype="handle") A = T.Buffer([8], "int32", data=A_data) A[0:8] = T.broadcast(42, 8) def expected() -> None: - A_data: T.Ptr[T.int32x8] = T.call_extern("dummy_func", dtype="handle") + A_data: T.Ptr("int32x8") = T.call_extern("dummy_func", dtype="handle") A = T.Buffer([1], "int32x8", data=A_data) A[0] = T.broadcast(42, 8) @@ -702,7 +702,7 @@ def expected() -> None: class TestRewriteInPlaceUseOfNonFlatBuffer(BaseCompare): """A non-flat buffer may be re-used for in-place operations""" - def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", @@ -735,7 +735,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): for i, j in T.grid(16, 16): D[i, j] = C[i, j] - def expected(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def expected(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", @@ -771,7 +771,7 @@ class TestNoRewriteOfSharedNonFlatBuffer(BaseCompare): not have matching shapes. """ - def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", diff --git a/tests/python/unittest/test_tir_transform_thread_sync.py b/tests/python/unittest/test_tir_transform_thread_sync.py index b7caf04d659c..eb578a8817b5 100644 --- a/tests/python/unittest/test_tir_transform_thread_sync.py +++ b/tests/python/unittest/test_tir_transform_thread_sync.py @@ -98,7 +98,7 @@ def ir(A, B): @tvm.testing.requires_cuda def test_sync_read_thread_id_independent_location(): @T.prim_func - def func(p0_arg: T.Buffer[(1, 2, 1, 1), "float32"], p1: T.Buffer[2, "float32"]) -> None: + def func(p0_arg: T.Buffer((1, 2, 1, 1), "float32"), p1: T.Buffer(2, "float32")) -> None: threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") p0 = T.Buffer([2], dtype="float32", data=p0_arg.data) diff --git a/tests/python/unittest/test_tir_transform_unify_thread_binding.py b/tests/python/unittest/test_tir_transform_unify_thread_binding.py index 90fce22bc14f..e489298741cc 100644 --- a/tests/python/unittest/test_tir_transform_unify_thread_binding.py +++ b/tests/python/unittest/test_tir_transform_unify_thread_binding.py @@ -74,9 +74,9 @@ def unified_element_wise_thread_x(a: T.handle, b: T.handle, c: T.handle) -> None @T.prim_func def element_wise_thread_x_different_dtype( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i in T.thread_binding(128, "blockIdx.x"): for j0_0 in T.thread_binding(4, "threadIdx.x"): @@ -91,9 +91,9 @@ def element_wise_thread_x_different_dtype( @T.prim_func def unified_element_wise_thread_x_different_dtype( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], 
+ A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for blockIdx_x in T.thread_binding(128, "blockIdx.x"): for threadIdx_x in T.thread_binding(4, "threadIdx.x"): diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py index 6145c39b876d..5bbedd349259 100644 --- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py +++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py @@ -14,15 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import pytest import sys +import pytest import tvm +from tvm import PoolInfoProperties, WorkspacePoolInfo from tvm.script import tir as T +from tvm.target import Target from tvm.tir import stmt_functor from tvm.tir.usmp import utils as usmp_utils -from tvm.target import Target -from tvm import WorkspacePoolInfo, PoolInfoProperties def _get_primfuncs_from_module(module): @@ -144,20 +144,20 @@ def __tvm_main__(input: T.handle, output: T.handle) -> None: @tvm.script.ir_module class LinearStructurePlanned: @T.prim_func - def __tvm_main__(input: T.handle, fast_memory_0_var: T.Ptr[T.uint8], slow_memory_1_var: T.Ptr[T.uint8], output: T.handle) -> None: + def __tvm_main__(input: T.handle, fast_memory_0_var: T.Ptr("uint8"), slow_memory_1_var: T.Ptr("uint8"), output: T.handle) -> None: fast_memory_0_buffer_var = T.match_buffer(fast_memory_0_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) slow_memory_1_buffer_var = T.match_buffer(slow_memory_1_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16) # body T.attr("default", "device_id", 0) T.attr("default", "device_type", 1) - sid_9_let: T.Ptr[T.int8] = T.address_of(slow_memory_1_buffer_var[1117472], dtype="handle") - sid_8_let: T.Ptr[T.int8] = T.address_of(slow_memory_1_buffer_var[0], dtype="handle") + sid_9_let: T.Ptr("int8") = T.address_of(slow_memory_1_buffer_var[1117472], dtype="handle") + sid_8_let: T.Ptr("int8") = T.address_of(slow_memory_1_buffer_var[0], dtype="handle") T.evaluate(T.call_extern("tvmgen_default_fused_cast_subtract", input, T.lookup_param("p0", dtype="handle"), sid_9_let, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast", sid_9_let, T.lookup_param("p1", dtype="handle"), T.lookup_param("p2", dtype="handle"), sid_8_let, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_max_pool2d_cast", sid_8_let, output, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) @T.prim_func - def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T.handle, fast_memory_6_var: T.Ptr[T.uint8], slow_memory_7_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T.handle, fast_memory_6_var: T.Ptr("uint8"), slow_memory_7_var: T.Ptr("uint8")) -> None: placeholder_29 = T.match_buffer(placeholder_28, [802816], dtype="uint8") T_cast_7 = T.match_buffer(T_cast_6, [177], dtype="int16") fast_memory_6_buffer_var = T.match_buffer(fast_memory_6_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) @@ -174,7 +174,7 @@ def 
tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6:
                 T_cast_7[ax0_ax1_fused_5 * 3584 + ax2_5 * 64 + ax3_3] = T.cast(tensor_2_let[ax0_ax1_fused_5 * 3584 + ax2_5 * 64 + ax3_3], "int16")
     @T.prim_func
-    def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T.handle, T_subtract: T.handle, fast_memory_2_var: T.Ptr[T.uint8], slow_memory_3_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T.handle, T_subtract: T.handle, fast_memory_2_var: T.Ptr("uint8"), slow_memory_3_var: T.Ptr("uint8")) -> None:
         placeholder_4 = T.match_buffer(placeholder_2, [150528], dtype="uint8")
         placeholder_5 = T.match_buffer(placeholder_3, [1], dtype="int16")
         T_subtract_1 = T.match_buffer(T_subtract, [452], dtype="int16")
@@ -185,7 +185,7 @@ def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T
                 T_subtract_1[ax0_ax1_fused_1 * 672 + ax2_1 * 3 + ax3_inner_1] = T.cast(placeholder_4[ax0_ax1_fused_1 * 672 + ax2_1 * 3 + ax3_inner_1], "int16") - placeholder_5[0]
     @T.prim_func
-    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholder_62: T.handle, placeholder_63: T.handle, placeholder_64: T.handle, T_cast_20: T.handle, fast_memory_4_var: T.Ptr[T.uint8], slow_memory_5_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholder_62: T.handle, placeholder_63: T.handle, placeholder_64: T.handle, T_cast_20: T.handle, fast_memory_4_var: T.Ptr("uint8"), slow_memory_5_var: T.Ptr("uint8")) -> None:
         placeholder_65 = T.match_buffer(placeholder_62, [150528], dtype="int16")
         placeholder_66 = T.match_buffer(placeholder_63, [9408], dtype="int16")
         placeholder_67 = T.match_buffer(placeholder_64, [64], dtype="int32")
@@ -380,7 +380,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place
 @tvm.script.ir_module
 class ResnetStructurePlanned:
     @T.prim_func
-    def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(placeholder: T.handle, placeholder_1: T.handle, T_cast: T.handle, global_workspace_1_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(placeholder: T.handle, placeholder_1: T.handle, T_cast: T.handle, global_workspace_1_var: T.Ptr("uint8")) -> None:
         placeholder_2 = T.match_buffer(placeholder, [360000], dtype="uint8")
         placeholder_3 = T.match_buffer(placeholder_1, [64], dtype="int32")
         T_cast_1 = T.match_buffer(T_cast, [215], dtype="int16")
@@ -390,7 +390,7 @@ def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(p
                 T_cast_1[ax0_ax1_fused * 4800 + ax2 * 64 + ax3_outer * 16 + ax3_inner] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(T.cast(placeholder_2[ax0_ax1_fused * 4800 + ax2 * 64 + ax3_outer * 16 + ax3_inner], "int32") - 94, 1843157232, 31, 1, dtype="int32") + placeholder_3[ax3_outer * 16 + ax3_inner], 255), 0), "uint8"), "int16")
     @T.prim_func
-    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_4200876283395191415_(placeholder_22: T.handle, placeholder_23: T.handle, placeholder_24: T.handle, placeholder_25: T.handle, T_cast_6: T.handle, global_workspace_5_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_4200876283395191415_(placeholder_22: T.handle, placeholder_23: T.handle, placeholder_24: T.handle, placeholder_25: T.handle, T_cast_6: T.handle, global_workspace_5_var: T.Ptr("uint8")) -> None:
         placeholder_29 = T.match_buffer(placeholder_22, [360000], dtype="int16")
         placeholder_27 = T.match_buffer(placeholder_23, [16384], dtype="int16")
         placeholder_26 = T.match_buffer(placeholder_24, [256], dtype="int32")
@@ -414,7 +414,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s
                     T_cast_7[ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4] = T.cast(T.max(T.min(T.q_multiply_shift(T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_3_let[ax3_inner_4] + placeholder_26[ax3_outer_2 * 64 + ax3_inner_4], 1343014664, 31, -8, dtype="int32") + 136, 255), 0), "uint8"), "int32") - 136, 1073903788, 31, 1, dtype="int32") + placeholder_28[ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4], 255), 0), "uint8")
     @T.prim_func
-    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_15934180698220515269_(placeholder_16: T.handle, placeholder_17: T.handle, placeholder_18: T.handle, T_add: T.handle, global_workspace_4_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_15934180698220515269_(placeholder_16: T.handle, placeholder_17: T.handle, placeholder_18: T.handle, T_add: T.handle, global_workspace_4_var: T.Ptr("uint8")) -> None:
         placeholder_19 = T.match_buffer(placeholder_16, [360000], dtype="int16")
         placeholder_20 = T.match_buffer(placeholder_17, [16384], dtype="int16")
         placeholder_21 = T.match_buffer(placeholder_18, [256], dtype="int32")
@@ -437,7 +437,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s
                     T_add_1[ax0_ax1_fused_ax2_fused_2 * 256 + ax3_outer_1 * 64 + ax3_inner_3] = T.q_multiply_shift(T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_2_let[ax3_inner_3] + placeholder_21[ax3_outer_1 * 64 + ax3_inner_3], 1711626602, 31, -8, dtype="int32") + 132, 255), 0), "uint8"), "int32") - 132, 2094289803, 31, -2, dtype="int32") + 136
     @T.prim_func
-    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, T_cast_2: T.handle, global_workspace_2_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, T_cast_2: T.handle, global_workspace_2_var: T.Ptr("uint8")) -> None:
         placeholder_7 = T.match_buffer(placeholder_4, [360000], dtype="int16")
         placeholder_8 = T.match_buffer(placeholder_5, [4096], dtype="int16")
         placeholder_9 = T.match_buffer(placeholder_6, [64], dtype="int32")
@@ -459,7 +459,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place
                 T_cast_3[ax0_ax1_fused_ax2_fused * 64 + ax3_inner_1] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_let[ax3_inner_1] + placeholder_9[ax3_inner_1], 1843106743, 31, -6, dtype="int32"), 255), 0), "uint8"), "int16")
     @T.prim_func
-    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(placeholder_10: T.handle, placeholder_11: T.handle, placeholder_12: T.handle, T_cast_4: T.handle, global_workspace_3_var: T.Ptr[T.uint8]) -> None:
+    def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(placeholder_10: T.handle, placeholder_11: T.handle, placeholder_12: T.handle, T_cast_4: T.handle, global_workspace_3_var: T.Ptr("uint8")) -> None:
         placeholder_13 = T.match_buffer(placeholder_10, [360000], dtype="int16")
         placeholder_14 = T.match_buffer(placeholder_11, [36864], dtype="int16")
         placeholder_15 = T.match_buffer(placeholder_12, [64], dtype="int32")
@@ -481,15 +481,15 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(pla
                 T_cast_5[ax0_ax1_fused_ax2_fused_1 * 64 + ax3_inner_2] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_1_let[ax3_inner_2] + placeholder_15[ax3_inner_2], 1608879842, 31, -7, dtype="int32"), 255), 0), "uint8"), "int16")
     @T.prim_func
-    def __tvm_main__(input: T.handle, global_workspace_0_var: T.Ptr[T.uint8], output: T.handle) -> None:
+    def __tvm_main__(input: T.handle, global_workspace_0_var: T.Ptr("uint8"), output: T.handle) -> None:
         global_workspace_0_buffer_var = T.match_buffer(global_workspace_0_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16)
         # body
         T.attr("default", "device_id", 0)
         T.attr("default", "device_type", 1)
-        sid_2_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[5760000], dtype="handle")
-        sid_6_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[0], dtype="handle")
-        sid_7_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle")
-        sid_8_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle")
+        sid_2_let: T.Ptr("int8") = T.address_of(global_workspace_0_buffer_var[5760000], dtype="handle")
+        sid_6_let: T.Ptr("int8") = T.address_of(global_workspace_0_buffer_var[0], dtype="handle")
+        sid_7_let: T.Ptr("int8") = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle")
+        sid_8_let: T.Ptr("int8") = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle")
         T.evaluate(T.call_extern("tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast", input, T.lookup_param("p0", dtype="handle"), sid_2_let, global_workspace_0_buffer_var.data, dtype="int32"))
         T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast", sid_2_let, T.lookup_param("p3", dtype="handle"), T.lookup_param("p4", dtype="handle"), sid_8_let, global_workspace_0_buffer_var.data, dtype="int32"))
         T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1", sid_8_let, T.lookup_param("p5", dtype="handle"), T.lookup_param("p6", dtype="handle"), sid_7_let, global_workspace_0_buffer_var.data, dtype="int32"))
@@ -557,7 +557,7 @@ def __tvm_main__(input: T.handle, output: T.handle) -> None:
 @tvm.script.ir_module
 class TensorIntrinStructurePlanned:
     @T.prim_func
-    def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None:
+    def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr("uint8")) -> None:
         global_workspace_1_buffer_var = T.match_buffer(
             global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
         )
@@ -576,7 +576,7 @@ def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None:
     @T.prim_func
     def __tvm_main__(
-        input: T.handle, global_workspace_1_var: T.Ptr[T.uint8], output: T.handle
+        input: T.handle, global_workspace_1_var: T.Ptr("uint8"), output: T.handle
     ) -> None:
         global_workspace_1_buffer_var = T.match_buffer(
             global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
diff --git a/tests/python/unittest/test_tvm_testing_before_after.py b/tests/python/unittest/test_tvm_testing_before_after.py
index 946493922ed5..4fcca7957b15 100644
--- a/tests/python/unittest/test_tvm_testing_before_after.py
+++ b/tests/python/unittest/test_tvm_testing_before_after.py
@@ -70,7 +70,7 @@ class TestBeforeAfterParametrizedFixture(BaseBeforeAfter):
     @tvm.testing.fixture
     def before(self, n):
         @T.prim_func
-        def func(A: T.Buffer[n, "float32"]):
+        def func(A: T.Buffer(n, "float32")):
             for i in T.serial(n):
                 A[i] = 0.0
@@ -89,11 +89,11 @@ class TestBeforeAfterIRModule(BaseBeforeAfter):
     """
     class before:
-        def func_A(A: T.Buffer[16, "float32"]):
+        def func_A(A: T.Buffer(16, "float32")):
             for i in T.serial(16):
                 A[i] = 0.0
-        def func_B(A: T.Buffer[16, "int32"]):
+        def func_B(A: T.Buffer(16, "int32")):
             for i in T.serial(16):
                 A[i] = 42
@@ -112,12 +112,12 @@ def before(self):
         @ir_module
         class mod:
             @T.prim_func
-            def func_A(A: T.Buffer[16, "float32"]):
+            def func_A(A: T.Buffer(16, "float32")):
                 for i in T.serial(16):
                     A[i] = 0.0
             @T.prim_func
-            def func_B(A: T.Buffer[16, "int32"]):
+            def func_B(A: T.Buffer(16, "int32")):
                 for i in T.serial(16):
                     A[i] = 42
diff --git a/tests/python/unittest/test_tvmscript_ops.py b/tests/python/unittest/test_tvmscript_ops.py
index e10681338727..8eba301fe719 100644
--- a/tests/python/unittest/test_tvmscript_ops.py
+++ b/tests/python/unittest/test_tvmscript_ops.py
@@ -163,7 +163,7 @@ def test_alloc_zero_dim_buffer_round_trip():
 @T.prim_func
-def ceildiv_test(A: T.Buffer[16, "int32"]):
+def ceildiv_test(A: T.Buffer(16, "int32")):
     for i in range(16):
         A[i] = T.ceildiv(A[i], 4)
diff --git a/tests/python/unittest/test_tvmscript_parser_source.py b/tests/python/unittest/test_tvmscript_parser_source.py
index 359583c1aa06..416bfd719f5c 100644
--- a/tests/python/unittest/test_tvmscript_parser_source.py
+++ b/tests/python/unittest/test_tvmscript_parser_source.py
@@ -92,7 +92,7 @@ class dummy:
 class Module:
     @T.prim_func
     def impl(
-        A: T.Buffer[(12, 196, 64), "float32"],
+        A: T.Buffer((12, 196, 64), "float32"),
     ) -> None:
         T.evaluate(0)
diff --git a/tests/python/unittest/test_tvmscript_parser_tir.py b/tests/python/unittest/test_tvmscript_parser_tir.py
index e3f87928acf8..e96ae4da8c2e 100644
--- a/tests/python/unittest/test_tvmscript_parser_tir.py
+++ b/tests/python/unittest/test_tvmscript_parser_tir.py
@@ -31,7 +31,7 @@ def test_tir_buffer_proxy():
         and buffer_0.dtype == "float32"
     )
-    buffer_1 = T.Buffer[(64, 64, 64), "int32"]
+    buffer_1 = T.Buffer((64, 64, 64), "int32")
     assert (
         isinstance(buffer_1, tir.Buffer)
         and list(buffer_1.shape) == [64, 64, 64]
@@ -49,7 +49,7 @@ def test_tir_ptr_proxy():
         and ptr_0.type_annotation.storage_scope == "global"
     )
-    ptr_1 = T.Ptr["float32", "shared"]
+    ptr_1 = T.Ptr("float32", "shared")
     assert (
         isinstance(ptr_1, tir.Var)
         and ptr_1.dtype == "handle"
diff --git a/tests/python/unittest/test_tvmscript_regression.py b/tests/python/unittest/test_tvmscript_regression.py
index 6678c10acd7a..c4ca23b3f037 100644
--- a/tests/python/unittest/test_tvmscript_regression.py
+++ b/tests/python/unittest/test_tvmscript_regression.py
@@ -74,7 +74,7 @@ def func_ref():
 def test_tir_buffer_region_extent_correct_dtype():
     @T.prim_func
-    def func(A: T.Buffer[(T.int64(16), T.int64(1)), "float32"]):
+    def func(A: T.Buffer((T.int64(16), T.int64(1)), "float32")):
         for i in T.grid(T.int64(16)):
             with T.block("block"):
                 vi = T.axis.remap("S", [i])
diff --git a/tests/python/unittest/test_tvmscript_roundtrip.py b/tests/python/unittest/test_tvmscript_roundtrip.py
index f52b488fef6b..05a3270d158b 100644
--- a/tests/python/unittest/test_tvmscript_roundtrip.py
+++ b/tests/python/unittest/test_tvmscript_roundtrip.py
@@ -193,12 +193,8 @@ def mmult(
         )
         # buffer definition
         buf_type_ids = T.match_buffer(arg_type_ids, [3], dtype="int32")
-        packedB = T.Buffer([32768], dtype="float32")
         C_global = T.Buffer([1024], dtype="float32")
-        # var definition
-        # C_global = T.buffer_var("float32", "global")
-        # packedB = T.buffer_var("float32", "global")
         # body
         assert num_args == 3, "mmult: num_args should be 3"
         arg0: T.handle = T.tvm_struct_get(args, 0, 12, dtype="handle")
@@ -208,30 +204,30 @@ def mmult(
         arg2: T.handle = T.tvm_struct_get(args, 2, 12, dtype="handle")
         arg2_code: T.int32 = buf_type_ids[2]
-        A_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
+        A_data: T.Ptr("int32") = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         T.attr(A_data, "storage_alignment", 128)
         A = T.Buffer([1024 * 1024], dtype="int32", data=A_data)
-        buf0_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
+        buf0_shape_data: T.Ptr("int32") = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
         buf0_shape = T.Buffer([2], dtype="int32", data=buf0_shape_data)
-        buf0_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
+        buf0_strides_data: T.Ptr("int32") = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
         buf0_strides = T.Buffer([2], dtype="int32", data=buf0_strides_data)
         dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
-        B_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
+        B_data: T.Ptr("int32") = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         T.attr(B_data, "storage_alignment", 128)
         B = T.Buffer([1024 * 1024], dtype="int32", data=B_data)
-        buf1_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
+        buf1_shape_data: T.Ptr("int32") = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
         buf1_shape = T.Buffer([2], dtype="int32", data=buf1_shape_data)
-        buf1_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
+        buf1_strides_data: T.Ptr("int32") = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
         buf1_strides = T.Buffer([2], dtype="int32", data=buf1_strides_data)
-        C_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
+        C_data: T.Ptr("int32") = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
         T.attr(C_data, "storage_alignment", 128)
         C = T.Buffer([1024 * 1024], dtype="int32", data=C_data)
-        buf2_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
+        buf2_shape_data: T.Ptr("int32") = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
         buf2_shape = T.Buffer([2], dtype="int32", data=buf2_shape_data)
-        buf2_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
+        buf2_strides_data: T.Ptr("int32") = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
         buf2_strides = T.Buffer([2], dtype="int32", data=buf2_strides_data)
         assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
@@ -932,9 +928,9 @@ def func(A: T.handle, W: T.handle, Conv: T.handle) -> None:
 def opt_conv_tensorcore_lower():
     @T.prim_func
     def func(
-        A: T.Buffer[(16, 14, 14, 16, 16, 16), "float16"],
-        W: T.Buffer[(3, 3, 16, 32, 16, 16), "float16"],
-        Conv: T.Buffer[(16, 14, 14, 32, 16, 16), "float32"],
+        A: T.Buffer((16, 14, 14, 16, 16, 16), "float16"),
+        W: T.Buffer((3, 3, 16, 32, 16, 16), "float16"),
+        Conv: T.Buffer((16, 14, 14, 32, 16, 16), "float32"),
     ) -> None:
         # function attr dict
         T.func_attr({"global_symbol": "default_function", "tir.noalias": True})
@@ -2226,7 +2222,7 @@ def opt_conv_tensorcore_mod_host():
     @T.prim_func
     def opt_conv_tensorcore_mod_host(
         args: T.handle,
-        arg_type_ids: T.Buffer[(3,), "int32"],
+        arg_type_ids: T.Buffer((3,), "int32"),
         num_args: T.int32,
         out_ret_value: T.handle,
         out_ret_tcode: T.handle,
@@ -2242,7 +2238,7 @@ def opt_conv_tensorcore_mod_host(
             }
         )
         # body
-        stack_tcode_data: T.Ptr[T.int32] = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle")
+        stack_tcode_data: T.Ptr("int32") = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle")
         stack_tcode = T.Buffer([9], "int32", data=stack_tcode_data)
         stack_value: T.handle = T.tvm_stack_alloca("arg_value", 10, dtype="handle")
         assert num_args == 3, "default_function: num_args should be 3"
@@ -2255,25 +2251,25 @@ def opt_conv_tensorcore_mod_host(
         A: T.handle = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         T.attr(A, "storage_alignment", 128)
-        arg0_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
+        arg0_shape_data: T.Ptr("int64") = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
         arg0_shape = T.Buffer([6], "int64", data=arg0_shape_data)
-        arg0_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
+        arg0_strides_data: T.Ptr("int64") = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
         arg0_strides = T.Buffer([6], "int64", data=arg0_strides_data)
         dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
         W: T.handle = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         T.attr(W, "storage_alignment", 128)
-        arg1_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
+        arg1_shape_data: T.Ptr("int64") = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
         arg1_shape = T.Buffer([6], "int64", data=arg1_shape_data)
-        arg1_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
+        arg1_strides_data: T.Ptr("int64") = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
         arg1_strides = T.Buffer([6], "int64", data=arg1_strides_data)
         Conv: T.handle = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
         T.attr(Conv, "storage_alignment", 128)
-        arg2_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
+        arg2_shape_data: T.Ptr("int64") = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
         arg2_shape = T.Buffer([6], "int64", data=arg2_shape_data)
-        arg2_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
+        arg2_strides_data: T.Ptr("int64") = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
         arg2_strides = T.Buffer([6], "int64", data=arg2_strides_data)
         assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
@@ -3129,7 +3125,7 @@ def func_root_attr():
 def func_trivial_root_block():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         with T.block("root"):
             A[0] = 0
@@ -3138,7 +3134,7 @@ def func(A: T.Buffer[1, "int32"]):
 def func_nested_root_block():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         with T.block("root"):
             with T.block("block"):
                 A[0] = 0
@@ -3149,7 +3145,7 @@ def func(A: T.Buffer[1, "int32"]):
 def func_T_ptr_let_statement():
     @T.prim_func
     def func_T_ptr_let_statement(
-        args: T.handle, arg_type_ids_handle: T.Ptr[T.int32], num_args: T.int32
+        args: T.handle, arg_type_ids_handle: T.Ptr("int32"), num_args: T.int32
     ) -> None:
         # The T.Ptr declaration in the parameter list should parse
         # correctly, and should be usable as the data pointer in a buffer.
@@ -3161,14 +3157,14 @@ def func_T_ptr_let_statement(
         # Functions that return a "handle" can be assigned to a T.Ptr
         # variable.  A variable annotated with T.Ptr still has dtype of
         # T.handle, but has type annotation as a pointer type.
-        A_data: T.Ptr[T.float32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
+        A_data: T.Ptr("float32") = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         # The buffer declaration has a data pointer defined earlier in
         # this function.  It should only be defined after the data pointer
         # has been defined, and should not be hoisted into the header of
         # the function as other buffer_decl statements can be.
         A = T.Buffer([1024], dtype="float32", data=A_data)
-        B_data: T.Ptr[T.float32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
+        B_data: T.Ptr("float32") = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         B = T.Buffer([1024], dtype="float32", data=B_data)
         B[0] = A[0]
@@ -3188,7 +3184,7 @@ def func_T_ptr_allocate() -> None:
 def llvm_intrin_call():
     @T.prim_func
-    def ctpop(A: T.Buffer[(16,), "uint8"], B: T.Buffer[(16,), "uint8"]) -> None:
+    def ctpop(A: T.Buffer((16,), "uint8"), B: T.Buffer((16,), "uint8")) -> None:
         for i in range(0, 16):
             with T.block("A"):
                 vi = T.axis.remap(
@@ -3270,13 +3266,13 @@ def string_annotation_of_special_chars():
 def pointer_type():
     @T.prim_func
-    def func_with_ptr_type_annotations(x: T.Ptr[T.int32], y: T.Ptr[T.int32, "shared"]):
+    def func_with_ptr_type_annotations(x: T.Ptr("int32"), y: T.Ptr("int32", "shared")):
         xx_data = T.allocate([16], "int32", "global")
         xx = T.Buffer(shape=[16], dtype="int32", scope="global", data=xx_data)
         yy_data = T.allocate([16], "int32", "shared")
         yy = T.Buffer(shape=[16], dtype="int32", scope="shared", data=yy_data)
-        a: T.Ptr[T.int32] = T.address_of(xx[0], dtype="handle")
-        b: T.Ptr[T.int32, "shared"] = T.address_of(yy[0], dtype="handle")
+        a: T.Ptr("int32") = T.address_of(xx[0], dtype="handle")
+        b: T.Ptr("int32", "shared") = T.address_of(yy[0], dtype="handle")
         T.evaluate(T.call_extern("copy", a, b, dtype=""))
     return func_with_ptr_type_annotations
@@ -3328,7 +3324,7 @@ def func():
 def void_ptr():
     @T.prim_func
-    def func(out_ret_value: T.Ptr[T.void]):
+    def func(out_ret_value: T.Ptr("void")):
         T.evaluate(out_ret_value)
     return func
@@ -3336,7 +3332,7 @@ def func(out_ret_value: T.Ptr[T.void]):
 def decl_buffer():
     @T.prim_func
-    def func(A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16, 16), "float32"]) -> None:
+    def func(A: T.Buffer((16, 16), "float32"), B: T.Buffer((16, 16), "float32")) -> None:
         A_flattened = T.decl_buffer(data=A.data, shape=(256,), dtype="float32")
         B_flattened = T.decl_buffer(data=B.data, shape=(256,), dtype="float32")
         C_alias = T.decl_buffer(data=A_flattened.data, shape=(256,), dtype="float32")
@@ -3348,7 +3344,7 @@ def func(A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16, 16), "float32"]) ->
 def allocate_and_decl_buffer():
     @T.prim_func
-    def func(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]) -> None:
+    def func(A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32")) -> None:
         D_data = T.allocate((16,), "float32", "global")
         D = T.decl_buffer((16,), "float32", data=D_data)
         for i in range(4):
@@ -3367,7 +3363,7 @@ def func(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]) -> None:
 def float_infinity():
     @T.prim_func
     def func(
-        placeholder: T.Buffer[(1, 512, 768), "float32"], T_isinf: T.Buffer[(1, 512, 768), "bool"]
+        placeholder: T.Buffer((1, 512, 768), "float32"), T_isinf: T.Buffer((1, 512, 768), "bool")
     ) -> None:
         # function attr dict
         T.func_attr({"global_symbol": "main", "tir.noalias": True})
@@ -3445,7 +3441,7 @@ def func() -> None:
 def implicit_evaluate():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         T.evaluate(T.assume(A[0] == 5))
         A[0] = 10
@@ -3508,7 +3504,7 @@ def nested_boolean_expressions():
     def make_ir_generator(name, expression):
         def inner():
             @T.prim_func
-            def func(A: T.Buffer[1, "bool"], i: T.bool, j: T.bool, k: T.bool):
+            def func(A: T.Buffer(1, "bool"), i: T.bool, j: T.bool, k: T.bool):
                 A[0] = expression(i, j, k)
             return func
@@ -3524,7 +3520,7 @@ def func(A: T.Buffer[1, "bool"], i: T.bool, j: T.bool, k: T.bool):
 def multi_env_threads():
     @T.prim_func
-    def func(A: T.Buffer[128, "float32"], C: T.Buffer[128, "float32"]):
+    def func(A: T.Buffer(128, "float32"), C: T.Buffer(128, "float32")):
         B = T.alloc_buffer([128], dtype="float32")
         for i in T.thread_binding(128, thread="threadIdx.x"):
             B[i] = A[i] + 1.0
diff --git a/tests/python/unittest/test_tvmscript_syntax_sugar.py b/tests/python/unittest/test_tvmscript_syntax_sugar.py
index 35f9e6c2e635..a840722bea8c 100644
--- a/tests/python/unittest/test_tvmscript_syntax_sugar.py
+++ b/tests/python/unittest/test_tvmscript_syntax_sugar.py
@@ -121,8 +121,8 @@ def elementwise_buffer_kwargs(
 # match buffer - use buffer without kwargs
 @T.prim_func
 def elementwise_buffer_no_kwargs(
-    a: T.Buffer[(128, 128, 128, 128), "float32"],
-    b: T.Buffer[(128, 128, 128, 128), "float32"],
+    a: T.Buffer((128, 128, 128, 128), "float32"),
+    b: T.Buffer((128, 128, 128, 128), "float32"),
 ) -> None:
     for i, j, k, l in T.grid(128, 128, 128, 128):
         with T.block("B"):
@@ -145,7 +145,7 @@ def func_no_sugar(a: T.handle):
             A[i] = 0.0
     @T.prim_func
-    def func_with_sugar(A: T.Buffer[16, "float32"]):
+    def func_with_sugar(A: T.Buffer(16, "float32")):
         for i in T.serial(16):
             A[i] = 0.0
@@ -191,8 +191,8 @@ def match_buffer_int64(a: T.handle, c: T.handle) -> None:
 @T.prim_func
 def match_buffer_int64_after_roundtrip(
-    A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-    C: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+    A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+    C: T.Buffer((T.int64(128), T.int64(128)), "float32"),
 ) -> None:
     B = T.alloc_buffer((T.int64(128), T.int64(128)), dtype="float32")
     for i, j in T.grid(128, 128):
@@ -213,13 +213,13 @@ def test_match_buffer_int64():
 def test_match_buffer_region_has_implicit_shape_dtype():
     @T.prim_func
-    def explicit_shape_dtype(A: T.Buffer[(16, 64), "int32"]):
+    def explicit_shape_dtype(A: T.Buffer((16, 64), "int32")):
         with T.block():
             B = T.match_buffer(A[8:16, 32:64], shape=(8, 32), dtype="int32")
             T.evaluate(0)
     @T.prim_func
-    def implicit_shape_dtype(A: T.Buffer[(16, 64), "int32"]):
+    def implicit_shape_dtype(A: T.Buffer((16, 64), "int32")):
         with T.block():
             B = T.match_buffer(A[8:16, 32:64])
             T.evaluate(0)
@@ -245,7 +245,7 @@ def test_letstmt_bufferload_without_type_annotation():
     # Failure occurred during parsing of the tvmscript.
     @T.prim_func
-    def func_without_type_annotation(A: T.Buffer[(1,), "int32"]):
+    def func_without_type_annotation(A: T.Buffer((1,), "int32")):
         x = A[0]
         T.evaluate(x)
@@ -350,8 +350,8 @@ def mma_sync_m16n16k16_desc_manual(a: T.handle, b: T.handle, c: T.handle) -> Non
 def test_int64_loop():
     @T.prim_func
     def int64_grid(
-        A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-        B: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+        A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+        B: T.Buffer((T.int64(128), T.int64(128)), "float32"),
     ) -> None:
         for i, j in T.grid(T.int64(128), T.int64(128)):
             with T.block("C"):
@@ -360,8 +360,8 @@ def int64_grid(
     @T.prim_func
     def int64_grid_expanded(
-        A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-        B: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+        A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+        B: T.Buffer((T.int64(128), T.int64(128)), "float32"),
     ) -> None:
         for i in range(T.int64(0), T.int64(128)):
             for j in range(T.int64(0), T.int64(128)):
@@ -375,12 +375,12 @@ def int64_grid_expanded(
 def test_implicit_evaluate_assume():
     @T.prim_func
-    def explicit(A: T.Buffer[1, "int32"]):
+    def explicit(A: T.Buffer(1, "int32")):
         T.evaluate(T.assume(A[0] == 5))
         A[0] = 10
     @T.prim_func
-    def implicit(A: T.Buffer[1, "int32"]):
+    def implicit(A: T.Buffer(1, "int32")):
         T.assume(A[0] == 5)
         A[0] = 10
@@ -389,11 +389,11 @@ def implicit(A: T.Buffer[1, "int32"]):
 def test_implicit_evaluate_call_extern():
     @T.prim_func
-    def explicit(A: T.Buffer[1, "int32"]):
+    def explicit(A: T.Buffer(1, "int32")):
         T.evaluate(T.call_extern("extern_func", A.data, dtype="int32"))
     @T.prim_func
-    def implicit(A: T.Buffer[1, "int32"]):
+    def implicit(A: T.Buffer(1, "int32")):
         T.call_extern("extern_func", A.data, dtype="int32")
     assert_structural_equal(implicit, explicit)