diff --git a/apps/pt_tvmdsoop/tests/test_as_torch.py b/apps/pt_tvmdsoop/tests/test_as_torch.py index 0243e86edebd3..684dcd4396050 100644 --- a/apps/pt_tvmdsoop/tests/test_as_torch.py +++ b/apps/pt_tvmdsoop/tests/test_as_torch.py @@ -52,7 +52,7 @@ def main(a: T.handle, b: T.handle, c: T.handle) -> None: @tvm.script.ir_module class ModuleGPU: @T.prim_func - def main(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]) -> None: + def main(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i_0 in T.thread_binding(2, thread="blockIdx.x"): for i_2 in T.thread_binding(2, thread="threadIdx.x"): diff --git a/apps/pt_tvmdsoop/tests/test_boolean_tensor.py b/apps/pt_tvmdsoop/tests/test_boolean_tensor.py index 4718b40439453..540cef0c90a3f 100644 --- a/apps/pt_tvmdsoop/tests/test_boolean_tensor.py +++ b/apps/pt_tvmdsoop/tests/test_boolean_tensor.py @@ -81,10 +81,10 @@ def test_tensor_boolean_operation(): @as_torch @T.prim_func def negate_tvmscript( - X: T.Buffer[(8, 8), "bool"], - Y: T.Buffer[(8, 8), "float32"], - Z: T.Buffer[(8, 8), "bool"], - U: T.Buffer[(8, 8), "float32"], + X: T.Buffer((8, 8), "bool"), + Y: T.Buffer((8, 8), "float32"), + Z: T.Buffer((8, 8), "bool"), + U: T.Buffer((8, 8), "float32"), ) -> None: for i, j in T.grid(8, 8): with T.block(): diff --git a/include/tvm/script/printer/doc.h b/include/tvm/script/printer/doc.h index 6321caa4e0574..4a2d7df1adec6 100644 --- a/include/tvm/script/printer/doc.h +++ b/include/tvm/script/printer/doc.h @@ -774,7 +774,7 @@ class AssignDocNode : public StmtDocNode { /*! * \brief The right hand side of the assignment. * - * If null, this doc represents declaration, e.g. `A: T.Buffer[(1,2)]` + * If null, this doc represents declaration, e.g. `A: T.Buffer((1,2))` * */ Optional rhs; /*! \brief The type annotation of this assignment. 
*/ diff --git a/include/tvm/tir/transform.h b/include/tvm/tir/transform.h index 829594d61b98e..be7589b04bf54 100644 --- a/include/tvm/tir/transform.h +++ b/include/tvm/tir/transform.h @@ -576,7 +576,7 @@ TVM_DLL Pass UnifiedStaticMemoryPlanner(); * * \code{.py} * @T.prim_func - * def before_transform(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + * def before_transform(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: * for tx in T.thread_binding(0, 16, thread="threadIdx.x"): * for i in T.serial(0, 16, * annotations={"software_pipeline_stage": [0, 1], @@ -601,7 +601,7 @@ TVM_DLL Pass UnifiedStaticMemoryPlanner(); * * \code{.py} * @T.prim_func - * def after_transform(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + * def after_transform(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: * for tx in T.thread_binding(0, 16, thread="threadIdx.x"): * with T.block(): * T.reads([A[tx, 0:16]]) diff --git a/python/tvm/ir/base.py b/python/tvm/ir/base.py index 5df529b0532fd..5f3a679591d1d 100644 --- a/python/tvm/ir/base.py +++ b/python/tvm/ir/base.py @@ -282,3 +282,34 @@ def structural_hash(node, map_free_vars=False): structrual_equal """ return _ffi_node_api.StructuralHash(node, map_free_vars) # type: ignore # pylint: disable=no-member + + +def deprecated( + method_name: str, + new_method_name: str, +): + """A decorator to indicate that a method is deprecated + + Parameters + ---------- + method_name : str + The name of the method to deprecate + new_method_name : str + The name of the new method to use instead + """ + import functools # pylint: disable=import-outside-toplevel + import warnings # pylint: disable=import-outside-toplevel + + def _deprecate(func): + @functools.wraps(func) + def _wrapper(*args, **kwargs): + warnings.warn( + f"{method_name} is deprecated, use {new_method_name} instead", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return _wrapper + + return _deprecate diff --git a/python/tvm/micro/contrib/stm32/__init__.py b/python/tvm/micro/contrib/stm32/__init__.py index 80e57f2d3912a..8558f53359551 100755 --- a/python/tvm/micro/contrib/stm32/__init__.py +++ b/python/tvm/micro/contrib/stm32/__init__.py @@ -1,20 +1,20 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module container of STM32 code generator.""" - -from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module container of STM32 code generator.""" + +from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name diff --git a/python/tvm/parser.py b/python/tvm/parser.py index 63c40deb2069a..b79682d8907b1 100644 --- a/python/tvm/parser.py +++ b/python/tvm/parser.py @@ -16,9 +16,12 @@ # under the License. # pylint: disable=invalid-name """The legacy TVM parser """ +from .ir.base import deprecated + # pylint: disable=import-outside-toplevel +@deprecated("tvm.parser.parse", "tvm.relay.parse") def parse(*args, **kwargs): """Deprecated, use `tvm.relay.parse` instead""" from tvm.relay import parse as _impl @@ -26,6 +29,7 @@ def parse(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.parse_expr", "tvm.relay.parse_expr") def parse_expr(*args, **kwargs): """Deprecated, use `tvm.relay.parse_expr` instead""" from tvm.relay import parse_expr as _impl @@ -33,6 +37,7 @@ def parse_expr(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.fromtext", "tvm.relay.fromtext") def fromtext(*args, **kwargs): """Deprecated, use `tvm.relay.fromtext` instead""" from tvm.relay import fromtext as _impl @@ -40,6 +45,7 @@ def fromtext(*args, **kwargs): return _impl(*args, **kwargs) +@deprecated("tvm.parser.SpanCheck", "tvm.relay.SpanCheck") def SpanCheck(*args, **kwargs): """Deprecated, use `tvm.relay.SpanCheck` instead""" from tvm.relay import SpanCheck as _impl diff --git a/python/tvm/script/parser/core/utils.py b/python/tvm/script/parser/core/utils.py index 453ac18b382b7..6a693df12f89f 100644 --- a/python/tvm/script/parser/core/utils.py +++ b/python/tvm/script/parser/core/utils.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. """TVM Script Parser utils""" - import inspect from types import FrameType from typing import Any, Callable, Dict, List diff --git a/python/tvm/script/parser/tir/entry.py b/python/tvm/script/parser/tir/entry.py index e7ec7cf886d49..bacf92c14287e 100644 --- a/python/tvm/script/parser/tir/entry.py +++ b/python/tvm/script/parser/tir/entry.py @@ -18,6 +18,7 @@ import inspect from typing import Callable, Union +from tvm.ir.base import deprecated from tvm.tir import Buffer, PrimFunc from ...ir_builder.tir import buffer_decl, ptr @@ -49,7 +50,7 @@ def prim_func(func: Callable) -> Union[PrimFunc, Callable]: class BufferProxy: """Buffer proxy class for constructing tir buffer. - Overload __call__ and __getitem__ to support syntax as T.Buffer() and T.Buffer[]. + Overload __call__ and __getitem__ to support syntax as T.Buffer() and the deprecated T.Buffer[]. """ def __call__( @@ -78,6 +79,7 @@ def __call__( axis_separators=axis_separators, ) + @deprecated("T.Buffer[...]", "T.Buffer(...)") def __getitem__(self, keys) -> Buffer: if not isinstance(keys, tuple): return self(keys) @@ -88,7 +90,7 @@ def __getitem__(self, keys) -> Buffer: class PtrProxy: """Ptr proxy class for constructing tir pointer.
- Overload __call__ and __getitem__ to support syntax as T.Ptr() and T.Ptr[]. + Overload __call__ and __getitem__ to support syntax as T.Ptr() and the deprecated T.Ptr[]. """ def __call__(self, dtype, storage_scope="global"): if isinstance(dtype, type): dtype = dtype().dtype return ptr(dtype, storage_scope) # pylint: disable=no-member # type: ignore + @deprecated("T.Ptr[...]", "T.Ptr(...)") def __getitem__(self, keys): if not isinstance(keys, tuple): return self(keys) diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 19669cd60cf44..5f0e94869d05a 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -1932,13 +1932,13 @@ class object that inherits from `Exception`. class TestRemoveIf(tvm.testing.CompareBeforeAfter): transform = tvm.tir.transform.Simplify() - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): if True: A[0] = 42 else: A[0] = 5 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 42 """ diff --git a/python/tvm/tir/schedule/schedule.py b/python/tvm/tir/schedule/schedule.py index 6a71e5872fcd6..4727b4a5f00ec 100644 --- a/python/tvm/tir/schedule/schedule.py +++ b/python/tvm/tir/schedule/schedule.py @@ -754,9 +754,9 @@ def add_unit_loop(self, block_or_loop: Union[LoopRV, BlockRV]) -> LoopRV: @T.prim_func def before_add_unit_loop( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -776,9 +776,9 @@ def before_add_unit_loop( @T.prim_func def after_add_unit_loop( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in T.serial(1): with T.block("C"): @@ -1240,7 +1240,7 @@ def cache_inplace( .. code-block:: python @T.prim_func - def before_cache_inplace(data_io: T.Buffer[(64), "int32"]): + def before_cache_inplace(data_io: T.Buffer((64), "int32")): for i0 in T.serial(1): with T.block("A"): T.reads(data_io[:64]) @@ -1261,7 +1261,7 @@ def before_cache_inplace(data_io: T.Buffer[(64), "int32"]): ..
code-block:: python @T.prim_func - def cache_inplace(data_io: T.Buffer[64, "int32"]) -> None: + def cache_inplace(data_io: T.Buffer(64, "int32")) -> None: data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") for i0 in T.serial(1): for ax0 in T.serial(64): @@ -1350,7 +1350,7 @@ def resize(a: T.handle, b: T.handle) -> None: @T.prim_func def resize_cache_index( - A: T.Buffer[(1, 3, 40, 40), "float32"], B: T.Buffer[(1, 3, 80, 80), "float32"] + A: T.Buffer((1, 3, 40, 40), "float32"), B: T.Buffer((1, 3, 80, 80), "float32") ) -> None: index_var_0 = T.alloc_buffer([80, 80], dtype="int32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) @@ -1431,8 +1431,8 @@ def reindex( @T.prim_func def before_reindex( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -1453,8 +1453,8 @@ def before_reindex( @T.prim_func def after_reindex( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: A_reindex = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -2151,7 +2151,7 @@ def set_scope(self, block: Union[BlockRV, str], buffer_index: int, storage_scope @T.prim_func def before_set_scope( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), dtype="float32") @@ -2178,7 +2178,7 @@ def before_set_scope( @T.prim_func def after_set_scope( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") @@ -2227,8 +2227,8 @@ def blockize(self, loop: LoopRV, preserve_unit_iters: bool = True) -> BlockRV: @T.prim_func def before_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") ) -> None: for i_0, j_0, i_1, j_1 in T.grid(8, 8, 16, 16): with T.block("B"): @@ -2254,8 +2254,8 @@ def before_blockize( @T.prim_func def after_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32") )-> None: for i_0, j_0 in T.grid(8, 8): with T.block("B_o"): @@ -2305,9 +2305,9 @@ def tensorize( @T.prim_func def before_tensorize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -2380,9 +2380,9 @@ def mma_intrin(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def after_tensorize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -2819,8 +2819,8 @@ def transform_block_layout( @T.prim_func def before_transform_block_layout( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32") ) -> None: for 
i, j in T.grid(16, 16): with T.block("B"): @@ -2841,8 +2841,8 @@ def before_transform_block_layout( @T.prim_func def after_transform_block_layout( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32") ) -> None: for i in range(256): with T.block("B"): @@ -2903,7 +2903,7 @@ def set_axis_separator( @T.prim_func def before_set_axis_separator( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), dtype="float32") @@ -2931,7 +2931,7 @@ def before_set_axis_separator( @T.prim_func def after_set_axis_separators( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) @@ -2992,7 +2992,7 @@ def decompose_padding(self, block: Union[BlockRV, str], loop: LoopRV) -> BlockRV .. code-block:: python @T.prim_func - def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def before_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in range(140): with T.block("block"): vi = T.axis.remap("S", [i]) @@ -3012,7 +3012,7 @@ def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): .. code-block:: python @T.prim_func - def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def after_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in T.serial(140): with T.block("block_pad_const"): vi = T.axis.spatial(140, i) @@ -3067,9 +3067,9 @@ def pad_einsum(self, block: Union[BlockRV, str], padding: List[int]) -> None: @T.prim_func def before_pad_einsum( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared = T.alloc_buffer((128, 127), "float32", scope="shared") B_shared = T.alloc_buffer((127, 127), "float32", scope="shared") @@ -3108,9 +3108,9 @@ def before_pad_einsum( @T.prim_func def after_pad_einsum( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") B_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") @@ -3193,7 +3193,7 @@ def rolling_buffer( @T.prim_func def before_rolling_buffer( - A: T.Buffer[(12, 12), "int8"], C: T.Buffer[(8, 8), "int8"] + A: T.Buffer((12, 12), "int8"), C: T.Buffer((8, 8), "int8") ) -> None: # body # with T.block("root") @@ -3230,8 +3230,8 @@ def before_rolling_buffer( @T.prim_func def after_rolling_buffer( - A: T.Buffer[(12, 12), "int8"], - C: T.Buffer[(8, 8), "int8"] + A: T.Buffer((12, 12), "int8"), + C: T.Buffer((8, 8), "int8") ) -> None: # body # with T.block("root") diff --git a/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py b/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py index 80f1cd1ecf789..14bdd45b56f73 100755 --- a/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py +++ b/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py @@ -1,120 +1,120 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more 
contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" Compute and schedule for adaptive_avg_pool1d slice op - -Following are few notes and assumptions made by the implementation: - -Assumptions: -1) The input is in NCW layout. Distilbert is the only model that calls - nn.adaptive_avg_pool1d and the only layout it uses is 'NCW'. -2) The op takes output_size as an argument and - only handles the specialized case where output_size is 1. - The argument output_size is used as the value of output_width. -3) Both input and output dtype is uint8/int8 and - quantization parameter is provided to the op. -4) Input is assumed to always be multiple of fixed chunk 32c64w. - -Notes: -1) If input width is used as output width, there can be two cases: - a. If the quantization parameters of input and output are same, - it can return the input as output so the op will be a no-op. - b. If the quantization parameters of input and output are different, - it will essentially be a requantize op. -2) If output_size is a value besides 1 or input_width, - adaptive_avg_pool1d may use dynamic stride and kernel for each output element. - When this case occurs, kernel won't be known at compile time. We want to use - the generic implementation nn.adaptive_avg_pool1d() for this case. 
-""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate - - -def adaptive_avg_pool1d( - data: te.Tensor, - output_size: list, - odtype: str, - input_zero_point: int, - input_scale: float, - output_zero_point: int, - output_scale: float, -): - """adaptive_avg_pool1d compute""" - _, _, inw = data.shape - - out_width = output_size[0] - - n, c = data.shape[:2] - oshape = (n, c) + (out_width,) - - # Kernel is same as input_width since output_width is assumed to be 1 - if out_width == 1: - kw_r = inw - else: - raise RuntimeError(f"Unsupported output_size, {out_width}'") - - if odtype == "uint8": - temp_dtype = "uint32" - elif odtype == "int8": - temp_dtype = "int32" - else: - raise RuntimeError(f"Unsupported output dtype, {odtype}'") - - scale_with_area = input_scale / (output_scale * int(kw_r)) - scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") - corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point - - rw_r = te.reduce_axis((0, kw_r), name="rw_r") - - sum_compute = te.compute( - oshape, - lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]), - name="sum", - ) - - avg_compute = te.compute( - oshape, - lambda n, c, w: saturate( - ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype - ).astype(odtype), - name="adaptive_avg_1d", - ) - return avg_compute - - -def stir_schedule_ncw_32c64w(outs, ins, input_layout: str): - """Schedule for input layout ncw-32c64w and output layout ncw""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("sum") - - # Input is multiple of fixed chunk but output is NxCx1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s - - -def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str): - """STIR based schedule""" - if output_layout == "ncw": - return stir_schedule_ncw_32c64w(outs, ins, input_layout) - raise RuntimeError(f"Unexpected layout '{output_layout}'") +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Compute and schedule for adaptive_avg_pool1d slice op + +Following are few notes and assumptions made by the implementation: + +Assumptions: +1) The input is in NCW layout. Distilbert is the only model that calls + nn.adaptive_avg_pool1d and the only layout it uses is 'NCW'. +2) The op takes output_size as an argument and + only handles the specialized case where output_size is 1. + The argument output_size is used as the value of output_width. 
+3) Both input and output dtype is uint8/int8 and + quantization parameter is provided to the op. +4) Input is assumed to always be multiple of fixed chunk 32c64w. + +Notes: +1) If input width is used as output width, there can be two cases: + a. If the quantization parameters of input and output are same, + it can return the input as output so the op will be a no-op. + b. If the quantization parameters of input and output are different, + it will essentially be a requantize op. +2) If output_size is a value besides 1 or input_width, + adaptive_avg_pool1d may use dynamic stride and kernel for each output element. + When this case occurs, kernel won't be known at compile time. We want to use + the generic implementation nn.adaptive_avg_pool1d() for this case. +""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate + + +def adaptive_avg_pool1d( + data: te.Tensor, + output_size: list, + odtype: str, + input_zero_point: int, + input_scale: float, + output_zero_point: int, + output_scale: float, +): + """adaptive_avg_pool1d compute""" + _, _, inw = data.shape + + out_width = output_size[0] + + n, c = data.shape[:2] + oshape = (n, c) + (out_width,) + + # Kernel is same as input_width since output_width is assumed to be 1 + if out_width == 1: + kw_r = inw + else: + raise RuntimeError(f"Unsupported output_size, {out_width}'") + + if odtype == "uint8": + temp_dtype = "uint32" + elif odtype == "int8": + temp_dtype = "int32" + else: + raise RuntimeError(f"Unsupported output dtype, {odtype}'") + + scale_with_area = input_scale / (output_scale * int(kw_r)) + scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") + corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point + + rw_r = te.reduce_axis((0, kw_r), name="rw_r") + + sum_compute = te.compute( + oshape, + lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]), + name="sum", + ) + + avg_compute = te.compute( + oshape, + lambda n, c, w: saturate( + ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype + ).astype(odtype), + name="adaptive_avg_1d", + ) + return avg_compute + + +def stir_schedule_ncw_32c64w(outs, ins, input_layout: str): + """Schedule for input layout ncw-32c64w and output layout ncw""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("sum") + + # Input is multiple of fixed chunk but output is NxCx1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s + + +def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str): + """STIR based schedule""" + if output_layout == "ncw": + return stir_schedule_ncw_32c64w(outs, ins, input_layout) + raise RuntimeError(f"Unexpected layout '{output_layout}'") diff --git a/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py b/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py index 1c171be8976e6..24d5224f71cfc 100755 --- a/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py +++ b/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py @@ -1,95 +1,95 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Assumptions: -1) The input is in NCHW layout. Squeezenet is the only model that calls - nn.global_avg_pool2d and the only layout it uses is 'NCHW'. -2) Both input and output dtype is uint8 and - quantization parameter is provided to the op. -3) Input is assumed to always be multiple of fixed chunk 32c8h8w. -""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate - - -def global_avg_pool2d_u8( - data: te.Tensor, - odtype: str, - input_zero_point: int, - input_scale: float, - output_zero_point: int, - output_scale: float, -): - """global_avg_pool2d""" - input_b, input_c, input_h, input_w = data.shape - oshape = (input_b, input_c) + (1, 1) - - if input_h * input_w < 256: - bits = "16" - else: - bits = "32" - - if odtype == "uint8": - temp_dtype = "uint" + bits - elif odtype == "int8": - temp_dtype = "int" + bits - else: - raise RuntimeError(f"Unsupported output dtype, {odtype}'") - - pool_area = input_h * input_w - rh_r = te.reduce_axis((0, input_h), name="rh_r") - rw_r = te.reduce_axis((0, input_w), name="rw_r") - - scale_with_area = input_scale / (output_scale * int(pool_area)) - scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") - corr = (output_zero_point << rsh) - input_zero_point * pool_area * scale_fixed_point - - sum_compute = te.compute( - oshape, - lambda n, c, h, w: te.sum( - data[n, c, h + rh_r, w + rw_r].astype(temp_dtype), axis=[rh_r, rw_r] - ), - name="sum", - ) - - avg_compute = te.compute( - oshape, - lambda n, c, h, w: saturate( - ((sum_compute[n, c, h, w] * scale_fixed_point) + corr) >> rsh, odtype - ).astype(odtype), - name="global_avg_pool2d", - ) - - return avg_compute - - -def stir_global_avg_pool2d_u8_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): - """Schedule""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("sum") - - # Input is multiple of fixed chunk but output is NxCx1x1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Assumptions: +1) The input is in NCHW layout. Squeezenet is the only model that calls + nn.global_avg_pool2d and the only layout it uses is 'NCHW'. +2) Both input and output dtype is uint8 and + quantization parameter is provided to the op. +3) Input is assumed to always be multiple of fixed chunk 32c8h8w. +""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate + + +def global_avg_pool2d_u8( + data: te.Tensor, + odtype: str, + input_zero_point: int, + input_scale: float, + output_zero_point: int, + output_scale: float, +): + """global_avg_pool2d""" + input_b, input_c, input_h, input_w = data.shape + oshape = (input_b, input_c) + (1, 1) + + if input_h * input_w < 256: + bits = "16" + else: + bits = "32" + + if odtype == "uint8": + temp_dtype = "uint" + bits + elif odtype == "int8": + temp_dtype = "int" + bits + else: + raise RuntimeError(f"Unsupported output dtype, {odtype}'") + + pool_area = input_h * input_w + rh_r = te.reduce_axis((0, input_h), name="rh_r") + rw_r = te.reduce_axis((0, input_w), name="rw_r") + + scale_with_area = input_scale / (output_scale * int(pool_area)) + scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") + corr = (output_zero_point << rsh) - input_zero_point * pool_area * scale_fixed_point + + sum_compute = te.compute( + oshape, + lambda n, c, h, w: te.sum( + data[n, c, h + rh_r, w + rw_r].astype(temp_dtype), axis=[rh_r, rw_r] + ), + name="sum", + ) + + avg_compute = te.compute( + oshape, + lambda n, c, h, w: saturate( + ((sum_compute[n, c, h, w] * scale_fixed_point) + corr) >> rsh, odtype + ).astype(odtype), + name="global_avg_pool2d", + ) + + return avg_compute + + +def stir_global_avg_pool2d_u8_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): + """Schedule""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("sum") + + # Input is multiple of fixed chunk but output is NxCx1x1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s diff --git a/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py b/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py index 043ad313bdef7..a974ad6431074 100755 --- a/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py +++ b/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py @@ -1,270 +1,270 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
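For reference, the fixed-point rescaling done by global_avg_pool2d_u8 above can be written as scalar Python. This is only an illustrative sketch, not code this patch adds: to_fixed_point is a simplified stand-in for topi.hexagon.utils.get_fixed_point_value, and the quantization parameters in the example are made up.

.. code-block:: python

    def to_fixed_point(val, rsh=14):
        # Stand-in for get_fixed_point_value: represent val as fixed / 2**rsh.
        return int(round(val * (1 << rsh))), rsh

    def global_avg_pool_u8(pixel_sum, area, in_zp, in_scale, out_zp, out_scale):
        # Float reference: out = out_zp + in_scale / (out_scale * area) * (pixel_sum - area * in_zp)
        scale_fp, rsh = to_fixed_point(in_scale / (out_scale * area))
        corr = (out_zp << rsh) - in_zp * area * scale_fp
        out = (pixel_sum * scale_fp + corr) >> rsh
        return max(0, min(255, out))  # saturate to uint8

    # 8x8 window whose 64 uint8 inputs sum to 6400 (made-up parameters)
    print(global_avg_pool_u8(pixel_sum=6400, area=64, in_zp=0,
                             in_scale=0.05, out_zp=10, out_scale=0.1))  # -> 60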
-# pylint: disable=invalid-name - -"""Compute and schedule for quantized add, multiply, subtract op - -Please note the following assumptions made by the implementation: - -1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value - - -def broadcast_axis(tensor_A, tensor_B): - """Find out the indices that will have broadcasting""" - A_broadcast = [] - B_broadcast = [] - - for i in range(len(tensor_A.shape)): - if tensor_A.shape[i] == tensor_B.shape[i]: - A_broadcast.append(1) - B_broadcast.append(1) - elif tensor_A.shape[i] == 1: - A_broadcast.append(0) - B_broadcast.append(1) - elif tensor_B.shape[i] == 1: - A_broadcast.append(1) - B_broadcast.append(0) - return A_broadcast, B_broadcast - - -def saturate(x: te.Tensor, dtype: str): - """Saturate value for the specified data type""" - return te.max(te.min_value(dtype), te.min(x, te.max_value(dtype))) - - -def get_int_scale( - scale_A: float, - scale_B: float, - scale_M: float, - zero_point_A: int, - zero_point_B: int, - zero_point_M: int, - op: str, -): - """ - Get fixed-point number and exp_scale_factor from topi.hexagon.utils.get_fixed_point_value. - Also, depending on the op, this function uses exp_scale_factor(log2 of the scale factor) - to adjust the output's zero_point. - """ - - C_recip = 1 / scale_M - - if op == "qmul": - scale = scale_A * scale_B * C_recip - scale_fixed_point, rsh = get_fixed_point_value(scale, "int16") - - # We need to adjust output's zero point value since the compute for the op is multiplied - # by a scaling factor. - # The scaling factor is 2^x where x is the exp_scale_factor which is assigned to rsh here. - # Since zero_point_M is multipled by 2^rsh while converting floating-point scale value - # into fixed-point number, we left shift it by rsh in our compute to reflect that. - - corr = zero_point_M << rsh - - return scale_fixed_point, rsh, corr - - a_scale_f = scale_A * C_recip - b_scale_f = scale_B * C_recip - scale_fixed_point_a, rsh_a = get_fixed_point_value(a_scale_f, "int16") - scale_fixed_point_b, rsh_b = get_fixed_point_value(b_scale_f, "int16") - - # Here we have two exp_scale_factors rsh_a and rsh_b. - # To avoid complexity, we want to use a common exp_scale_factor and - # we want to use the lowest of the two. - - # Since, either of scale_fixed_point_a or scale_fixed_point_b has already been multiplied - # by 2^max(rsh_a, rsh_b) in topi.hexagon.utils.get_fixed_point_value, - # we want to undo that by right shifting that scale_fixed_point value - # by the difference of rsh_a and rsh_b. - - # This results into having a common exp_scale_factor for both scale_fixed_point_a - # and scale_fixed_point_b. - - # We also set rsh here which is used to adjust the zero_point_M and compute the corr value, - # computation of which comes from the original equation of the op's compute. 
- - if rsh_a > rsh_b: - scale_fixed_point_a = scale_fixed_point_a >> (rsh_a - rsh_b) - rsh = rsh_b - else: - scale_fixed_point_b = scale_fixed_point_b >> (rsh_b - rsh_a) - rsh = rsh_a - - if op == "qadd": - corr = (zero_point_M << rsh) - ( - zero_point_A * scale_fixed_point_a + zero_point_B * scale_fixed_point_b - ) - else: - corr = (zero_point_M << rsh) - ( - zero_point_A * scale_fixed_point_a - zero_point_B * scale_fixed_point_b - ) - - return scale_fixed_point_a, scale_fixed_point_b, rsh, corr - - -def qadd_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized add with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_a, scale_b, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qadd" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) - + (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def qsubtract_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized subtract with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_a, scale_b, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qsub" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) - - (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def qmultiply_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized multiply with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_int, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qmul" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - scale_int - * (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] - zero_point_A) - * (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] - zero_point_B) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def tir_schedule_quant( - out_M: te.Tensor, - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_layout: str, - tensor_A_layout: str, - tensor_B_layout: str, -): - """Schedule for output layout nhwc-8h8w32c-2d""" - func = te.create_prim_func([tensor_A, tensor_B, out_M]) - - s = tir.Schedule(func) - - block = s.get_block("compute") - - if tensor_A_layout == "nhwc-8h8w32c-2d": - tensor_A_transformed_layout = get_layout_transform_fn(tensor_A_layout) - s.transform_layout(block, buffer=tensor_A.name, index_map=tensor_A_transformed_layout) - - if tensor_B_layout == "nhwc-8h8w32c-2d": - 
tensor_B_transformed_layout = get_layout_transform_fn(tensor_B_layout) - s.transform_layout(block, buffer=tensor_B.name, index_map=tensor_B_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) - - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 8]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 4]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name + +"""Compute and schedule for quantized add, multiply, subtract op + +Please note the following assumptions made by the implementation: + +1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value + + +def broadcast_axis(tensor_A, tensor_B): + """Find out the indices that will have broadcasting""" + A_broadcast = [] + B_broadcast = [] + + for i in range(len(tensor_A.shape)): + if tensor_A.shape[i] == tensor_B.shape[i]: + A_broadcast.append(1) + B_broadcast.append(1) + elif tensor_A.shape[i] == 1: + A_broadcast.append(0) + B_broadcast.append(1) + elif tensor_B.shape[i] == 1: + A_broadcast.append(1) + B_broadcast.append(0) + return A_broadcast, B_broadcast + + +def saturate(x: te.Tensor, dtype: str): + """Saturate value for the specified data type""" + return te.max(te.min_value(dtype), te.min(x, te.max_value(dtype))) + + +def get_int_scale( + scale_A: float, + scale_B: float, + scale_M: float, + zero_point_A: int, + zero_point_B: int, + zero_point_M: int, + op: str, +): + """ + Get fixed-point number and exp_scale_factor from topi.hexagon.utils.get_fixed_point_value. + Also, depending on the op, this function uses exp_scale_factor(log2 of the scale factor) + to adjust the output's zero_point. + """ + + C_recip = 1 / scale_M + + if op == "qmul": + scale = scale_A * scale_B * C_recip + scale_fixed_point, rsh = get_fixed_point_value(scale, "int16") + + # We need to adjust output's zero point value since the compute for the op is multiplied + # by a scaling factor. + # The scaling factor is 2^x where x is the exp_scale_factor which is assigned to rsh here. + # Since zero_point_M is multipled by 2^rsh while converting floating-point scale value + # into fixed-point number, we left shift it by rsh in our compute to reflect that. 
+ + corr = zero_point_M << rsh + + return scale_fixed_point, rsh, corr + + a_scale_f = scale_A * C_recip + b_scale_f = scale_B * C_recip + scale_fixed_point_a, rsh_a = get_fixed_point_value(a_scale_f, "int16") + scale_fixed_point_b, rsh_b = get_fixed_point_value(b_scale_f, "int16") + + # Here we have two exp_scale_factors rsh_a and rsh_b. + # To avoid complexity, we want to use a common exp_scale_factor and + # we want to use the lowest of the two. + + # Since, either of scale_fixed_point_a or scale_fixed_point_b has already been multiplied + # by 2^max(rsh_a, rsh_b) in topi.hexagon.utils.get_fixed_point_value, + # we want to undo that by right shifting that scale_fixed_point value + # by the difference of rsh_a and rsh_b. + + # This results into having a common exp_scale_factor for both scale_fixed_point_a + # and scale_fixed_point_b. + + # We also set rsh here which is used to adjust the zero_point_M and compute the corr value, + # computation of which comes from the original equation of the op's compute. + + if rsh_a > rsh_b: + scale_fixed_point_a = scale_fixed_point_a >> (rsh_a - rsh_b) + rsh = rsh_b + else: + scale_fixed_point_b = scale_fixed_point_b >> (rsh_b - rsh_a) + rsh = rsh_a + + if op == "qadd": + corr = (zero_point_M << rsh) - ( + zero_point_A * scale_fixed_point_a + zero_point_B * scale_fixed_point_b + ) + else: + corr = (zero_point_M << rsh) - ( + zero_point_A * scale_fixed_point_a - zero_point_B * scale_fixed_point_b + ) + + return scale_fixed_point_a, scale_fixed_point_b, rsh, corr + + +def qadd_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized add with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, w_b, c_b = B_broadcast + + scale_a, scale_b, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qadd" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) + + (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def qsubtract_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized subtract with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, w_b, c_b = B_broadcast + + scale_a, scale_b, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qsub" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) + - (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def qmultiply_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized multiply with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, 
w_b, c_b = B_broadcast + + scale_int, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qmul" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + scale_int + * (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] - zero_point_A) + * (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] - zero_point_B) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def tir_schedule_quant( + out_M: te.Tensor, + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_layout: str, + tensor_A_layout: str, + tensor_B_layout: str, +): + """Schedule for output layout nhwc-8h8w32c-2d""" + func = te.create_prim_func([tensor_A, tensor_B, out_M]) + + s = tir.Schedule(func) + + block = s.get_block("compute") + + if tensor_A_layout == "nhwc-8h8w32c-2d": + tensor_A_transformed_layout = get_layout_transform_fn(tensor_A_layout) + s.transform_layout(block, buffer=tensor_A.name, index_map=tensor_A_transformed_layout) + + if tensor_B_layout == "nhwc-8h8w32c-2d": + tensor_B_transformed_layout = get_layout_transform_fn(tensor_B_layout) + s.transform_layout(block, buffer=tensor_B.name, index_map=tensor_B_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) + + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 8]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 4]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) + + return s diff --git a/python/tvm/topi/hexagon/qnn/quantize.py b/python/tvm/topi/hexagon/qnn/quantize.py index ff03aac0a8628..3fd91ddce6ca4 100755 --- a/python/tvm/topi/hexagon/qnn/quantize.py +++ b/python/tvm/topi/hexagon/qnn/quantize.py @@ -1,80 +1,80 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
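For reference, a scalar sketch of the rescaling that get_int_scale and qmultiply_broadcast_compute above implement for the "qmul" case. It is illustrative only, not code this patch adds: to_fixed_point is a simplified stand-in for get_fixed_point_value, the quantization parameters are made up, and, like the TE compute, the final right shift truncates.

.. code-block:: python

    def to_fixed_point(val, rsh=14):
        # Stand-in for get_fixed_point_value: represent val as fixed / 2**rsh.
        return int(round(val * (1 << rsh))), rsh

    def qmul_scalar(a_q, b_q, zp_a, scale_a, zp_b, scale_b, zp_m, scale_m):
        # Float reference: out = zp_m + (scale_a * scale_b / scale_m) * (a_q - zp_a) * (b_q - zp_b)
        scale_fp, rsh = to_fixed_point(scale_a * scale_b / scale_m)
        corr = zp_m << rsh  # output zero point pre-shifted into the fixed-point domain
        out = (scale_fp * (a_q - zp_a) * (b_q - zp_b) + corr) >> rsh
        return max(-128, min(127, out))  # saturate to int8

    print(qmul_scalar(a_q=20, b_q=30, zp_a=0, scale_a=0.1,
                      zp_b=0, scale_b=0.1, zp_m=5, scale_m=0.2))  # -> 34 (float reference: 35; the shift truncates)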
-# pylint: disable=invalid-name -"""Compute and schedule for hexagon quantize -Please note the following assumptions made by the implementation: -1) The input and output data will be multiple of crouton layout -2) And the supported layout is NHWC -3) The input layout will be nhwc-4h2w32c2w-2d and - output layout will be nhwc-8h8w32c-2d""" - - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, saturate - - -def quantize_compute(tensor_A: te.Tensor, scale: float, zero_point: int, dtype: str): - """Compute for quantize""" - scale_recip = 1 / scale - - return te.compute( - tensor_A.shape, - lambda n, h, w, c: saturate( - ((tensor_A[n, h, w, c] * scale_recip).astype("int32") + zero_point), - dtype, - ).astype(dtype), - name="quantize", - ) - - -def tir_quantize_schedule( - out_M: te.Tensor, - tensor_A: te.Tensor, - input_layout: str, - output_layout: str, -): - """Schedule for output layout nhwc-8h8w32c-2d""" - func = te.create_prim_func([tensor_A, out_M]) - - s = tir.Schedule(func) - - block = s.get_block("quantize") - - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(block, buffer=tensor_A.name, index_map=input_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) - - # Fixed chunk size is 2048 byte - # For uint8 the layout for fixed chunk is 8x8x32 - # where each element is 1 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 8 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 8]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 4]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name +"""Compute and schedule for hexagon quantize +Please note the following assumptions made by the implementation: +1) The input and output data will be multiple of crouton layout +2) And the supported layout is NHWC +3) The input layout will be nhwc-4h2w32c2w-2d and + output layout will be nhwc-8h8w32c-2d""" + + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, saturate + + +def quantize_compute(tensor_A: te.Tensor, scale: float, zero_point: int, dtype: str): + """Compute for quantize""" + scale_recip = 1 / scale + + return te.compute( + tensor_A.shape, + lambda n, h, w, c: saturate( + ((tensor_A[n, h, w, c] * scale_recip).astype("int32") + zero_point), + dtype, + ).astype(dtype), + name="quantize", + ) + + +def tir_quantize_schedule( + out_M: te.Tensor, + tensor_A: te.Tensor, + input_layout: str, + output_layout: str, +): + """Schedule for output layout nhwc-8h8w32c-2d""" + func = te.create_prim_func([tensor_A, out_M]) + + s = tir.Schedule(func) + + block = s.get_block("quantize") + + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(block, buffer=tensor_A.name, index_map=input_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) + + # Fixed chunk size is 2048 byte + # For uint8 the layout for fixed chunk is 8x8x32 + # where each element is 1 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 8 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 8]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 4]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) + + return s diff --git a/python/tvm/topi/hexagon/resize2d.py b/python/tvm/topi/hexagon/resize2d.py index 0e817e2e93302..6e6c0e471db07 100755 --- a/python/tvm/topi/hexagon/resize2d.py +++ b/python/tvm/topi/hexagon/resize2d.py @@ -1,116 +1,116 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
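For reference, quantize_compute above maps float input to uint8/int8 as q = saturate(int32(x / scale) + zero_point). Below is a NumPy sketch of the same mapping with a made-up scale and zero point; it is illustrative only, not code this patch adds.

.. code-block:: python

    import numpy as np

    def quantize_ref(x, scale, zero_point, dtype="uint8"):
        # Mirrors quantize_compute: truncating cast to int32, add zero point, then saturate.
        info = np.iinfo(dtype)
        q = (x * (1.0 / scale)).astype("int32") + zero_point
        return np.clip(q, info.min, info.max).astype(dtype)

    x = np.array([-0.5, 0.0, 0.25, 10.0], dtype="float32")
    print(quantize_ref(x, scale=0.05, zero_point=128))  # [118 128 133 255]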
-# pylint: disable=invalid-name - -"""Compute and schedule for resize2d -Please note the following assumptions made by the implementation: -1) The input and output data will be multiple of crouton layout -2) And the supported layout is NHWC""" - -from tvm import te -from tvm import tir -from tvm import topi -from .utils import get_layout_transform_fn - - -def resize2d_compute( - data, - roi, - size, - layout, - method="linear", - coordinate_transformation_mode="half_pixel", - rounding_method="", - bicubic_alpha=-0.5, - bicubic_exclude=0, - extrapolation_value=0.0, - out_dtype=None, - output_shape=None, -): - """Call resize2d op from topi.image""" - return topi.image.resize2d( - data, - roi, - size, - layout, - method, - coordinate_transformation_mode, - rounding_method, - bicubic_alpha, - bicubic_exclude, - extrapolation_value, - out_dtype, - output_shape, - ) - - -def tir_resize2d_schedule( - out_m, - input_a, - input_layout: str, - output_layout: str, -): - """Schedule for input and output layout nhwc-8h2w32c2w-2d and nhwc-8h8w32c-2d""" - func = te.create_prim_func([input_a, out_m]) - - s = tir.Schedule(func) - - block = s.get_block("resize") - - if input_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(block, buffer=("read", 0), index_map=input_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) - - if output_layout == "nhwc-8h2w32c2w-2d": - # Fixed chunk size is 2048 byte - # For fp16 the layout for fixed chunk is 8x4x32 - # where each element is 2 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 4 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - ho, hi = s.split(h, [None, 8]) - wo, wi = s.split(w, [None, 4]) - co, ci = s.split(c, [None, 32]) - - s.reorder(n, ho, wo, co, hi, wi, ci) - - elif output_layout == "nhwc-8h8w32c-2d": - # Fixed chunk size is 2048 byte - # For uint8 the layout for fixed chunk is 8x8x32 - # where each element is 1 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 8 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - ho, hi = s.split(h, [None, 8]) - wo, wi = s.split(w, [None, 8]) - co, ci = s.split(c, [None, 32]) - - s.reorder(n, ho, wo, co, hi, wi, ci) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name + +"""Compute and schedule for resize2d +Please note the following assumptions made by the implementation: +1) The input and output data will be multiple of crouton layout +2) And the supported layout is NHWC""" + +from tvm import te +from tvm import tir +from tvm import topi +from .utils import get_layout_transform_fn + + +def resize2d_compute( + data, + roi, + size, + layout, + method="linear", + coordinate_transformation_mode="half_pixel", + rounding_method="", + bicubic_alpha=-0.5, + bicubic_exclude=0, + extrapolation_value=0.0, + out_dtype=None, + output_shape=None, +): + """Call resize2d op from topi.image""" + return topi.image.resize2d( + data, + roi, + size, + layout, + method, + coordinate_transformation_mode, + rounding_method, + bicubic_alpha, + bicubic_exclude, + extrapolation_value, + out_dtype, + output_shape, + ) + + +def tir_resize2d_schedule( + out_m, + input_a, + input_layout: str, + output_layout: str, +): + """Schedule for input and output layout nhwc-8h2w32c2w-2d and nhwc-8h8w32c-2d""" + func = te.create_prim_func([input_a, out_m]) + + s = tir.Schedule(func) + + block = s.get_block("resize") + + if input_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(block, buffer=("read", 0), index_map=input_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) + + if output_layout == "nhwc-8h2w32c2w-2d": + # Fixed chunk size is 2048 byte + # For fp16 the layout for fixed chunk is 8x4x32 + # where each element is 2 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 4 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + ho, hi = s.split(h, [None, 8]) + wo, wi = s.split(w, [None, 4]) + co, ci = s.split(c, [None, 32]) + + s.reorder(n, ho, wo, co, hi, wi, ci) + + elif output_layout == "nhwc-8h8w32c-2d": + # Fixed chunk size is 2048 byte + # For uint8 the layout for fixed chunk is 8x8x32 + # where each element is 1 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 8 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + ho, hi = s.split(h, [None, 8]) + wo, wi = s.split(w, [None, 8]) + co, ci = s.split(c, [None, 32]) + + s.reorder(n, ho, wo, co, hi, wi, ci) + + return s diff --git a/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py b/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py index 86b6adb997cbd..0596f79b66a8c 100755 --- a/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py +++ b/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py @@ -1,87 +1,87 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
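The resize2d file above is re-added with its content unchanged (line-ending normalization only); for context, a hedged sketch of how the compute and schedule pair up is shown below. The module path, roi value, and sizes are assumptions.

from tvm import te
from tvm.topi.hexagon import resize2d  # assumed module path

data = te.placeholder((1, 8, 8, 32), dtype="float16", name="data")  # NHWC, crouton-aligned
out = resize2d.resize2d_compute(data, [0.0, 0.0, 0.0, 0.0], (16, 16), "NHWC")
sch = resize2d.tir_resize2d_schedule(out, data, "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d")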
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name - -"""Compute and schedule for add, multiply, subtract slice op - -Please note the following assumptions made by the implementation: - -1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" - -from tvm import te -from tvm import tir -from tvm import topi -from ..utils import get_layout_transform_fn - - -def add_broadcast_compute(input_a, input_b): - """Call the add op from topi""" - return topi.add(input_a, input_b) - - -def subtract_broadcast_compute(input_a, input_b): - """Call the subtract op from topi""" - return topi.subtract(input_a, input_b) - - -def multiply_broadcast_compute(input_a, input_b): - """Call the multiply op from topi""" - return topi.multiply(input_a, input_b) - - -def tir_broadcast_schedule( - out_m, - input_a, - input_b, - output_layout: str, - input_a_layout: str, - input_b_layout: str, - op_name: str, -): - """Schedule for input and output layout nhwc-8h2w32c2w-2d considering broadcast""" - func = te.create_prim_func([input_a, input_b, out_m]) - - s = tir.Schedule(func) - - block_dict = {"add": "T_add", "subtract": "T_subtract", "multiply": "T_multiply"} - - block = s.get_block(block_dict[op_name]) - - if input_a_layout == "nhwc-8h2w32c2w-2d": - input_a_transformed_layout = get_layout_transform_fn(input_a_layout) - s.transform_layout(block, buffer=("read", 0), index_map=input_a_transformed_layout) - - if input_b_layout == "nhwc-8h2w32c2w-2d": - input_b_transformed_layout = get_layout_transform_fn(input_b_layout) - s.transform_layout(block, buffer=("read", 1), index_map=input_b_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) - - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 4]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 2]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, c_i, wii) - - fused = s.fuse(c_i, wii) - s.vectorize(fused) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name + +"""Compute and schedule for add, multiply, subtract slice op + +Please note the following assumptions made by the implementation: + +1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" + +from tvm import te +from tvm import tir +from tvm import topi +from ..utils import get_layout_transform_fn + + +def add_broadcast_compute(input_a, input_b): + """Call the add op from topi""" + return topi.add(input_a, input_b) + + +def subtract_broadcast_compute(input_a, input_b): + """Call the subtract op from topi""" + return topi.subtract(input_a, input_b) + + +def multiply_broadcast_compute(input_a, input_b): + """Call the multiply op from topi""" + return topi.multiply(input_a, input_b) + + +def tir_broadcast_schedule( + out_m, + input_a, + input_b, + output_layout: str, + input_a_layout: str, + input_b_layout: str, + op_name: str, +): + """Schedule for input and output layout nhwc-8h2w32c2w-2d considering broadcast""" + func = te.create_prim_func([input_a, input_b, out_m]) + + s = tir.Schedule(func) + + block_dict = {"add": "T_add", "subtract": "T_subtract", "multiply": "T_multiply"} + + block = s.get_block(block_dict[op_name]) + + if input_a_layout == "nhwc-8h2w32c2w-2d": + input_a_transformed_layout = get_layout_transform_fn(input_a_layout) + s.transform_layout(block, buffer=("read", 0), index_map=input_a_transformed_layout) + + if input_b_layout == "nhwc-8h2w32c2w-2d": + input_b_transformed_layout = get_layout_transform_fn(input_b_layout) + s.transform_layout(block, buffer=("read", 1), index_map=input_b_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) + + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 4]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 2]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, c_i, wii) + + fused = s.fuse(c_i, wii) + s.vectorize(fused) + + return s diff --git a/python/tvm/topi/hexagon/slice_ops/conv2d.py b/python/tvm/topi/hexagon/slice_ops/conv2d.py index 439fd80648f9d..ab782b5fa21aa 100644 --- a/python/tvm/topi/hexagon/slice_ops/conv2d.py +++ b/python/tvm/topi/hexagon/slice_ops/conv2d.py @@ -166,7 +166,7 @@ def conv2d_schedule( # from tvm.script import tir as T @T.prim_func - def func(InputTensor: T.Buffer[(1, 24, 12, 32), "float16"], Weights: T.Buffer[(3, 3, 32, 32), "float16"], compute: T.Buffer[(1, 16, 8, 32), "float16"]) -> None: + def func(InputTensor: T.Buffer((1, 24, 12, 32), "float16"), Weights: T.Buffer((3, 3, 32, 32), "float16"), compute: T.Buffer((1, 16, 8, 32), "float16")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/python/tvm/topi/hexagon/slice_ops/dwconv2d.py b/python/tvm/topi/hexagon/slice_ops/dwconv2d.py index d22dc02a5c1b0..d94afe98bc61c 100644 --- a/python/tvm/topi/hexagon/slice_ops/dwconv2d.py +++ b/python/tvm/topi/hexagon/slice_ops/dwconv2d.py @@ -94,7 +94,7 @@ def dwconv2d_schedule( @tvm.script.ir_module class Module: @T.prim_func - def main(InputTensor: T.Buffer[(1, 16, 8, 32), "float16"], Weights: T.Buffer[(3, 3, 1, 32), "float16"], Output: T.Buffer[(1, 8, 4, 32), "float16"]) -> None: + def main(InputTensor: T.Buffer((1, 16, 8, 32), "float16"), Weights: T.Buffer((3, 3, 1, 32), "float16"), Output: T.Buffer((1, 8, 4, 32), "float16")) -> None: # function attr dict T.func_attr({"global_symbol": "main", 
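The add/subtract/multiply slice-op file above is likewise only re-encoded. As a reading aid, a minimal sketch of driving the broadcast schedule follows; the import path, shapes, and layout strings are assumptions, not part of the patch.

from tvm import te
import tvm.topi.hexagon.slice_ops as sl  # assumed to re-export the broadcast helpers

a = te.placeholder((1, 8, 8, 32), dtype="float16", name="A")
b = te.placeholder((1, 8, 8, 32), dtype="float16", name="B")
out = sl.add_broadcast_compute(a, b)
sch = sl.tir_broadcast_schedule(
    out, a, b,
    output_layout="nhwc-8h2w32c2w-2d",
    input_a_layout="nhwc-8h2w32c2w-2d",
    input_b_layout="nhwc-8h2w32c2w-2d",
    op_name="add",
)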
"tir.noalias": True}) # body diff --git a/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py index 30222c11bb540..9e6ae077851e0 100755 --- a/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py @@ -1,52 +1,52 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Assumptions: -1) The input is in NCHW layout. Squeezenet is the only model that calls - nn.global_avg_pool2d and the only layout it uses is 'NCHW'. -2) The op takes input data as an argument. -3) Both input and output dtype is float32 and -4) Input is assumed to always be multiple of fixed chunk 32c8h4w. -""" - -from tvm import te -from tvm import tir -from tvm import topi -from ..utils import get_layout_transform_fn - - -def global_avg_pool2d( - data: te.Tensor, -): - """global_avg_pool2d""" - return topi.nn.global_pool(data, "avg", "NCHW") - - -def stir_global_avg_pool2d_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): - """Schedule""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("adaptive_pool_sum") - - # Input is multiple of fixed chunk but output is NxCx1x1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Assumptions: +1) The input is in NCHW layout. Squeezenet is the only model that calls + nn.global_avg_pool2d and the only layout it uses is 'NCHW'. +2) The op takes input data as an argument. +3) Both input and output dtype is float32 and +4) Input is assumed to always be multiple of fixed chunk 32c8h4w. 
+""" + +from tvm import te +from tvm import tir +from tvm import topi +from ..utils import get_layout_transform_fn + + +def global_avg_pool2d( + data: te.Tensor, +): + """global_avg_pool2d""" + return topi.nn.global_pool(data, "avg", "NCHW") + + +def stir_global_avg_pool2d_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): + """Schedule""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("adaptive_pool_sum") + + # Input is multiple of fixed chunk but output is NxCx1x1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s diff --git a/src/tir/analysis/control_flow_graph.h b/src/tir/analysis/control_flow_graph.h index 00a6b68ff945c..f2e46b2478a3b 100644 --- a/src/tir/analysis/control_flow_graph.h +++ b/src/tir/analysis/control_flow_graph.h @@ -315,7 +315,7 @@ class BufferState { * * \code{.py} * @T.prim_func - * def func(T.Buffer[16, "float32"]): + * def func(T.Buffer(16, "float32")): * for i in T.serial(16): * if i < 8: * B[i] = i diff --git a/tests/micro/zephyr/test_zephyr_aot_exec.py b/tests/micro/zephyr/test_zephyr_aot_exec.py index d42c7a00b40ed..d5bcf08a0cb62 100644 --- a/tests/micro/zephyr/test_zephyr_aot_exec.py +++ b/tests/micro/zephyr/test_zephyr_aot_exec.py @@ -105,7 +105,7 @@ def test_aot_executor(workspace_dir, board, microtvm_debug, use_fvp, serial_numb print("test_relay: construct relay program\n") # Construct Relay program. - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { diff --git a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py index ace1db7811da6..a4ea1ea32e6bf 100644 --- a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py +++ b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py @@ -48,8 +48,8 @@ def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] { %1 } """ - orig_mod = tvm.parser.fromtext(original_model) - cmsisnn_mod = tvm.parser.fromtext(cmsisnn_model) + orig_mod = tvm.relay.fromtext(original_model) + cmsisnn_mod = tvm.relay.fromtext(cmsisnn_model) params = {} # validate the output diff --git a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py index 02b5f9f7f1225..99bd273115a7e 100644 --- a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py +++ b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py @@ -473,7 +473,7 @@ def test_reordering_based_on_cycles(): @tvm.script.ir_module class ModuleBefore: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_encoded_3: T.Buffer[112, "uint8"], ethosu_write: T.Buffer[43672, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -521,7 
+521,7 @@ def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208 @tvm.script.ir_module class ModuleAfter: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_encoded_3: T.Buffer[112, "uint8"], ethosu_write: T.Buffer[43672, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -576,7 +576,7 @@ def test_reordering_based_on_cycles_luts_present(): @tvm.script.ir_module class ModuleBefore: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_1: T.Buffer[256, "int8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_2: T.Buffer[256, "int8"], placeholder_3: T.Buffer[256, "int8"], ethosu_write: T.Buffer[46200, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") @@ -626,7 +626,7 @@ def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208 @tvm.script.ir_module class ModuleAfter: @T.prim_func - def main(placeholder: T.Buffer[97156, "int8"], placeholder_encoded: T.Buffer[208, "uint8"], placeholder_encoded_1: T.Buffer[112, "uint8"], placeholder_1: T.Buffer[256, "int8"], placeholder_encoded_2: T.Buffer[96, "uint8"], placeholder_2: T.Buffer[256, "int8"], placeholder_3: T.Buffer[256, "int8"], ethosu_write: T.Buffer[46200, "int8"]) -> None: + def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) ax0_ax1_fused_ax2_fused_ax3_fused = T.var("int32") diff --git a/tests/python/contrib/test_ethosu/test_create_tiles.py b/tests/python/contrib/test_ethosu/test_create_tiles.py index d51c438cbf4e1..e4b4067a29778 100644 --- a/tests/python/contrib/test_ethosu/test_create_tiles.py +++ b/tests/python/contrib/test_ethosu/test_create_tiles.py @@ -48,7 +48,7 @@ def test_create_tiles_h(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 6): 
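Most remaining hunks in the Ethos-U and Hexagon tests are the same mechanical migration of TVMScript buffer annotations from subscript syntax to call syntax. A minimal, made-up prim_func illustrating the new spelling (nothing below is taken from the patch):

from tvm.script import tir as T

@T.prim_func
def example(A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32")) -> None:
    # Previously written as A: T.Buffer[(16,), "float32"]; the shape/dtype arguments are unchanged.
    T.func_attr({"global_symbol": "example", "tir.noalias": True})
    for i in T.serial(16):
        B[i] = A[i] + T.float32(1)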
@@ -79,7 +79,7 @@ def test_create_tiles_w(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 1): @@ -110,7 +110,7 @@ def test_create_tiles_wrong_var_stride(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHCWB16") for i0 in T.serial(0, 1): for i1 in T.serial(0, 6): @@ -141,7 +141,7 @@ def test_create_tiles_multiple_var_occurrences(): @tvm.script.ir_module class Module: @T.prim_func - def main(placeholder1: T.Buffer[(100,), "int8"], placeholder2: T.Buffer[(100,), "int8"]) -> None: + def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: T.attr("i0", "pragma_layout", "NHWC") for i0 in T.serial(0, 1): for i1 in T.serial(0, 5): diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py index 871c7e29df20f..030976845298b 100644 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ b/tests/python/contrib/test_ethosu/test_encode_constants.py @@ -36,7 +36,7 @@ @tvm.script.ir_module class WeightStreamOnlyU55: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) @@ -66,7 +66,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class WeightStreamOnlyU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -154,7 +154,7 @@ def _get_func(): @tvm.script.ir_module class RereadWeightsU55: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([384], "uint8") @@ -175,7 +175,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class RereadWeightsU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, 
"global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -256,7 +256,7 @@ def _get_func(): @tvm.script.ir_module class DirectReadOnlyU55: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([592], "uint8") @@ -276,7 +276,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ @tvm.script.ir_module class DirectReadOnlyU65: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -354,7 +354,7 @@ def _get_func(): @tvm.script.ir_module class MixedReadU55: @T.prim_func - def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([112], "uint8") @@ -387,7 +387,7 @@ def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer @tvm.script.ir_module class MixedReadU65: @T.prim_func - def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py index ea1cae50e6eb8..f38e981e93bd8 100644 --- a/tests/python/contrib/test_ethosu/test_hoist_allocates.py +++ b/tests/python/contrib/test_ethosu/test_hoist_allocates.py @@ -106,7 +106,7 @@ def test_double_convolution(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 27, 42, 3), "int8"], input_placeholder_encoded: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_1: T.Buffer[(3, 10), "uint8"], input_placeholder_encoded_2: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_3: T.Buffer[(3, 10), "uint8"], input_ethosu_write: T.Buffer[(1, 27, 42, 3), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 27, 42, 3), "int8"), input_placeholder_encoded: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_1: T.Buffer((3, 10), "uint8"), input_placeholder_encoded_2: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_3: T.Buffer((3, 10), "uint8"), input_ethosu_write: T.Buffer((1, 27, 42, 3), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([3402], dtype="int8", data=input_placeholder.data) @@ -150,7 +150,7 @@ def test_identities(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 2, 
3, 4), "int8"], T_concat: T.Buffer[(24,), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 2, 3, 4), "int8"), T_concat: T.Buffer((24,), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([24], dtype="int8", data=input_placeholder.data) @@ -187,7 +187,7 @@ def test_outer_seq_stmt(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) @@ -237,7 +237,7 @@ def test_allocate_without_seq_stmt(): @tvm.script.ir_module class Module: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py index 5bc31dacb59d9..c445ceb2f3e3d 100644 --- a/tests/python/contrib/test_ethosu/test_legalize.py +++ b/tests/python/contrib/test_ethosu/test_legalize.py @@ -86,7 +86,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%1, %2, %3, %4) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) def expected_mod_axis2(): expected_ir_string = """ @@ -107,7 +107,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%1, %2, %3, %4) } """ - return 
tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] @@ -174,7 +174,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%8, %10, %12, %14, %16) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) def expected_mod_axis2(): expected_ir_string = """ @@ -209,7 +209,7 @@ def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tens (%8, %10, %12, %14, %16) } """ - return tvm.parser.fromtext(expected_ir_string) + return tvm.relay.fromtext(expected_ir_string) rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] diff --git a/tests/python/contrib/test_ethosu/test_merge_constants.py b/tests/python/contrib/test_ethosu/test_merge_constants.py index 7465e220787c1..909f9fe67365e 100644 --- a/tests/python/contrib/test_ethosu/test_merge_constants.py +++ b/tests/python/contrib/test_ethosu/test_merge_constants.py @@ -38,7 +38,7 @@ def test_only_one_operator(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -56,7 +56,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -83,7 +83,7 @@ def test_all_operators_with_weights(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -122,7 +122,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -173,7 +173,7 @@ def test_operators_with_and_without_weights(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: 
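The test_legalize changes above, like the zephyr and CMSIS-NN ones earlier, swap tvm.parser.fromtext for tvm.relay.fromtext. A minimal sketch of the new entry point, reusing the tiny two-tensor module from the zephyr test (the add body is an assumption):

import tvm

mod = tvm.relay.fromtext(
    """
    #[version = "0.0.5"]
    def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) {
        add(%a, %b)
    }
    """
)
print(mod["main"])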
T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) -> None: + def main(buffer2: T.Buffer((80,), "uint8"), buffer3: T.Buffer((64,), "uint8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer0 = T.Buffer([390336], "int8") buffer1 = T.Buffer([97156], "int8") @@ -192,7 +192,7 @@ def main(buffer2: T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) - @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((144,), "uint8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer0 = T.Buffer([390336], "int8") buffer1 = T.Buffer([97156], "int8") @@ -221,15 +221,15 @@ def test_copy_to_buffer_with_local_scope(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer1: T.Buffer[(64,), "uint8"], - buffer2: T.Buffer[(48,), "uint8"], - buffer3: T.Buffer[(256,), "uint8"], - buffer4: T.Buffer[(256,), "uint8"], - buffer5: T.Buffer[(16,), "uint8"], - buffer6: T.Buffer[(48,), "uint8"], - buffer7: T.Buffer[(256,), "uint8"], - buffer8: T.Buffer[(64,), "uint8"], - buffer9: T.Buffer[(256,), "int8"], + def main(buffer1: T.Buffer((64,), "uint8"), + buffer2: T.Buffer((48,), "uint8"), + buffer3: T.Buffer((256,), "uint8"), + buffer4: T.Buffer((256,), "uint8"), + buffer5: T.Buffer((16,), "uint8"), + buffer6: T.Buffer((48,), "uint8"), + buffer7: T.Buffer((256,), "uint8"), + buffer8: T.Buffer((64,), "uint8"), + buffer9: T.Buffer((256,), "int8"), ) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body @@ -258,13 +258,13 @@ def main(buffer1: T.Buffer[(64,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer1: T.Buffer[(64,), "uint8"], - buffer2: T.Buffer[(96,), "uint8"], - buffer4: T.Buffer[(256,), "uint8"], - buffer5: T.Buffer[(64,), "uint8"], - buffer7: T.Buffer[(256,), "uint8"], - buffer8: T.Buffer[(64,), "uint8"], - buffer9: T.Buffer[(256,), "int8"], + def main(buffer1: T.Buffer((64,), "uint8"), + buffer2: T.Buffer((96,), "uint8"), + buffer4: T.Buffer((256,), "uint8"), + buffer5: T.Buffer((64,), "uint8"), + buffer7: T.Buffer((256,), "uint8"), + buffer8: T.Buffer((64,), "uint8"), + buffer9: T.Buffer((256,), "int8"), ) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body @@ -348,7 +348,7 @@ def test_copies_to_the_same_buffer(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -369,7 +369,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer1 = T.Buffer([8192], "int8") @@ -399,7 +399,7 @@ def test_read_from_the_same_buffer(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: 
T.Buffer[(96,), "uint8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -419,7 +419,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], buffer1: T.Buffer @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -449,7 +449,7 @@ def test_arbitrary_argument_order(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: T.Buffer[(96,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer3: T.Buffer[(368,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -476,7 +476,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(464,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -515,7 +515,7 @@ def test_arbitrary_argument_order_const_split(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(96,), "uint8"], buffer3: T.Buffer[(368,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((96,), "uint8"), buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -542,7 +542,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], input_ethosu_write: 
T.Buffer[(1,16,16,8), "int8"], buffer2: T.Buffer[(464,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -581,7 +581,7 @@ def test_arbitrary_argument_order_const_split_mixed(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(368,), "uint8"], buffer2: T.Buffer[(368,), "uint8"], input_ethosu_write: T.Buffer[(2,16,16,8), "int8"], buffer3: T.Buffer[(96,), "uint8"], buffer4: T.Buffer[(96,), "uint8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8"), buffer3: T.Buffer((96,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -608,7 +608,7 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(464,), "uint8"], buffer2: T.Buffer[(464,), "uint8"], input_ethosu_write: T.Buffer[(2,16,16,8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), buffer2: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition @@ -647,7 +647,7 @@ def test_cycle_count(): @tvm.script.ir_module class InputModule: @T.prim_func - def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None: + def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) v1a = T.var("int32") @@ -710,7 +710,7 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None: + def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) v1a = T.var("int32") diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py index 64777aa0fb71b..ef034930d7bc0 100644 --- 
a/tests/python/contrib/test_ethosu/test_remove_concatenates.py +++ b/tests/python/contrib/test_ethosu/test_remove_concatenates.py @@ -31,7 +31,7 @@ @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder: T.Buffer[(1,8,12,16), "int8"], input_placeholder_1: T.Buffer[(1,8,10,16), "int8"], input_T_concat: T.Buffer[(1,8,32,16), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1,8,12,16), "int8"), input_placeholder_1: T.Buffer((1,8,10,16), "int8"), input_T_concat: T.Buffer((1,8,32,16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py index ffa6d6effd79b..6bcea7008c862 100644 --- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py +++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py @@ -367,7 +367,7 @@ def _visit(stmt): @tvm.script.ir_module class Conv2dDoubleCascade1: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([304], "uint8") @@ -389,7 +389,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dDoubleCascade2: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([80], "uint8") @@ -411,7 +411,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dDoubleCascade3: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 20, 4, 8), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 16, 16, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 20, 4, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1744], "uint8") @@ -436,7 +436,7 @@ def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_wri @tvm.script.ir_module class Conv2dDoubleCascade4: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 2, 8, 16), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 2, 8, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1456], "uint8") @@ -458,7 +458,7 @@ def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_w @tvm.script.ir_module class Conv2dDoubleCascade5: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: T.Buffer[(1, 32, 32, 8), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write: 
T.Buffer((1, 32, 32, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -480,7 +480,7 @@ def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: @tvm.script.ir_module class Conv2dDoubleCascade6: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write: T.Buffer[(1, 32, 2, 32, 16), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write: T.Buffer((1, 32, 2, 32, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([1456], "uint8") @@ -644,7 +644,7 @@ def _get_func( @tvm.script.ir_module class Conv2dInlineCopy1: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 10, 12, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([848], "uint8") @@ -659,7 +659,7 @@ def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_wri @tvm.script.ir_module class Conv2dInlineCopy2: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 7, 9, 5), "int8"], input_ethosu_write_1: T.Buffer[(1, 3, 5, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 7, 9, 5), "int8"), input_ethosu_write_1: T.Buffer((1, 3, 5, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -703,7 +703,7 @@ def _get_func(ifm_shape, lower, upper, ofm_channels=16): @tvm.script.ir_module class Conv2dInlineReshape1: @T.prim_func - def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((4, 6, 8, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -719,7 +719,7 @@ def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write @tvm.script.ir_module class Conv2dInlineReshape2: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 24, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -735,7 +735,7 @@ def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1 @tvm.script.ir_module class Conv2dInlineReshape3: @T.prim_func - def main(input_placeholder_3: T.Buffer[(192, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((192, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") @@ -751,7 +751,7 @@ def main(input_placeholder_3: 
T.Buffer[(192, 1), "int8"], input_ethosu_write_1: @tvm.script.ir_module class Conv2dInlineReshape4: @T.prim_func - def main(placeholder_3: T.Buffer[(192,), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: + def main(placeholder_3: T.Buffer((192,), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py index 29e1f9814c819..94763c5d3fbf9 100644 --- a/tests/python/contrib/test_ethosu/test_replace_copy.py +++ b/tests/python/contrib/test_ethosu/test_replace_copy.py @@ -34,7 +34,7 @@ @tvm.script.ir_module class ReferenceModule: @T.prim_func - def main(input_placeholder_3: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: + def main(input_placeholder_3: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer_1 = T.Buffer([384], "uint8") @@ -78,7 +78,7 @@ def _get_func(): @tvm.script.ir_module class WeightStream: @T.prim_func - def main(input_placeholder_5: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 16), "int8"]) -> None: + def main(input_placeholder_5: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 16), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([528], "uint8") diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py index eb3f4c7c2a312..21f0b7c157ae1 100644 --- a/tests/python/contrib/test_ethosu/test_scheduler.py +++ b/tests/python/contrib/test_ethosu/test_scheduler.py @@ -180,7 +180,7 @@ def test_schedule_cache_reads(): @tvm.script.ir_module class DiamondGraphTir: @T.prim_func - def main(input_placeholder: T.Buffer[(1, 56, 56, 96), "int8"], input_ethosu_write: T.Buffer[(1, 56, 56, 24), "int8"]) -> None: + def main(input_placeholder: T.Buffer((1, 56, 56, 96), "int8"), input_ethosu_write: T.Buffer((1, 56, 56, 24), "int8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data) ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data) diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py index 632fe0017f957..22f886a5917a0 100644 --- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py +++ b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py @@ -33,7 +33,7 @@ @tvm.script.ir_module class SingleEthosUConv2D: @T.prim_func - def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(1024,), "int8"]) -> None: + def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((1024,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.Buffer([1], "uint8") @@ -48,7 +48,7 @@ def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(10 @tvm.script.ir_module class MultiEthosUConv2D: @T.prim_func - def 
main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512,), "int8"]) -> None: + def main(placeholder_6: T.Buffer((192,), "int8"), ethosu_conv2d_1: T.Buffer((512,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_9 = T.Buffer([1], "uint8") @@ -70,7 +70,7 @@ def main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512 @tvm.script.ir_module class MultiEthosUCopy: @T.prim_func - def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((2048,), "int8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_5 = T.Buffer([1], "int32") @@ -89,7 +89,7 @@ def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(20 @tvm.script.ir_module class WeightStreamOnly: @T.prim_func - def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), "int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "uint8") @@ -135,7 +135,7 @@ def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), @tvm.script.ir_module class MixedRead: @T.prim_func - def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: + def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), "int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "uint8") @@ -672,7 +672,7 @@ def populate_ethosu_copy_calls(stmt): @tvm.script.ir_module class MixedConstantDatatypes: @T.prim_func - def main(placeholder_4: T.Buffer[(2048,), "int8"], ethosu_write_1: T.Buffer[(16,), "int8"]) -> None: + def main(placeholder_4: T.Buffer((2048,), "int8"), ethosu_write_1: T.Buffer((16,), "int8")) -> None: buffer = T.Buffer([1], "uint8") buffer_1 = T.Buffer([1], "uint8") buffer_2 = T.Buffer([1], "int16") diff --git a/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py b/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py index 2b6bca008e057..f8d9b5f596be7 100644 --- a/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py +++ b/tests/python/contrib/test_hexagon/test_async_dma_pipeline.py @@ -28,12 +28,12 @@ # pylint: disable=invalid-name @T.prim_func def conv2d_async_non_contig( - p0: T.Buffer[(T.int64(1), T.int64(1), T.int64(56), T.int64(56), T.int64(4)), "uint8"], - fused_constant_1: T.Buffer[ + p0: T.Buffer((T.int64(1), T.int64(1), T.int64(56), T.int64(56), T.int64(4)), "uint8"), + fused_constant_1: T.Buffer( (T.int64(1), T.int64(1), T.int64(3), T.int64(3), T.int64(1), T.int64(32), T.int64(4)), "uint8", ], - conv2d_NCHWc_int8: T.Buffer[ + conv2d_NCHWc_int8: T.Buffer( (T.int64(1), T.int64(1), T.int64(54), T.int64(54), T.int64(32)), "int32" ], ): @@ -538,9 +538,9 @@ class ModulePipelined: # pylint: disable=no-self-argument @T.prim_func def main( - p0_buffer: T.Buffer[(1, 1, 230, 230, 4), "uint8"], - p1_buffer: T.Buffer[(2, 1, 7, 7, 1, 32, 4), "int8"], - t_cast: T.Buffer[(1, 2, 112, 112, 32), "int32"], + p0_buffer: T.Buffer((1, 1, 230, 230, 4), "uint8"), + p1_buffer: T.Buffer((2, 1, 7, 7, 1, 32, 4), "int8"), + t_cast: T.Buffer((1, 2, 112, 112, 32), "int32"), ) -> None: # pylint: disable=missing-function-docstring # function attr dict @@ -690,9 +690,9 @@ 
class ModuleBase: # pylint: disable=no-self-argument @T.prim_func def main( - p0_buffer: T.Buffer[(1, 1, 230, 230, 4), "uint8"], - p1_buffer: T.Buffer[(2, 1, 7, 7, 1, 32, 4), "int8"], - t_cast: T.Buffer[(1, 2, 112, 112, 32), "int32"], + p0_buffer: T.Buffer((1, 1, 230, 230, 4), "uint8"), + p1_buffer: T.Buffer((2, 1, 7, 7, 1, 32, 4), "int8"), + t_cast: T.Buffer((1, 2, 112, 112, 32), "int32"), ) -> None: # pylint: disable=missing-function-docstring # function attr dict diff --git a/tests/python/contrib/test_hexagon/test_meta_schedule.py b/tests/python/contrib/test_hexagon/test_meta_schedule.py index 1089f0f035891..a64f0fc286535 100644 --- a/tests/python/contrib/test_hexagon/test_meta_schedule.py +++ b/tests/python/contrib/test_hexagon/test_meta_schedule.py @@ -241,9 +241,9 @@ class ModuleVRMPYAutoTensorize: # pylint: disable=no-self-argument @T.prim_func def main( # type: ignore - X: T.Buffer[(128, 768), "uint8"], # type: ignore - packed_width: T.Buffer[(24, 192, 32, 4), "uint8"], # type: ignore - compute: T.Buffer[(128, 768), "int32"], # type: ignore + X: T.Buffer((128, 768), "uint8"), # type: ignore + packed_width: T.Buffer((24, 192, 32, 4), "uint8"), # type: ignore + compute: T.Buffer((128, 768), "int32"), # type: ignore ) -> None: # pylint: disable=missing-function-docstring T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/contrib/test_hexagon/test_software_pipeline_async.py b/tests/python/contrib/test_hexagon/test_software_pipeline_async.py index c831472a521d4..7c010f363fe1e 100644 --- a/tests/python/contrib/test_hexagon/test_software_pipeline_async.py +++ b/tests/python/contrib/test_hexagon/test_software_pipeline_async.py @@ -31,7 +31,7 @@ def compute(comp_type, outer, inner, dtype): @T.prim_func def a_plus_1_primfunc( - a_buffer: T.Buffer[(outer, inner), dtype], out: T.Buffer[(outer, inner), dtype] + a_buffer: T.Buffer((outer, inner), dtype), out: T.Buffer((outer, inner), dtype) ): for i in T.serial(outer): for j in T.serial(inner): @@ -44,9 +44,9 @@ def a_plus_1_primfunc( @T.prim_func def a_plus_b_plus_1_primfunc( - a_buffer: T.Buffer[(outer, inner), dtype], - b_buffer: T.Buffer[(outer, inner), dtype], - out: T.Buffer[(outer, inner), dtype], + a_buffer: T.Buffer((outer, inner), dtype), + b_buffer: T.Buffer((outer, inner), dtype), + out: T.Buffer((outer, inner), dtype), ): for i in T.serial(outer): for j in T.serial(inner): diff --git a/tests/python/contrib/test_hexagon/test_vtcm.py b/tests/python/contrib/test_hexagon/test_vtcm.py index e71f890740c14..a549588e57680 100644 --- a/tests/python/contrib/test_hexagon/test_vtcm.py +++ b/tests/python/contrib/test_hexagon/test_vtcm.py @@ -24,7 +24,7 @@ @T.prim_func -def scale_by_two(buffer_a: T.Buffer[(8192,), "int8"], buffer_c: T.Buffer[(8192,), "int8"]): +def scale_by_two(buffer_a: T.Buffer((8192,), "int8"), buffer_c: T.Buffer((8192,), "int8")): for i in T.serial( 0, 8192, diff --git a/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py b/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py index 3f7e999c7bcac..7cde83e0cb777 100755 --- a/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py +++ b/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py @@ -1,167 +1,167 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test code for float16 and uint8 global_avg_pool2d.""" - -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import adaptive_pool -import tvm.topi.hexagon.qnn as qn -import tvm.topi.hexagon.slice_ops as sl -from tvm.contrib.hexagon import allocate_hexagon_array -from ...infrastructure import transform_numpy, quantize_np, get_hexagon_target - - -SCALE_M_VAL = None -ZERO_POINT_M_VAL = None -SCALE_VAL = None -ZERO_POINT_VAL = None - - -class TestGlobalPool2D: - (input_shape,) = tvm.testing.parameters( - ([1, 32, 8, 8],), - ([1, 1056, 16, 16],), - ) - - # Fixed chunk layout is set as nchw-32c8h8w-2d for uint8 and nchw-32c8h4w-2d for float16. - # For optimization, it might get changed later. - # Since output shape will be NxCx1x1 which is not a - # multiple of fixed-chunk, output_layout is NCHW. - input_layout, output_layout, pool_type, layout, dtype = tvm.testing.parameters( - ("nchw-32c8h8w-2d", "nchw", "avg", "NCHW", "uint8"), - ("nchw-32c8h4w-2d", "nchw", "avg", "NCHW", "float16"), - ) - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - pool_type, - layout, - ): - """Generate expected output.""" - ref_np = tvm.topi.testing.adaptive_pool( - input_np, - (1, 1), - pool_type, - layout, - ) - return ref_np - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype in ("uint8", "int8"): - dtype = "float32" - return np.random.random(input_shape).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np(self, input_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_VAL, SCALE_VAL - input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) - return input_np_quantized - - @tvm.testing.fixture - def transformed_input_np(self, input_np, quantize_input_np, input_layout, layout, dtype): - if dtype == "float16": - return transform_numpy(input_np, layout.lower(), input_layout) - if dtype in ("uint8", "int8"): - return transform_numpy(quantize_input_np, layout.lower(), input_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def quantize_expected_output_np(self, expected_output_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - - # Since output_layout is nchw, no transformation is needed. 
- return out_ref_quantized - - @tvm.testing.requires_hexagon - def test_global_pool2d( - self, - dtype, - input_shape, - input_layout, - transformed_input_np, - expected_output_np, - quantize_expected_output_np, - hexagon_session, - ): - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - if dtype == "float16": - m_tensor = sl.global_avg_pool2d(a_tensor) - tir_schedule = sl.stir_global_avg_pool2d_schedule(m_tensor, a_tensor, input_layout) - elif dtype in ["uint8", "int8"]: - m_tensor = qn.global_avg_pool2d_u8( - a_tensor, - dtype, - ZERO_POINT_VAL, - SCALE_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - ) - tir_schedule = qn.stir_global_avg_pool2d_u8_schedule(m_tensor, a_tensor, input_layout) - - sch = tir_schedule.mod - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="global_pool2d", - ) - - input_axis_separator = [4] - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - expected_output_np.shape, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - # Convert nd to np - m_data_np = m_data_nd.numpy() - - if dtype == "float16": - np.testing.assert_allclose(expected_output_np, m_data_np, rtol=1e-3, atol=1e-3) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test code for float16 and uint8 global_avg_pool2d.""" + +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import adaptive_pool +import tvm.topi.hexagon.qnn as qn +import tvm.topi.hexagon.slice_ops as sl +from tvm.contrib.hexagon import allocate_hexagon_array +from ...infrastructure import transform_numpy, quantize_np, get_hexagon_target + + +SCALE_M_VAL = None +ZERO_POINT_M_VAL = None +SCALE_VAL = None +ZERO_POINT_VAL = None + + +class TestGlobalPool2D: + (input_shape,) = tvm.testing.parameters( + ([1, 32, 8, 8],), + ([1, 1056, 16, 16],), + ) + + # Fixed chunk layout is set as nchw-32c8h8w-2d for uint8 and nchw-32c8h4w-2d for float16. + # For optimization, it might get changed later. + # Since output shape will be NxCx1x1 which is not a + # multiple of fixed-chunk, output_layout is NCHW. 
+ input_layout, output_layout, pool_type, layout, dtype = tvm.testing.parameters( + ("nchw-32c8h8w-2d", "nchw", "avg", "NCHW", "uint8"), + ("nchw-32c8h4w-2d", "nchw", "avg", "NCHW", "float16"), + ) + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + pool_type, + layout, + ): + """Generate expected output.""" + ref_np = tvm.topi.testing.adaptive_pool( + input_np, + (1, 1), + pool_type, + layout, + ) + return ref_np + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype in ("uint8", "int8"): + dtype = "float32" + return np.random.random(input_shape).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np(self, input_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_VAL, SCALE_VAL + input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) + return input_np_quantized + + @tvm.testing.fixture + def transformed_input_np(self, input_np, quantize_input_np, input_layout, layout, dtype): + if dtype == "float16": + return transform_numpy(input_np, layout.lower(), input_layout) + if dtype in ("uint8", "int8"): + return transform_numpy(quantize_input_np, layout.lower(), input_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def quantize_expected_output_np(self, expected_output_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + + # Since output_layout is nchw, no transformation is needed. + return out_ref_quantized + + @tvm.testing.requires_hexagon + def test_global_pool2d( + self, + dtype, + input_shape, + input_layout, + transformed_input_np, + expected_output_np, + quantize_expected_output_np, + hexagon_session, + ): + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + if dtype == "float16": + m_tensor = sl.global_avg_pool2d(a_tensor) + tir_schedule = sl.stir_global_avg_pool2d_schedule(m_tensor, a_tensor, input_layout) + elif dtype in ["uint8", "int8"]: + m_tensor = qn.global_avg_pool2d_u8( + a_tensor, + dtype, + ZERO_POINT_VAL, + SCALE_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + ) + tir_schedule = qn.stir_global_avg_pool2d_u8_schedule(m_tensor, a_tensor, input_layout) + + sch = tir_schedule.mod + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="global_pool2d", + ) + + input_axis_separator = [4] + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + expected_output_np.shape, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + # Convert nd to np + m_data_np = m_data_nd.numpy() + + if dtype == "float16": + np.testing.assert_allclose(expected_output_np, m_data_np, rtol=1e-3, atol=1e-3) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py b/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py index 4d4aef25e33f1..e5b6c4d79065f 100755 --- a/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py +++ b/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py @@ -1,185 +1,185 @@ -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test code for specialized case of adaptive_avg_pool1d.""" - -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import adaptive_pool -import tvm.topi.hexagon.qnn as s1 -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import transform_numpy, quantize_np - - -SCALE_M_VAL = None -ZERO_POINT_M_VAL = None -SCALE_VAL = None -ZERO_POINT_VAL = None - - -class TestAdaptivePool1D: - """Test specialized case of adaptive_avg_pool1d.""" - - (input_shape,) = tvm.testing.parameters( - ([1, 128, 128],), - ([1, 64, 64],), - ([1, 64, 128],), - ([1, 32, 64],), - ([1, 128, 768],), - ) - - # Fixed chunk layout is set as ncw-32c64w-2d for now. - # The adaptive_avg_pool1d implementation only handles specialized case - # where output_size is 1 as it appears on quantized distilbert model. - # Since output size won't be a multiple of fixed-chunk, - # output_layout is ncw. - # For optimization, it might get changed later. - input_layout, output_layout, pool_type, layout, output_size, dtype, = tvm.testing.parameters( - ( - "ncw-32c64w-2d", - "ncw", - "avg", - "NCW", - [1], - "uint8", - ) - ) - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - output_size, - pool_type, - layout, - ): - """Generate expected output.""" - out_width = output_size[0] - - ref_np = adaptive_pool( - input_np, - out_width, - pool_type, - layout, - ) - return ref_np - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype in ("uint8", "int8"): - dtype = "float32" - return np.random.random(input_shape).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np(self, input_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_VAL, SCALE_VAL - input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) - return input_np_quantized - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_input_np(self, quantize_input_np, input_layout, layout, dtype): - if dtype in ("uint8", "int8"): - return transform_numpy(quantize_input_np, layout.lower(), input_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def quantize_expected_output_np(self, expected_output_np, dtype): - """Generate expected output.""" - if dtype in ("uint8", "int8"): - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - - # Since output_layout is ncw, no transformation is needed. 
- return out_ref_quantized - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.requires_hexagon - def test_pool1d( - self, - dtype, - output_size, - input_layout, - output_layout, - input_shape, - transformed_input_np, - quantize_expected_output_np, - hexagon_session, - ): - """Test adaptive_avg_pool1d.""" - target_hexagon = tvm.target.hexagon("v69") - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - m_tensor = s1.adaptive_avg_pool1d( - a_tensor, - output_size, - dtype, - ZERO_POINT_VAL, - SCALE_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - ) - - tir_schedule = s1.tir_adaptive_avg_pool1d_schedule( - m_tensor, a_tensor, output_layout, input_layout - ) - - sch = tir_schedule.mod - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - tvm.target.Target(target_hexagon, host=target_hexagon), - name="adaptive_pool1d", - ) - - input_axis_separator = [3] - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - quantize_expected_output_np.shape, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - # Convert nd to np - m_data_np = m_data_nd.numpy() - - np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=2) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test code for specialized case of adaptive_avg_pool1d.""" + +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import adaptive_pool +import tvm.topi.hexagon.qnn as s1 +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import transform_numpy, quantize_np + + +SCALE_M_VAL = None +ZERO_POINT_M_VAL = None +SCALE_VAL = None +ZERO_POINT_VAL = None + + +class TestAdaptivePool1D: + """Test specialized case of adaptive_avg_pool1d.""" + + (input_shape,) = tvm.testing.parameters( + ([1, 128, 128],), + ([1, 64, 64],), + ([1, 64, 128],), + ([1, 32, 64],), + ([1, 128, 768],), + ) + + # Fixed chunk layout is set as ncw-32c64w-2d for now. + # The adaptive_avg_pool1d implementation only handles specialized case + # where output_size is 1 as it appears on quantized distilbert model. + # Since output size won't be a multiple of fixed-chunk, + # output_layout is ncw. + # For optimization, it might get changed later. 
+ input_layout, output_layout, pool_type, layout, output_size, dtype, = tvm.testing.parameters( + ( + "ncw-32c64w-2d", + "ncw", + "avg", + "NCW", + [1], + "uint8", + ) + ) + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + output_size, + pool_type, + layout, + ): + """Generate expected output.""" + out_width = output_size[0] + + ref_np = adaptive_pool( + input_np, + out_width, + pool_type, + layout, + ) + return ref_np + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype in ("uint8", "int8"): + dtype = "float32" + return np.random.random(input_shape).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np(self, input_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_VAL, SCALE_VAL + input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) + return input_np_quantized + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_input_np(self, quantize_input_np, input_layout, layout, dtype): + if dtype in ("uint8", "int8"): + return transform_numpy(quantize_input_np, layout.lower(), input_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def quantize_expected_output_np(self, expected_output_np, dtype): + """Generate expected output.""" + if dtype in ("uint8", "int8"): + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + + # Since output_layout is ncw, no transformation is needed. + return out_ref_quantized + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.requires_hexagon + def test_pool1d( + self, + dtype, + output_size, + input_layout, + output_layout, + input_shape, + transformed_input_np, + quantize_expected_output_np, + hexagon_session, + ): + """Test adaptive_avg_pool1d.""" + target_hexagon = tvm.target.hexagon("v69") + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + m_tensor = s1.adaptive_avg_pool1d( + a_tensor, + output_size, + dtype, + ZERO_POINT_VAL, + SCALE_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + ) + + tir_schedule = s1.tir_adaptive_avg_pool1d_schedule( + m_tensor, a_tensor, output_layout, input_layout + ) + + sch = tir_schedule.mod + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + tvm.target.Target(target_hexagon, host=target_hexagon), + name="adaptive_pool1d", + ) + + input_axis_separator = [3] + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + quantize_expected_output_np.shape, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + # Convert nd to np + m_data_np = m_data_nd.numpy() + + np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=2) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py b/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py index e0bb6b5864d39..94cb5ffca543a 100644 --- a/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py +++ b/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py @@ -1,411 +1,411 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test code for Add, Subtract and Multiply.""" -import numpy as np - -import tvm -from tvm import te -import tvm.topi.hexagon.slice_ops as sl -import tvm.topi.hexagon.qnn as qn -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import ( - transform_numpy, - quantize_np, - get_hexagon_target, -) - -ZERO_POINT_A_VAL = None -SCALE_A_VAL = None - -ZERO_POINT_B_VAL = None -SCALE_B_VAL = None - -ZERO_POINT_M_VAL = None -SCALE_M_VAL = None - - -def hexagon_wrapper_allocation( - device, - layout, - axis_separators, - tensor_shape=None, - data_original=None, - transformed_data=None, - dtype=None, -): - """Input layout can either be nhwc-8h2w32c2w-2d or nhwc""" - if layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h8w32c-2d"]: - data_nd = allocate_hexagon_array( - device, - tensor_shape=tensor_shape, - data=transformed_data, - dtype=dtype, - axis_separators=axis_separators, - mem_scope="global.vtcm", - ) - elif layout == "nhwc": - data_nd = allocate_hexagon_array( - device, - data=data_original, - ) - return data_nd - - -class TestAddSubtractMultiplyBroadcast2d: - """Test Add, Subtract and Multiply class.""" - - ( - input_shape_a, - input_shape_b, - input_a_layout, - input_b_layout, - output_layout, - dtype, - ) = tvm.testing.parameters( - # no broadcast needed - short input - ( - [1, 8, 4, 32], - [1, 8, 4, 32], - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # no broadcast needed - large input - ( - [1, 56, 64, 128], - [1, 56, 64, 128], - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # one input needs broadcast - ( - [1, 56, 64, 128], - [1, 1, 64, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # Both input needs broadcast - ( - [1, 56, 1, 128], - [1, 1, 64, 1], - "nhwc", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # One axis in one input needs broadcast - ( - [1, 56, 20, 128], - [1, 56, 20, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # broadcast all axes in one input - ( - [1, 48, 56, 32], - [1, 1, 1, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - ( - [1, 48, 32, 64], - [1, 48, 32, 64], - "nhwc-8h8w32c-2d", - "nhwc-8h8w32c-2d", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 2 in one input - ( - [1, 48, 32, 64], - [1, 48, 1, 64], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 1 in one input - ( - [1, 48, 32, 64], - [1, 1, 32, 64], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 3 in one input - ( - [1, 8, 8, 32], - [1, 8, 8, 1], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast both inputs - ( - [1, 56, 1, 128], - [1, 1, 64, 1], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # 
broadcast both inputs - ( - [1, 48, 1, 1], - [1, 1, 32, 32], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast both inputs - ( - [1, 48, 1, 32], - [1, 1, 32, 1], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast all axes in one input - ( - [1, 48, 56, 32], - [1, 1, 1, 1], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - ) - - op_name = tvm.testing.parameter("add", "subtract", "multiply") - - @tvm.testing.fixture - def expected_output_np(self, input_np_a, input_np_b, op_name): - """Generate expected output.""" - if op_name == "add": - out_ref = np.add(input_np_a, input_np_b) - elif op_name == "subtract": - out_ref = np.subtract(input_np_a, input_np_b) - elif op_name == "multiply": - out_ref = np.multiply(input_np_a, input_np_b) - return out_ref - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, output_layout, dtype): - """Generate expected output.""" - if dtype == "float16": - return transform_numpy(expected_output_np, "nhwc", output_layout) - if dtype in ["uint8", "int8"]: - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - return transform_numpy(out_ref_quantized, "nhwc", output_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def input_np_a(self, input_shape_a, dtype): - """Generate numpy input for variable a.""" - if dtype in ["uint8", "int8"]: - dtype = "float32" - return np.random.random(input_shape_a).astype(dtype) - - @tvm.testing.fixture - def input_np_b(self, input_shape_b, dtype): - """Generate numpy input for variable b.""" - if dtype in ["uint8", "int8"]: - dtype = "float32" - return np.random.random(input_shape_b).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np_a(self, input_np_a, dtype): - if dtype in ["uint8", "int8"]: - global ZERO_POINT_A_VAL, SCALE_A_VAL - input_np_a_quantized, SCALE_A_VAL, ZERO_POINT_A_VAL = quantize_np(input_np_a, dtype) - return input_np_a_quantized - return None - - @tvm.testing.fixture - def quantize_input_np_b(self, input_np_b, dtype): - if dtype in ["uint8", "int8"]: - global ZERO_POINT_B_VAL, SCALE_B_VAL - input_np_b_quantized, SCALE_B_VAL, ZERO_POINT_B_VAL = quantize_np(input_np_b, dtype) - return input_np_b_quantized - return None - - @tvm.testing.fixture - def transformed_input_np_a(self, input_np_a, quantize_input_np_a, input_a_layout, dtype): - if dtype == "float16": - return transform_numpy(input_np_a, "nhwc", input_a_layout) - if dtype in ["uint8", "int8"]: - return transform_numpy(quantize_input_np_a, "nhwc", input_a_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_input_np_b(self, input_np_b, quantize_input_np_b, input_b_layout, dtype): - if dtype == "float16": - return transform_numpy(input_np_b, "nhwc", input_b_layout) - if dtype in ["uint8", "int8"]: - return transform_numpy(quantize_input_np_b, "nhwc", input_b_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.requires_hexagon - def test_transform( - self, - dtype, - input_shape_a, - input_shape_b, - input_np_a, - input_np_b, - quantize_input_np_a, - quantize_input_np_b, - transformed_input_np_a, - transformed_input_np_b, - expected_output_np, - transformed_expected_output_np, - hexagon_session, - output_layout, - input_a_layout, - input_b_layout, - op_name, - ): - """Test transform.""" - output_shape = expected_output_np.shape - a_tensor = 
te.placeholder(input_shape_a, name="a_tensor", dtype=dtype) - b_tensor = te.placeholder(input_shape_b, name="b_tensor", dtype=dtype) - if dtype == "float16": - if op_name == "add": - m_tensor = sl.add_broadcast_compute(a_tensor, b_tensor) - elif op_name == "subtract": - m_tensor = sl.subtract_broadcast_compute(a_tensor, b_tensor) - elif op_name == "multiply": - m_tensor = sl.multiply_broadcast_compute(a_tensor, b_tensor) - tir_schedule = sl.tir_broadcast_schedule( - m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout, op_name - ) - elif dtype in ["uint8", "int8"]: - args = [ - a_tensor, - b_tensor, - output_shape, - ZERO_POINT_A_VAL, - SCALE_A_VAL, - ZERO_POINT_B_VAL, - SCALE_B_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - dtype, - ] - if op_name == "add": - m_tensor = qn.qadd_broadcast_compute(*args) - elif op_name == "subtract": - m_tensor = qn.qsubtract_broadcast_compute(*args) - elif op_name == "multiply": - m_tensor = qn.qmultiply_broadcast_compute(*args) - tir_schedule = qn.tir_schedule_quant( - m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - if output_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - output_axis_separator = [4] - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, b_tensor, m_tensor], - get_hexagon_target("v69"), - name="slice_op_with_transform", - ) - - if dtype == "float16": - in_data_np_a = input_np_a - in_data_np_b = input_np_b - elif dtype in ["int8", "uint8"]: - in_data_np_a = quantize_input_np_a - in_data_np_b = quantize_input_np_b - else: - raise RuntimeError(f"Unsupport dtype '{dtype}'") - - a_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=input_a_layout, - data_original=in_data_np_a, - transformed_data=transformed_input_np_a, - axis_separators=input_axis_separator, - ) - b_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=input_b_layout, - data_original=in_data_np_b, - transformed_data=transformed_input_np_b, - axis_separators=input_axis_separator, - ) - m_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=output_layout, - tensor_shape=transformed_expected_output_np.shape, - axis_separators=output_axis_separator, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, b_data_nd, m_data_nd) - - batch, height, width, channel = output_shape - # convert nd to np and reshape to fixed chunk size layout - if output_layout == "nhwc-8h2w32c2w-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch, height // 8, width // 4, channel // 32, 8, 2, 32, 2] - ) - elif output_layout == "nhwc-8h8w32c-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch, height // 8, width // 8, channel // 32, 8, 8, 32] - ) - - if dtype == "float16": - np.testing.assert_allclose( - transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 - ) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test code for Add, Subtract and Multiply.""" +import numpy as np + +import tvm +from tvm import te +import tvm.topi.hexagon.slice_ops as sl +import tvm.topi.hexagon.qnn as qn +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import ( + transform_numpy, + quantize_np, + get_hexagon_target, +) + +ZERO_POINT_A_VAL = None +SCALE_A_VAL = None + +ZERO_POINT_B_VAL = None +SCALE_B_VAL = None + +ZERO_POINT_M_VAL = None +SCALE_M_VAL = None + + +def hexagon_wrapper_allocation( + device, + layout, + axis_separators, + tensor_shape=None, + data_original=None, + transformed_data=None, + dtype=None, +): + """Input layout can either be nhwc-8h2w32c2w-2d or nhwc""" + if layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h8w32c-2d"]: + data_nd = allocate_hexagon_array( + device, + tensor_shape=tensor_shape, + data=transformed_data, + dtype=dtype, + axis_separators=axis_separators, + mem_scope="global.vtcm", + ) + elif layout == "nhwc": + data_nd = allocate_hexagon_array( + device, + data=data_original, + ) + return data_nd + + +class TestAddSubtractMultiplyBroadcast2d: + """Test Add, Subtract and Multiply class.""" + + ( + input_shape_a, + input_shape_b, + input_a_layout, + input_b_layout, + output_layout, + dtype, + ) = tvm.testing.parameters( + # no broadcast needed - short input + ( + [1, 8, 4, 32], + [1, 8, 4, 32], + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # no broadcast needed - large input + ( + [1, 56, 64, 128], + [1, 56, 64, 128], + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # one input needs broadcast + ( + [1, 56, 64, 128], + [1, 1, 64, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # Both input needs broadcast + ( + [1, 56, 1, 128], + [1, 1, 64, 1], + "nhwc", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # One axis in one input needs broadcast + ( + [1, 56, 20, 128], + [1, 56, 20, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # broadcast all axes in one input + ( + [1, 48, 56, 32], + [1, 1, 1, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + ( + [1, 48, 32, 64], + [1, 48, 32, 64], + "nhwc-8h8w32c-2d", + "nhwc-8h8w32c-2d", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 2 in one input + ( + [1, 48, 32, 64], + [1, 48, 1, 64], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 1 in one input + ( + [1, 48, 32, 64], + [1, 1, 32, 64], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 3 in one input + ( + [1, 8, 8, 32], + [1, 8, 8, 1], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast both inputs + ( + [1, 56, 1, 128], + [1, 1, 64, 1], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast both inputs + ( + [1, 48, 1, 1], + [1, 1, 32, 32], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), 
+ # broadcast both inputs + ( + [1, 48, 1, 32], + [1, 1, 32, 1], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast all axes in one input + ( + [1, 48, 56, 32], + [1, 1, 1, 1], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + ) + + op_name = tvm.testing.parameter("add", "subtract", "multiply") + + @tvm.testing.fixture + def expected_output_np(self, input_np_a, input_np_b, op_name): + """Generate expected output.""" + if op_name == "add": + out_ref = np.add(input_np_a, input_np_b) + elif op_name == "subtract": + out_ref = np.subtract(input_np_a, input_np_b) + elif op_name == "multiply": + out_ref = np.multiply(input_np_a, input_np_b) + return out_ref + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, output_layout, dtype): + """Generate expected output.""" + if dtype == "float16": + return transform_numpy(expected_output_np, "nhwc", output_layout) + if dtype in ["uint8", "int8"]: + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + return transform_numpy(out_ref_quantized, "nhwc", output_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def input_np_a(self, input_shape_a, dtype): + """Generate numpy input for variable a.""" + if dtype in ["uint8", "int8"]: + dtype = "float32" + return np.random.random(input_shape_a).astype(dtype) + + @tvm.testing.fixture + def input_np_b(self, input_shape_b, dtype): + """Generate numpy input for variable b.""" + if dtype in ["uint8", "int8"]: + dtype = "float32" + return np.random.random(input_shape_b).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np_a(self, input_np_a, dtype): + if dtype in ["uint8", "int8"]: + global ZERO_POINT_A_VAL, SCALE_A_VAL + input_np_a_quantized, SCALE_A_VAL, ZERO_POINT_A_VAL = quantize_np(input_np_a, dtype) + return input_np_a_quantized + return None + + @tvm.testing.fixture + def quantize_input_np_b(self, input_np_b, dtype): + if dtype in ["uint8", "int8"]: + global ZERO_POINT_B_VAL, SCALE_B_VAL + input_np_b_quantized, SCALE_B_VAL, ZERO_POINT_B_VAL = quantize_np(input_np_b, dtype) + return input_np_b_quantized + return None + + @tvm.testing.fixture + def transformed_input_np_a(self, input_np_a, quantize_input_np_a, input_a_layout, dtype): + if dtype == "float16": + return transform_numpy(input_np_a, "nhwc", input_a_layout) + if dtype in ["uint8", "int8"]: + return transform_numpy(quantize_input_np_a, "nhwc", input_a_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_input_np_b(self, input_np_b, quantize_input_np_b, input_b_layout, dtype): + if dtype == "float16": + return transform_numpy(input_np_b, "nhwc", input_b_layout) + if dtype in ["uint8", "int8"]: + return transform_numpy(quantize_input_np_b, "nhwc", input_b_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.requires_hexagon + def test_transform( + self, + dtype, + input_shape_a, + input_shape_b, + input_np_a, + input_np_b, + quantize_input_np_a, + quantize_input_np_b, + transformed_input_np_a, + transformed_input_np_b, + expected_output_np, + transformed_expected_output_np, + hexagon_session, + output_layout, + input_a_layout, + input_b_layout, + op_name, + ): + """Test transform.""" + output_shape = expected_output_np.shape + a_tensor = te.placeholder(input_shape_a, name="a_tensor", dtype=dtype) + b_tensor = te.placeholder(input_shape_b, name="b_tensor", 
dtype=dtype) + if dtype == "float16": + if op_name == "add": + m_tensor = sl.add_broadcast_compute(a_tensor, b_tensor) + elif op_name == "subtract": + m_tensor = sl.subtract_broadcast_compute(a_tensor, b_tensor) + elif op_name == "multiply": + m_tensor = sl.multiply_broadcast_compute(a_tensor, b_tensor) + tir_schedule = sl.tir_broadcast_schedule( + m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout, op_name + ) + elif dtype in ["uint8", "int8"]: + args = [ + a_tensor, + b_tensor, + output_shape, + ZERO_POINT_A_VAL, + SCALE_A_VAL, + ZERO_POINT_B_VAL, + SCALE_B_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + dtype, + ] + if op_name == "add": + m_tensor = qn.qadd_broadcast_compute(*args) + elif op_name == "subtract": + m_tensor = qn.qsubtract_broadcast_compute(*args) + elif op_name == "multiply": + m_tensor = qn.qmultiply_broadcast_compute(*args) + tir_schedule = qn.tir_schedule_quant( + m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + if output_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + output_axis_separator = [4] + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, b_tensor, m_tensor], + get_hexagon_target("v69"), + name="slice_op_with_transform", + ) + + if dtype == "float16": + in_data_np_a = input_np_a + in_data_np_b = input_np_b + elif dtype in ["int8", "uint8"]: + in_data_np_a = quantize_input_np_a + in_data_np_b = quantize_input_np_b + else: + raise RuntimeError(f"Unsupport dtype '{dtype}'") + + a_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=input_a_layout, + data_original=in_data_np_a, + transformed_data=transformed_input_np_a, + axis_separators=input_axis_separator, + ) + b_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=input_b_layout, + data_original=in_data_np_b, + transformed_data=transformed_input_np_b, + axis_separators=input_axis_separator, + ) + m_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=output_layout, + tensor_shape=transformed_expected_output_np.shape, + axis_separators=output_axis_separator, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, b_data_nd, m_data_nd) + + batch, height, width, channel = output_shape + # convert nd to np and reshape to fixed chunk size layout + if output_layout == "nhwc-8h2w32c2w-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch, height // 8, width // 4, channel // 32, 8, 2, 32, 2] + ) + elif output_layout == "nhwc-8h8w32c-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch, height // 8, width // 8, channel // 32, 8, 8, 32] + ) + + if dtype == "float16": + np.testing.assert_allclose( + transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 + ) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_quantize.py b/tests/python/contrib/test_hexagon/topi/test_quantize.py index ac4f4d4e30471..2c54b12ab98e5 100644 --- a/tests/python/contrib/test_hexagon/topi/test_quantize.py +++ b/tests/python/contrib/test_hexagon/topi/test_quantize.py @@ -1,128 +1,128 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""TIR quantize schedule tests.""" -import numpy as np - -import tvm -from tvm import te -import tvm.topi.hexagon.qnn as s1 -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import ( - transform_numpy, - quantize_np, - get_hexagon_target, -) - -QUANTIZE_SCALE = None -QUANTIZE_ZERO_POINT = None - - -class TestQuantize: - """Test quantize class.""" - - @tvm.testing.fixture - def expected_output_np(self, input_np, output_dtype): - global QUANTIZE_SCALE, QUANTIZE_ZERO_POINT - quant_np, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT = quantize_np(input_np, output_dtype) - return quant_np - - @tvm.testing.fixture - def input_np(self, input_shape, input_dtype): - return np.random.random(input_shape).astype(input_dtype) - - @tvm.testing.fixture - def transformed_input_np(self, input_np, input_crouton_layout): - return transform_numpy(input_np, "nhwc", input_crouton_layout) - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, output_layout): - return transform_numpy(expected_output_np, "nhwc", output_layout) - - input_crouton_layout, output_layout, input_dtype = tvm.testing.parameters( - ("nhwc-4h2w32c2w-2d", "nhwc-8h8w32c-2d", "float32"), - ) - - output_dtype = tvm.testing.parameter("uint8", "int8") - - input_shape = tvm.testing.parameter( - (1, 8, 8, 32), (1, 16, 16, 32), (1, 16, 16, 128), (1, 64, 64, 64) - ) - - @tvm.testing.requires_hexagon - def test_quantize( - self, - input_dtype, - output_dtype, - transformed_input_np, - input_shape, - expected_output_np, - transformed_expected_output_np, - input_crouton_layout, - output_layout, - hexagon_session, - ): - """Test quantize.""" - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=input_dtype) - - m_tensor = s1.quantize_compute(a_tensor, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT, output_dtype) - - tir_schedule = s1.tir_quantize_schedule( - m_tensor, a_tensor, input_crouton_layout, output_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - output_axis_separator = [4] - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="quantize", - ) - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=input_dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - tensor_shape=transformed_expected_output_np.shape, - dtype=output_dtype, - axis_separators=output_axis_separator, - mem_scope="global.vtcm", - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - b, h, weight, c = expected_output_np.shape - - # convert nd to np and reshape to fixed chunk size layout - m_data_np = m_data_nd.numpy().reshape([b, h // 8, weight // 8, c // 32, 8, 8, 32]) - - 
np.testing.assert_allclose(transformed_expected_output_np, m_data_np, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""TIR quantize schedule tests.""" +import numpy as np + +import tvm +from tvm import te +import tvm.topi.hexagon.qnn as s1 +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import ( + transform_numpy, + quantize_np, + get_hexagon_target, +) + +QUANTIZE_SCALE = None +QUANTIZE_ZERO_POINT = None + + +class TestQuantize: + """Test quantize class.""" + + @tvm.testing.fixture + def expected_output_np(self, input_np, output_dtype): + global QUANTIZE_SCALE, QUANTIZE_ZERO_POINT + quant_np, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT = quantize_np(input_np, output_dtype) + return quant_np + + @tvm.testing.fixture + def input_np(self, input_shape, input_dtype): + return np.random.random(input_shape).astype(input_dtype) + + @tvm.testing.fixture + def transformed_input_np(self, input_np, input_crouton_layout): + return transform_numpy(input_np, "nhwc", input_crouton_layout) + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, output_layout): + return transform_numpy(expected_output_np, "nhwc", output_layout) + + input_crouton_layout, output_layout, input_dtype = tvm.testing.parameters( + ("nhwc-4h2w32c2w-2d", "nhwc-8h8w32c-2d", "float32"), + ) + + output_dtype = tvm.testing.parameter("uint8", "int8") + + input_shape = tvm.testing.parameter( + (1, 8, 8, 32), (1, 16, 16, 32), (1, 16, 16, 128), (1, 64, 64, 64) + ) + + @tvm.testing.requires_hexagon + def test_quantize( + self, + input_dtype, + output_dtype, + transformed_input_np, + input_shape, + expected_output_np, + transformed_expected_output_np, + input_crouton_layout, + output_layout, + hexagon_session, + ): + """Test quantize.""" + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=input_dtype) + + m_tensor = s1.quantize_compute(a_tensor, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT, output_dtype) + + tir_schedule = s1.tir_quantize_schedule( + m_tensor, a_tensor, input_crouton_layout, output_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + output_axis_separator = [4] + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="quantize", + ) + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=input_dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + tensor_shape=transformed_expected_output_np.shape, + dtype=output_dtype, + axis_separators=output_axis_separator, + mem_scope="global.vtcm", + ) + + mod = hexagon_session.load_module(func) + 
mod(a_data_nd, m_data_nd) + + b, h, weight, c = expected_output_np.shape + + # convert nd to np and reshape to fixed chunk size layout + m_data_np = m_data_nd.numpy().reshape([b, h // 8, weight // 8, c // 32, 8, 8, 32]) + + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_resize2d.py b/tests/python/contrib/test_hexagon/topi/test_resize2d.py index c0c6e7ca0fb43..7d7d80fc7f7cc 100644 --- a/tests/python/contrib/test_hexagon/topi/test_resize2d.py +++ b/tests/python/contrib/test_hexagon/topi/test_resize2d.py @@ -1,202 +1,202 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Resize 2D tesst. -""" -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import resize2d_python -import tvm.topi.hexagon as s1 -from tvm.contrib.hexagon import allocate_hexagon_array - -from ..infrastructure import transform_numpy, get_hexagon_target - - -class TestResize2d: - """Test resize 2D class.""" - - (batch, channel, in_height, in_width, out_height, out_width,) = tvm.testing.parameters( - ( - 1, - 32, - 8, - 8, - 16, - 16, - ), - ( - 1, - 32, - 48, - 48, - 8, - 8, - ), - ) - - (layout, input_crouton_layout, output_layout, dtype,) = tvm.testing.parameters( - ("NHWC", "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "float16"), - ("NHWC", "nhwc-8h8w32c-2d", "nhwc-8h8w32c-2d", "uint8"), - ) - - coord_trans = tvm.testing.parameter("asymmetric", "align_corners", "half_pixel") - method = tvm.testing.parameter("nearest_neighbor", "linear") - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - in_height, - in_width, - out_height, - out_width, - layout, - method, - coord_trans, - ): - """Generate expected output.""" - scale_h = out_height / in_height - scale_w = out_width / in_width - - return resize2d_python(input_np, (scale_h, scale_w), layout, method, coord_trans) - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype == "float16": - return np.random.random(input_shape).astype(dtype) - if dtype == "uint8": - return np.random.randint(0, 255, input_shape).astype(dtype) - if dtype == "int8": - return np.random.randint(-128, 127, input_shape).astype(dtype) - raise RuntimeError(f"dtype {dtype} is not valid.") - - @tvm.testing.fixture - def transformed_input_np(self, input_np, layout, input_crouton_layout, dtype): - if dtype in ["float16", "uint8", "int8"]: - return transform_numpy(input_np, layout.lower(), input_crouton_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, layout, output_layout, dtype): - if dtype in ["float16", "uint8", "int8"]: - return transform_numpy(expected_output_np, 
layout.lower(), output_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def input_shape(self, batch, channel, in_height, in_width): - return (batch, in_height, in_width, channel) - - @tvm.testing.fixture - def output_shape(self, batch, channel, out_height, out_width): - return (batch, out_height, out_width, channel) - - @tvm.testing.requires_hexagon - def test_resize2d( - self, - dtype, - input_np, - transformed_input_np, - input_shape, - output_shape, - expected_output_np, - transformed_expected_output_np, - layout, - input_crouton_layout, - output_layout, - coord_trans, - method, - hexagon_session, - ): - """Test resize 2D.""" - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - m_tensor = s1.resize2d_compute( - a_tensor, - [0.0] * 4, - (output_shape[1], output_shape[2]), - layout=layout, - coordinate_transformation_mode=coord_trans, - method=method, - out_dtype=dtype, - ) - - tir_schedule = s1.tir_resize2d_schedule( - m_tensor, a_tensor, input_crouton_layout, output_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - if output_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - output_axis_separator = [4] - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="resize2d", - ) - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - transformed_expected_output_np.shape, - dtype=dtype, - axis_separators=output_axis_separator, - mem_scope="global.vtcm", - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - batch_size, height, width, channel = output_shape - # convert nd to np and reshape to fixed chunk size layout - if output_layout == "nhwc-8h2w32c2w-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch_size, height // 8, width // 4, channel // 32, 8, 2, 32, 2] - ) - elif output_layout == "nhwc-8h8w32c-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch_size, height // 8, width // 8, channel // 32, 8, 8, 32] - ) - - if dtype == "float16": - np.testing.assert_allclose( - transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 - ) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Resize 2D tesst. 
+""" +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import resize2d_python +import tvm.topi.hexagon as s1 +from tvm.contrib.hexagon import allocate_hexagon_array + +from ..infrastructure import transform_numpy, get_hexagon_target + + +class TestResize2d: + """Test resize 2D class.""" + + (batch, channel, in_height, in_width, out_height, out_width,) = tvm.testing.parameters( + ( + 1, + 32, + 8, + 8, + 16, + 16, + ), + ( + 1, + 32, + 48, + 48, + 8, + 8, + ), + ) + + (layout, input_crouton_layout, output_layout, dtype,) = tvm.testing.parameters( + ("NHWC", "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "float16"), + ("NHWC", "nhwc-8h8w32c-2d", "nhwc-8h8w32c-2d", "uint8"), + ) + + coord_trans = tvm.testing.parameter("asymmetric", "align_corners", "half_pixel") + method = tvm.testing.parameter("nearest_neighbor", "linear") + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + in_height, + in_width, + out_height, + out_width, + layout, + method, + coord_trans, + ): + """Generate expected output.""" + scale_h = out_height / in_height + scale_w = out_width / in_width + + return resize2d_python(input_np, (scale_h, scale_w), layout, method, coord_trans) + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype == "float16": + return np.random.random(input_shape).astype(dtype) + if dtype == "uint8": + return np.random.randint(0, 255, input_shape).astype(dtype) + if dtype == "int8": + return np.random.randint(-128, 127, input_shape).astype(dtype) + raise RuntimeError(f"dtype {dtype} is not valid.") + + @tvm.testing.fixture + def transformed_input_np(self, input_np, layout, input_crouton_layout, dtype): + if dtype in ["float16", "uint8", "int8"]: + return transform_numpy(input_np, layout.lower(), input_crouton_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, layout, output_layout, dtype): + if dtype in ["float16", "uint8", "int8"]: + return transform_numpy(expected_output_np, layout.lower(), output_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def input_shape(self, batch, channel, in_height, in_width): + return (batch, in_height, in_width, channel) + + @tvm.testing.fixture + def output_shape(self, batch, channel, out_height, out_width): + return (batch, out_height, out_width, channel) + + @tvm.testing.requires_hexagon + def test_resize2d( + self, + dtype, + input_np, + transformed_input_np, + input_shape, + output_shape, + expected_output_np, + transformed_expected_output_np, + layout, + input_crouton_layout, + output_layout, + coord_trans, + method, + hexagon_session, + ): + """Test resize 2D.""" + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + m_tensor = s1.resize2d_compute( + a_tensor, + [0.0] * 4, + (output_shape[1], output_shape[2]), + layout=layout, + coordinate_transformation_mode=coord_trans, + method=method, + out_dtype=dtype, + ) + + tir_schedule = s1.tir_resize2d_schedule( + m_tensor, a_tensor, input_crouton_layout, output_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + if output_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + output_axis_separator = [4] + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="resize2d", + ) + + a_data_nd = allocate_hexagon_array( + 
hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + transformed_expected_output_np.shape, + dtype=dtype, + axis_separators=output_axis_separator, + mem_scope="global.vtcm", + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + batch_size, height, width, channel = output_shape + # convert nd to np and reshape to fixed chunk size layout + if output_layout == "nhwc-8h2w32c2w-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch_size, height // 8, width // 4, channel // 32, 8, 2, 32, 2] + ) + elif output_layout == "nhwc-8h8w32c-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch_size, height // 8, width // 8, channel // 32, 8, 8, 32] + ) + + if dtype == "float16": + np.testing.assert_allclose( + transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 + ) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 58695e1fd63f8..e78e35ff5c7c1 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -63,9 +63,9 @@ def astext(program, unify_free_vars=False): """check that program is parsable in text format""" text = program.astext() if isinstance(program, relay.Expr): - roundtrip_program = tvm.parser.parse_expr(text) + roundtrip_program = tvm.relay.parse_expr(text) else: - roundtrip_program = tvm.parser.fromtext(text) + roundtrip_program = tvm.relay.fromtext(text) tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True) diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 2fb7c74f60a1e..5459810fee643 100755 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -5772,7 +5772,7 @@ def @main(%A: Tensor[(4, 176, 8, 8), float32]) { divide(%528, %533) /* truediv */ } """ - mod_golden = tvm.parser.parse('#[version = "0.0.5"]\n' + program) + mod_golden = tvm.relay.parse('#[version = "0.0.5"]\n' + program) tvm.ir.assert_structural_equal(mod["main"].body, mod_golden["main"].body, map_free_vars=True) diff --git a/tests/python/integration/test_legacy_tuning.py b/tests/python/integration/test_legacy_tuning.py index 04c5f85ce5d4c..5dc6aa2106a8f 100644 --- a/tests/python/integration/test_legacy_tuning.py +++ b/tests/python/integration/test_legacy_tuning.py @@ -340,7 +340,7 @@ def test_tuning_cpu(): """Test tuning on cpu.""" def runner(): - ir_mod = tvm.parser.fromtext( + ir_mod = tvm.relay.fromtext( textwrap.dedent( """ #[version = "0.0.5"] diff --git a/tests/python/relay/aot/test_aot_create_executor_metadata.py b/tests/python/relay/aot/test_aot_create_executor_metadata.py index 0ef4449541f88..1bc79fe2a607a 100644 --- a/tests/python/relay/aot/test_aot_create_executor_metadata.py +++ b/tests/python/relay/aot/test_aot_create_executor_metadata.py @@ -53,7 +53,7 @@ def test_create_executor_metadata_single_func(): class Module: @T.prim_func def __tvm_main__( - a: T.handle, output: T.handle, workspace: T.Ptr[T.uint8], constants: T.Ptr[T.uint8] + a: T.handle, output: T.handle, workspace: T.Ptr(T.uint8), constants: T.Ptr(T.uint8) ) -> None: # function attr dict T.func_attr({"global_symbol": 
"test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]}) diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py index 89c34eaac8b6e..0c5931a55d313 100644 --- a/tests/python/relay/aot/test_cpp_aot.py +++ b/tests/python/relay/aot/test_cpp_aot.py @@ -81,7 +81,7 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), } """ ) - ir_mod = tvm.parser.fromtext(relay_model) + ir_mod = tvm.relay.fromtext(relay_model) main_func = ir_mod["main"] shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index 2e7e23ead65f9..d99d6173bc5f3 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -773,7 +773,7 @@ def test_aot_codegen_backend_alloc_workspace_calls(): # small tensors that would get lowered to stack allocations in the CPU PrimFuncs. # However, the AoT executor codegen should retain them as TVMBAW calls # pylint: disable=line-too-long - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] { diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py index 75613d81e1453..12c60a7266517 100644 --- a/tests/python/relay/aot/test_crt_aot_usmp.py +++ b/tests/python/relay/aot/test_crt_aot_usmp.py @@ -456,7 +456,7 @@ def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo): def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo): """This checks for inference using workspace and constant pools placed in the application""" - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """\ #[version = "0.0.5"] def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) { diff --git a/tests/python/relay/aot/test_pass_aot_lower_main.py b/tests/python/relay/aot/test_pass_aot_lower_main.py index b523e019299c5..0aeca5b2acf1e 100644 --- a/tests/python/relay/aot/test_pass_aot_lower_main.py +++ b/tests/python/relay/aot/test_pass_aot_lower_main.py @@ -52,7 +52,7 @@ def _assert_lowered_main(mod, main_func, call_type, print_script=False): def test_single_call_cpacked(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -79,7 +79,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_single_call_packed(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -106,7 +106,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_single_call_unpacked(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -133,7 +133,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_constant(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } @@ -164,7 +164,7 @@ def func(a: T.handle, output: T.handle) -> None: # TODO(@mbaret) There seems to be a TVMScript round-trip bug causing this to fail @pytest.mark.xfail() def 
test_copy_to_output(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -184,7 +184,7 @@ def func(a: T.handle, output: T.handle) -> None: a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) # body - tmp_write: T.Ptr[T.uint8] = output_buffer.data + tmp_write: T.Ptr(T.uint8) = output_buffer.data tmp_write_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_write) for i in T.serial(140): tmp_write_1[i] = T.let(tmp_read, a_buffer.data, tmp_read_1[i]) @@ -194,7 +194,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_two_calls(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } @@ -225,7 +225,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_tuple_output(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) { (%x, %x) } @@ -253,7 +253,7 @@ def func(a: T.handle, output0: T.handle, output1: T.handle) -> None: def test_tuple_intermediate(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> (Tensor[(5, 7), float32], Tensor[(5, 7), float32]) { (%x, %x) } @@ -286,7 +286,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_multi_input(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } @@ -314,7 +314,7 @@ def func(a: T.handle, b: T.handle, output: T.handle) -> None: def test_let_binding(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } @@ -342,7 +342,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_let_binding_branch(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } @@ -383,7 +383,7 @@ def func(a: T.handle, output: T.handle) -> None: def test_device_hooks(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } diff --git a/tests/python/relay/backend/test_pass_lower_te.py b/tests/python/relay/backend/test_pass_lower_te.py index d439f22b12465..89bd62fe5aa8e 100644 --- a/tests/python/relay/backend/test_pass_lower_te.py +++ b/tests/python/relay/backend/test_pass_lower_te.py @@ -50,7 +50,7 @@ def transform(mod): def test_lower_primitive(): - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -95,7 +95,7 @@ def test_lower_compiler(): def relay_ext_test_pass_lower_te(func): return None - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -140,7 +140,7 @@ def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { def test_lower_extern(): - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -183,7 +183,7 @@ def @my_add(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32], Extern=1 def 
test_lower_extern_with_dynamic_shape(): - input_mod = tvm.parser.parse( + input_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(?, ?), float32] { diff --git a/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py b/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py index 2113ae7b5c726..8b1b10d68e168 100644 --- a/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py +++ b/tests/python/relay/backend/test_pass_remove_standalone_reshapes.py @@ -75,7 +75,7 @@ def reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("reshape", type_annot=reshape_ty) mod[reshape_gv] = reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -141,7 +141,7 @@ def reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("reshape", type_annot=reshape_ty) mod[reshape_gv] = reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -221,7 +221,7 @@ def fused_reshape_primfunc(a: T.handle, d: T.handle) -> None: reshape_gv = relay.GlobalVar("fused_reshape", type_annot=reshape_ty) mod[reshape_gv] = fused_reshape_primfunc - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], diff --git a/tests/python/relay/collage/menangerie.py b/tests/python/relay/collage/menangerie.py index d5275fbd34c57..e74059282e3e6 100644 --- a/tests/python/relay/collage/menangerie.py +++ b/tests/python/relay/collage/menangerie.py @@ -86,7 +86,7 @@ def mnist_consts(dtype): def mnist(): metatable = {"relay.Constant": mnist_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 1, 28, 28), float32]) -> Tensor[(1, 10), float32] { @@ -301,7 +301,7 @@ def gpt2_consts(dtype): def gpt2(): metatable = {"relay.Constant": gpt2_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float32], @@ -1252,7 +1252,7 @@ def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float32], def gpt2_16(): metatable = {"relay.Constant": gpt2_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 50, 32), int64]) -> (Tensor[(1, 50, 32, 768), float16], @@ -2218,7 +2218,7 @@ def gpt2_extract_consts(dtype): def gpt2_extract(): metatable = {"relay.Constant": gpt2_extract_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1600, 768), float32]) -> Tensor[(50, 32, 3072), float32] { @@ -2265,7 +2265,7 @@ def @main(%x: Tensor[(1600, 768), float32]) -> Tensor[(50, 32, 3072), float32] { def gpt2_extract_16(): metatable = {"relay.Constant": gpt2_extract_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1600, 768), float16]) -> Tensor[(50, 32, 3072), float16] { @@ -2325,7 +2325,7 @@ def gpt2_16_for_cutlass_extract_consts(dtype): def gpt2_16_for_cutlass_extract(): metatable = {"relay.Constant": gpt2_16_for_cutlass_extract_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def 
@main(%x0: Tensor[(1600, 768), float16], @@ -2622,7 +2622,7 @@ def resnet50_consts(dtype): def resnet50(): metatable = {"relay.Constant": resnet50_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { @@ -2870,7 +2870,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32 def resnet50_16(): metatable = {"relay.Constant": resnet50_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float16]) -> Tensor[(1, 1000), float16] { @@ -3392,7 +3392,7 @@ def mobilenet_consts(dtype): def mobilenet(): metatable = {"relay.Constant": mobilenet_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { @@ -3622,7 +3622,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32 def mobilenet_16(): metatable = {"relay.Constant": mobilenet_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(1, 3, 224, 224), float16]) -> Tensor[(1, 1000), float16] { @@ -3861,7 +3861,7 @@ def batch_norm_extract(): ], ) metatable = {"relay.Constant": consts} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%FunctionVar_0: Tensor[(1, 32, 112, 112), float32]) -> Tensor[(1, 32, 112, 112), float32] { @@ -3945,7 +3945,7 @@ def resnext50_32x4d_consts(dtype): def resnext50_32x4d(): metatable = {"relay.Constant": resnext50_32x4d_consts("float32")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 64, 56, 56), float32]) { @@ -4083,7 +4083,7 @@ def @main(%x: Tensor[(1, 64, 56, 56), float32]) { def resnext50_32x4d_16(): metatable = {"relay.Constant": resnext50_32x4d_consts("float16")} - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(1, 64, 56, 56), float16]) { diff --git a/tests/python/relay/collage/test_sub_graph.py b/tests/python/relay/collage/test_sub_graph.py index 21f12c43dccd9..785bdf750169f 100644 --- a/tests/python/relay/collage/test_sub_graph.py +++ b/tests/python/relay/collage/test_sub_graph.py @@ -53,7 +53,7 @@ def run(in_mod, expected_mod, max_outputs, allow_taps, compiler, map): def test_single_op(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -66,7 +66,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -83,7 +83,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], def test_multi_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -96,7 +96,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -117,7 +117,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], def 
test_classic_conv2d_add_relu(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32], @@ -131,7 +131,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32], @@ -151,7 +151,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_diamond_single_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -165,7 +165,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -185,7 +185,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_diamond_multi_output(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -199,7 +199,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -222,7 +222,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_with_tap(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -234,7 +234,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32]) { @@ -258,7 +258,7 @@ def @main(%a: Tensor[(5, 3, 32, 32), float32], %b: Tensor[(2, 3, 5, 5), float32] def test_no_cycles(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { @@ -270,7 +270,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { @@ -291,7 +291,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { def test_labels_direct_connection(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -309,7 +309,7 @@ def @main(%a: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -337,7 +337,7 @@ def @main(%a: Tensor[(5, 7), float32]) { def test_labels_nested_tap(): def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { @@ -355,7 
+355,7 @@ def @main(%a: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32]) { diff --git a/tests/python/relay/dyn/test_dynamic_op_level3.py b/tests/python/relay/dyn/test_dynamic_op_level3.py index ab562f0f49f50..afc42c778a724 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level3.py +++ b/tests/python/relay/dyn/test_dynamic_op_level3.py @@ -428,7 +428,7 @@ def verify_sparse_fill_empty_rows( def test_dyn_copy(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int64] { @@ -444,7 +444,7 @@ def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int64] { def test_dyn_copy_scalar(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: int32, %y: Tensor[(?), int32]) -> Tensor[(?), int32] { @@ -464,7 +464,7 @@ def @main(%x: int32, %y: Tensor[(?), int32]) -> Tensor[(?), int32] { def test_dyn_cast(): target = tvm.target.Target("llvm") dev = tvm.cpu() - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int32] { diff --git a/tests/python/relay/test_backend_graph_executor.py b/tests/python/relay/test_backend_graph_executor.py index 179077e8742d4..fc6ec59a6d51c 100644 --- a/tests/python/relay/test_backend_graph_executor.py +++ b/tests/python/relay/test_backend_graph_executor.py @@ -196,7 +196,7 @@ def test_plan_2d_memory(): ] } - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 32, 40, 40), float32], diff --git a/tests/python/relay/test_dataflow_pattern.py b/tests/python/relay/test_dataflow_pattern.py index a11673bf69307..1bd05f5258b1e 100644 --- a/tests/python/relay/test_dataflow_pattern.py +++ b/tests/python/relay/test_dataflow_pattern.py @@ -1826,7 +1826,7 @@ def test_matched_outside_but_dominated(): """In this example the pattern matches the nn.conv2d/add/multiply flow. Even though the add output is consumed by the sigmoid, the sigmoid itself is dominated by the multiply. So partitioning can proceed, all be it with a duplication of the add.""" - in_mod = tvm.parser.parse( + in_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, 3), float16], %bias: Tensor[(32), float32]) -> Tensor[(16, 32, 32, 32), float32] { @@ -1843,7 +1843,7 @@ def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, } """ ) - expected_mod = tvm.parser.parse( + expected_mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data: Tensor[(16, 16, 32, 32), float16], %weight: Tensor[(32, 16, 3, 3), float16], %bias: Tensor[(32), float32]) -> Tensor[(16, 32, 32, 32), float32] { diff --git a/tests/python/relay/test_ir_parser.py b/tests/python/relay/test_ir_parser.py index 08fa01f0b39bf..7e8f8c54f486b 100644 --- a/tests/python/relay/test_ir_parser.py +++ b/tests/python/relay/test_ir_parser.py @@ -75,18 +75,18 @@ def graph_equal(lhs, rhs): def roundtrip_expr(expr): text = expr.astext() - x = tvm.parser.parse_expr(text) + x = tvm.relay.parse_expr(text) assert_graph_equal(x, expr) # Testing Utilities for expressions. 
def roundtrip(expr): - x = tvm.parser.fromtext(expr.astext()) + x = tvm.relay.fromtext(expr.astext()) assert_graph_equal(x, expr) def parse_text(code): - expr = tvm.parser.parse_expr(code) + expr = tvm.relay.parse_expr(code) roundtrip_expr(expr) return expr @@ -100,7 +100,7 @@ def parses_as(code, expr): # Testing Utilities for full modules. def parse_module(code): - mod = tvm.parser.parse(SEMVER + code) + mod = tvm.relay.parse(SEMVER + code) roundtrip(mod) return mod @@ -423,7 +423,7 @@ def @main(%x: float32) { ref_read(%0) } """ - tvm.parser.parse(program) + tvm.relay.parse(program) def test_call(): @@ -868,7 +868,7 @@ def test_import_grad(): def test_mlp(): mod, _ = relay.testing.mlp.get_workload(1) text = mod.astext() - parsed_mod = tvm.parser.parse(text) + parsed_mod = tvm.relay.parse(text) tvm.ir.assert_structural_equal(mod, parsed_mod) @@ -893,7 +893,7 @@ def test_mlp_inlined_params(): mod = inline_params(mod, params) mod = relay.transform.InferType()(mod) text = mod.astext() - parsed_mod = tvm.parser.parse(text) + parsed_mod = tvm.relay.parse(text) tvm.ir.assert_structural_equal(mod, parsed_mod) @@ -945,7 +945,7 @@ def test_op_string_attr(): def test_load_prelude(): mod = tvm.IRModule() mod.import_from_std("prelude.rly") - tvm.parser.parse(mod.astext()) + tvm.relay.parse(mod.astext()) def test_call_attrs(): @@ -1006,7 +1006,7 @@ def test_func_attrs(): def test_init_module_and_metatable(): init_metatable = {"relay.Constant": [relay.const(np.random.rand(2, 3), dtype="float32")]} - init_module = tvm.parser.fromtext( + init_module = tvm.relay.fromtext( SEMVER + """ def @f(%y : Tensor[(2, 3), float32]) -> Tensor[(2, 3), float32] { @@ -1014,7 +1014,7 @@ def @f(%y : Tensor[(2, 3), float32]) -> Tensor[(2, 3), float32] { } """, ) - mod = tvm.parser.parse( + mod = tvm.relay.parse( SEMVER + """ def @main(%x: Tensor[(2, 3), float32]) { diff --git a/tests/python/relay/test_ir_text_printer.py b/tests/python/relay/test_ir_text_printer.py index ba3b2b348acc5..b1599c1b919fd 100644 --- a/tests/python/relay/test_ir_text_printer.py +++ b/tests/python/relay/test_ir_text_printer.py @@ -33,9 +33,9 @@ def astext(program, unify_free_vars=False): text = program.astext() if isinstance(program, Expr): - roundtrip_program = tvm.parser.parse_expr(text) + roundtrip_program = tvm.relay.parse_expr(text) else: - roundtrip_program = tvm.parser.fromtext(text) + roundtrip_program = tvm.relay.fromtext(text) tvm.ir.assert_structural_equal(roundtrip_program, program, map_free_vars=True) @@ -254,7 +254,7 @@ def @main[A]() -> fn (A, List[A]) -> List[A] { Cons } """ - mod = tvm.parser.parse(SEMVER + type_def_str + main_def_str) + mod = tvm.relay.parse(SEMVER + type_def_str + main_def_str) mod_str = str(mod) # ensure constructors are printed correctly in type definitions (with their # signature) and as exprs (without their signature) diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py index 32e7d2ca3867e..bbd851dc9c7f4 100644 --- a/tests/python/relay/test_op_grad_level2.py +++ b/tests/python/relay/test_op_grad_level2.py @@ -353,7 +353,7 @@ def test_conv2d_backward_weight_infer_type(): SEMVER = '#[version = "0.0.5"]\n' for code in [normal_conv_code, depthwise_conv_code]: - expr = tvm.parser.parse_expr(SEMVER + code) + expr = tvm.relay.parse_expr(SEMVER + code) fmod = tvm.IRModule.from_expr(expr) mod = relay.transform.InferType()(fmod) diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 0549f4f2fbccf..e7def019239b9 100644 --- 
a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -901,7 +901,7 @@ def test_extern_concat_injective_fuse(): # do not have their elem_offset explicitly set as a variable. # fmt: off - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%p0844: Tensor[(1, 384), int64], %p1652: Tensor[(2016, 128), float16]) { diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index ed044989ac184..9db1bcf78b2a3 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -73,7 +73,7 @@ def test_checkpoint_alpha_equal(): mod = tvm.transform.Sequential(passes)(tvm.IRModule.from_expr(df)) df = mod["main"] - df_parsed = tvm.parser.parse_expr( + df_parsed = tvm.relay.parse_expr( """ #[version = "0.0.5"] fn (%x: Tensor[(1), float32], %y: Tensor[(1), float32], @@ -137,7 +137,7 @@ def test_checkpoint_alpha_equal_tuple(): mod = tvm.transform.Sequential(passes)(tvm.IRModule.from_expr(df)) df = mod["main"] - df_parsed = tvm.parser.parse_expr( + df_parsed = tvm.relay.parse_expr( """ #[version = "0.0.5"] fn (%x: Tensor[(1), float32], %y: Tensor[(1), float32], diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py index 24cdabd2a6c38..488866ab6ff87 100644 --- a/tests/python/relay/test_pass_auto_quantize.py +++ b/tests/python/relay/test_pass_auto_quantize.py @@ -232,7 +232,7 @@ def _eval_mod(mod): def test_add_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -247,7 +247,7 @@ def @main( def test_conv2d_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -266,7 +266,7 @@ def @main( def test_multiple_arg_conversions_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -295,7 +295,7 @@ def @main( def test_unquantizable_prefix_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -318,7 +318,7 @@ def @main( def test_unquantizable_core_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( @@ -351,7 +351,7 @@ def @main( def test_unquantizable_suffix_partition(): - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main( diff --git a/tests/python/relay/test_pass_collage_partition.py b/tests/python/relay/test_pass_collage_partition.py index fa7e0a472a499..f40631628ea54 100644 --- a/tests/python/relay/test_pass_collage_partition.py +++ b/tests/python/relay/test_pass_collage_partition.py @@ -91,7 +91,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%x) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -99,7 +99,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { nn.relu(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -122,7 +122,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%x) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -137,7 +137,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook_nn_relu(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ 
tvm.target.Target("llvm"), @@ -164,7 +164,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_3_txt = """ #[version = "0.0.5"] @@ -218,7 +218,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook(%5) } """ - expected_mod = tvm.parser.fromtext(expected_1_txt if byoc_max_depth == 1 else expected_3_txt) + expected_mod = tvm.relay.fromtext(expected_1_txt if byoc_max_depth == 1 else expected_3_txt) targets = [ tvm.target.Target("llvm"), @@ -246,7 +246,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%1) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txts = { 1: """ @@ -294,7 +294,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { } """, } - expected_mod = tvm.parser.fromtext(expected_txts[tvm_max_depth]) + expected_mod = tvm.relay.fromtext(expected_txts[tvm_max_depth]) targets = [ tvm.target.Target("llvm"), @@ -322,7 +322,7 @@ def @main(%x: Tensor[(10, 10), float32]) { nn.relu(%1) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txts = { 1: """ @@ -373,7 +373,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { } """, } - expected_mod = tvm.parser.fromtext(expected_txts[byoc_max_depth]) + expected_mod = tvm.relay.fromtext(expected_txts[byoc_max_depth]) targets = [ tvm.target.Target("llvm"), @@ -401,7 +401,7 @@ def @main(%x: Tensor[(10, 10), float32]) { (%0, %1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -425,7 +425,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> (Tensor[(10, 10), float32], Tensor[( (%6, %5, %7) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -451,7 +451,7 @@ def @main(%x: Tensor[(10, 10), float32]) { concatenate(%2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -482,7 +482,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(20, 10), float32] { @collage_example_target_hook_concatenate(%8) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -511,7 +511,7 @@ def @main(%x: Tensor[(10, 10), float32]) { abs(%5) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -544,7 +544,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { abs(%10) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -570,7 +570,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -598,7 +598,7 @@ def @main(%x: Tensor[(10, 10), float32]) -> Tensor[(10, 10), float32] { @collage_example_target_hook_add_add(%5, %4) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), @@ -630,7 +630,7 @@ def @main(%x: Tensor[(10, 10), float32]) { add(%1, %2) } """ - mod = tvm.parser.fromtext(mod_txt) + mod = tvm.relay.fromtext(mod_txt) expected_txt = """ #[version = "0.0.5"] @@ -661,7 +661,7 @@ 
def @main(%x: Tensor[(10, 10), float32] ) -> Tensor[(10, 10), float32] { @collage_example_target_hook_nn_relu_nn_relu_add_add(%x) } """ - expected_mod = tvm.parser.fromtext(expected_txt) + expected_mod = tvm.relay.fromtext(expected_txt) targets = [ tvm.target.Target("llvm"), diff --git a/tests/python/relay/test_pass_dead_code_elimination.py b/tests/python/relay/test_pass_dead_code_elimination.py index abd9be99e3d98..68d2919ec38d1 100644 --- a/tests/python/relay/test_pass_dead_code_elimination.py +++ b/tests/python/relay/test_pass_dead_code_elimination.py @@ -28,9 +28,9 @@ def optimize_and_check(before_program, after_program, passes): if isinstance(before_program, str): - before_program = tvm.parser.parse(before_program) + before_program = tvm.relay.parse(before_program) if isinstance(after_program, str): - after_program = tvm.parser.parse(after_program) + after_program = tvm.relay.parse(after_program) if not isinstance(passes, list): passes = [passes] optimize = tvm.transform.Sequential(passes) @@ -229,7 +229,7 @@ def @main() { def test_impure_op(): """Don't elide calls to side-effecting operators.""" - before_program = tvm.parser.parse( + before_program = tvm.relay.parse( """ #[version = "0.0.5"] def @main() { @@ -245,7 +245,7 @@ def @main() { metatable, ) - after_program = tvm.parser.parse( + after_program = tvm.relay.parse( """ #[version = "0.0.5"] def @main() { @@ -268,7 +268,7 @@ def @main() { def test_impure_func(): """Don't elide calls to side-effecting functions.""" - before_program = tvm.parser.parse( + before_program = tvm.relay.parse( """ #[version = "0.0.5"] def @f() -> int { @@ -288,7 +288,7 @@ def @main() -> int { metatable, ) - after_program = tvm.parser.parse( + after_program = tvm.relay.parse( """ #[version = "0.0.5"] def @f() -> int { diff --git a/tests/python/relay/test_pass_defunctionalization.py b/tests/python/relay/test_pass_defunctionalization.py index a01c1c7e39d7f..96c061bd93b1b 100644 --- a/tests/python/relay/test_pass_defunctionalization.py +++ b/tests/python/relay/test_pass_defunctionalization.py @@ -142,7 +142,7 @@ def @main(%l: Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] { @simple(%0, %l) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.rand(5, 5).astype("float32") @@ -174,7 +174,7 @@ def @main(%l: List[float32]) -> List[float32] { @map(@id, %l) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.rand(10).astype("float32") @@ -212,7 +212,7 @@ def @main(%l: List[int32]) -> int32 { @sum(@id, %l) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) defunc_mod = defunctionalized(mod) input = np.random.randint(1, 100, 10) diff --git a/tests/python/relay/test_pass_eta_expand.py b/tests/python/relay/test_pass_eta_expand.py index 62cc27d9c94b9..b1776cb801aa5 100644 --- a/tests/python/relay/test_pass_eta_expand.py +++ b/tests/python/relay/test_pass_eta_expand.py @@ -25,7 +25,7 @@ def test_eta_expand_global_var(): - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( r""" #[version = "0.0.5"] def @aux(%x: Tensor[(), int32]) -> Tensor[(), int32] { @@ -39,7 +39,7 @@ def @main() -> fn(Tensor[(), int32]) -> Tensor[(), int32] { seq = tvm.transform.Sequential([_transform.EtaExpand(expand_global_var=True)]) with tvm.transform.PassContext(opt_level=3): mod = seq(mod) - expected = tvm.parser.fromtext( + expected = tvm.relay.fromtext( r""" #[version = "0.0.5"] def @aux(%x: Tensor[(), int32]) -> 
Tensor[(), int32] { @@ -56,7 +56,7 @@ def @main() -> fn(Tensor[(), int32]) -> Tensor[(), int32] { def test_eta_expand_constructor(): - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( r""" #[version = "0.0.5"] type List[A] { @@ -73,7 +73,7 @@ def @main[A]() -> fn(A, List[A]) -> List[A] { ) with tvm.transform.PassContext(opt_level=3): mod = seq(mod) - expected = tvm.parser.fromtext( + expected = tvm.relay.fromtext( r""" #[version = "0.0.5"] type List[A] { diff --git a/tests/python/relay/test_pass_manifest_lifetimes.py b/tests/python/relay/test_pass_manifest_lifetimes.py index f5b4cab207087..98e203e697bed 100644 --- a/tests/python/relay/test_pass_manifest_lifetimes.py +++ b/tests/python/relay/test_pass_manifest_lifetimes.py @@ -24,9 +24,9 @@ def optimize_and_check(before_program, after_program, passes): if isinstance(before_program, str): - before_program = tvm.parser.parse(before_program) + before_program = tvm.relay.parse(before_program) if isinstance(after_program, str): - after_program = tvm.parser.parse(after_program) + after_program = tvm.relay.parse(after_program) if not isinstance(passes, list): passes = [passes] optimize = tvm.transform.Sequential(passes) diff --git a/tests/python/relay/test_pass_plan_devices.py b/tests/python/relay/test_pass_plan_devices.py index 1c48589a51aa2..3ff49389cbdc2 100644 --- a/tests/python/relay/test_pass_plan_devices.py +++ b/tests/python/relay/test_pass_plan_devices.py @@ -113,7 +113,7 @@ def test_plain(): # Everything defaults to GPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -129,7 +129,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -156,7 +156,7 @@ def test_left_add_on_cpu(): # Force some args to be on CPU, rest default to GPU. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -173,7 +173,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -202,7 +202,7 @@ def test_left_add_on_cpu_via_copy(): # As for test_left_add_on_cpu, but with an explicit device_copy. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -219,7 +219,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -248,7 +248,7 @@ def test_left_add_on_cpu_via_copy_as_map(): # As for test_left_add_on_cpu, but with an explicit device_copy. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -272,7 +272,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], # Same expected result as for test_left_add_on_cpu, but we'll include indexes to help # the test make sense. def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], // index 0 @@ -323,7 +323,7 @@ def test_both_adds_on_cpu(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -341,7 +341,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -372,7 +372,7 @@ def test_sharing(): # The same add sub-expression is annotated twice. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { @@ -388,7 +388,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -418,7 +418,7 @@ def test_let_on_cpu(): # The device for a let-bound expression can flow from uses of the let-bound var. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -435,7 +435,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -464,7 +464,7 @@ def test_func_param_on_cpu(): # Devices for function parameters flow to call sites. def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -484,7 +484,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -515,7 +515,7 @@ def test_func_result_on_cpu(): # Devices for call sites flow to function results. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -535,7 +535,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -568,7 +568,7 @@ def test_higher_order(): # The constraint on %a flows back to %y via %f and %h def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -593,7 +593,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -635,7 +635,7 @@ def test_function_in_tuple(): # Since %f ends up in a tuple its argument and result is forced to be on the CPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -655,7 +655,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -686,7 +686,7 @@ def test_device_copy(): metatable = {"VirtualDevice": [CPU, GPU], "relay.Constant": [relay.const(const)]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32]) { @@ -700,7 +700,7 @@ def @main(%x: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -727,7 +727,7 @@ def test_shape_of(): # result defaults to the result device for @main which is the CPU, thus forcing a copy. # TODO(mbs): Perhaps the defaulting heuristics are being too clever? 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(?, ?), float32]) { @@ -741,7 +741,7 @@ def @main(%x: Tensor[(?, ?), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(?, ?), float32], @@ -764,7 +764,7 @@ def test_alloc_storage(): metatable = {"VirtualDevice": [HOST, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%size: int64, %alignment: int64) { @@ -777,7 +777,7 @@ def @main(%size: int64, %alignment: int64) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%size {virtual_device=meta[VirtualDevice][0]}: int64, %alignment {virtual_device=meta[VirtualDevice][0]}: int64, @@ -802,7 +802,7 @@ def test_alloc_tensor(): } def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%sto: Storage[]) { @@ -816,7 +816,7 @@ def @main(%sto: Storage[]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%sto {virtual_device=meta[VirtualDevice][1]}: Storage[], virtual_device=meta[VirtualDevice][1]) { @@ -842,7 +842,7 @@ def test_reshape_tensor(): } def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(2, 8), float32]) { @@ -855,7 +855,7 @@ def @main(%x: Tensor[(2, 8), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(2, 8), float32], @@ -880,7 +880,7 @@ def test_dynamic_input(): # There's nothing special about inferring devices for partially unknown types. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32]) { @@ -893,7 +893,7 @@ def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 {virtual_device=meta[VirtualDevice][0]}: Tensor[(?, ?), float32], %x1 {virtual_device=meta[VirtualDevice][0]}: Tensor[(?, ?), float32], @@ -916,7 +916,7 @@ def test_redundant_annotation(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -933,7 +933,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -963,7 +963,7 @@ def test_annotate_expr(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -979,7 +979,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1005,7 +1005,7 @@ def test_annotate_all(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -1021,7 +1021,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[( ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1057,7 +1057,7 @@ def test_conv_network(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 56), float32], @@ -1078,7 +1078,7 @@ def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 5 ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1 {virtual_device=meta[VirtualDevice][0]}: Tensor[(1, 64, 56, 56), float32], %data2 {virtual_device=meta[VirtualDevice][0]}: Tensor[(1, 64, 56, 56), float32], @@ -1111,7 +1111,7 @@ def test_tuple_get_item(): # Note that the device copy should be placed after projection rather than before. This is handled by # a heuristic in the pass. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(3, 3, 4), float32]) { @@ -1130,7 +1130,7 @@ def @main(%x: Tensor[(3, 3, 4), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(3, 3, 4), float32], @@ -1175,7 +1175,7 @@ def test_propogation(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32]) { @@ -1198,7 +1198,7 @@ def @main(%x: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1247,7 +1247,7 @@ def test_fusible_network(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -1268,7 +1268,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], @@ -1315,7 +1315,7 @@ def test_unpropagatable_graph(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], @@ -1334,7 +1334,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1363,7 +1363,7 @@ def test_conditional(): # The conditional is over a function type, thus exercising the first-order/higher-order domain handling. 
def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { @@ -1388,7 +1388,7 @@ def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: bool, %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], %z {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1429,7 +1429,7 @@ def test_global(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @f(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { @@ -1447,7 +1447,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) -> Tensor[(5 ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @f(%a {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %b {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1479,7 +1479,7 @@ def test_ref(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { @@ -1496,7 +1496,7 @@ def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(5, 7), float32], %y {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], @@ -1526,7 +1526,7 @@ def test_adt(): metatable = {"VirtualDevice": [CPU, GPU]} def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] type List[A] { @@ -1549,7 +1549,7 @@ def @main(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32]) { ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] type List[A] { @@ -1592,7 +1592,7 @@ def test_free_on_device(): # Everything defaults to GPU def input(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @on_scope_b(%x {virtual_device=meta[VirtualDevice][2]}: Tensor[(5, 7), float32], @@ -1618,7 +1618,7 @@ def @main(%a {virtual_device=meta[VirtualDevice][0]}: Tensor[(5, 7), float32], % ) def expected(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @on_scope_b(%x {virtual_device=meta[VirtualDevice][2]}: Tensor[(5, 7), float32], @@ -1702,7 +1702,7 @@ def input(): # - %y on CPU "scopeB", so will flow in to second param of gem. # - %z on CPU "scopeA", so will clash with third param of gem and will need device_copy. # - result on CPU "scopeB", but result of gem on "scopeA" so will need device_copy - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][0]}: Tensor[(128, 128), float32], @@ -1724,7 +1724,7 @@ def expected(): # - %y still on CPU "scopeB", no device_copy needed. # - %z still on CPU "scopeA", needs device_copy to "scopeB". # - result still on CPU "scopeB", needs device_copy from "scopeA". 
- return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x {virtual_device=meta[VirtualDevice][1]}: Tensor[(128, 128), float32], @@ -1767,7 +1767,7 @@ def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32], mod += "\n\t" + "add(%" + str(end - 1) + ", %" + str(end - 2) + ")" mod += "\n\t}" - return tvm.parser.parse( + return tvm.relay.parse( mod, "from_string", None, @@ -1792,7 +1792,7 @@ def test_primitive(): ] } - mod = tvm.parser.parse( + mod = tvm.relay.parse( """ #[version = "0.0.5"] def @main(%data1: Tensor[(1, 32, 40, 40), float32], diff --git a/tests/python/relay/test_pass_unmatched_cases.py b/tests/python/relay/test_pass_unmatched_cases.py index 885f260251671..528dc4b6826e5 100644 --- a/tests/python/relay/test_pass_unmatched_cases.py +++ b/tests/python/relay/test_pass_unmatched_cases.py @@ -416,7 +416,7 @@ def @shallow_opt[A](%a: Arith[A]) -> Arith[A] { } } """ - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) # fromtext parse the module, then checked it (which include strictness checking). @@ -444,7 +444,7 @@ def @expand_on_nil_match(%a: List[(List[()],)]) -> int { # Cons((Nil), Nil) passes the first pattern # Cons((Cons(*, *)), Nil) fails the first pattern, passes _ # Note Nil() is passed to ExpandWildcardsConstructor many times in the above! - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) def test_expanding_empty_tuple(): @@ -463,7 +463,7 @@ def @expand_on_empty_tuple_match(%a: (List[()], ())) -> int { } } """ - tvm.parser.fromtext(code) + tvm.relay.fromtext(code) if __name__ == "__main__": diff --git a/tests/python/relay/test_target_hooks.py b/tests/python/relay/test_target_hooks.py index 3a76fd2fbbf3a..1df2b2a919a31 100644 --- a/tests/python/relay/test_target_hooks.py +++ b/tests/python/relay/test_target_hooks.py @@ -73,7 +73,7 @@ def test_tir_external_generation_outline_with_target_instance(check_result): extern_codegen_target = tvm.target.Target( "example_target_hook -example_attribute=42", host=host_target ) - mod = tvm.parser.fromtext( + mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(8), float32], %y: Tensor[(8), float32]) -> Tensor[(8), float32] { diff --git a/tests/python/relay/test_type_infer.py b/tests/python/relay/test_type_infer.py index 13d164c2caf6d..1874555702167 100644 --- a/tests/python/relay/test_type_infer.py +++ b/tests/python/relay/test_type_infer.py @@ -399,7 +399,7 @@ def @main(%f: float32) -> float32 { @id(%f) } """ - mod = tvm.parser.fromtext(code) + mod = tvm.relay.fromtext(code) mod = transform.InferType()(mod) tvm.ir.assert_structural_equal(mod["main"].body.type_args, [relay.TensorType((), "float32")]) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 45e305c9a1950..6443d50f9e98d 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -1267,7 +1267,7 @@ def test_storage_size_and_offset_on_cpu(): # CPU = device type 1 # GPU = device type 2 def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a: Tensor[(5, 7), float32], @@ -1303,7 +1303,7 @@ def test_reshape_shape_on_cpu(): # CPU = device type 1 # GPU = device type 2 def input(): - return tvm.parser.fromtext( + return tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%x: Tensor[(2, 8), float32], diff --git a/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py b/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py index 16a7bd447992b..ab585fb4e0111 100644 --- 
a/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py +++ b/tests/python/relay/transform/test_capture_postdfsindex_in_spans.py @@ -42,7 +42,7 @@ def make_consts(dtype, shapes): def input_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { diff --git a/tests/python/relay/transform/test_compiler_function_utils.py b/tests/python/relay/transform/test_compiler_function_utils.py index 1bb07e268439f..2e5f3b5ecf0e9 100644 --- a/tests/python/relay/transform/test_compiler_function_utils.py +++ b/tests/python/relay/transform/test_compiler_function_utils.py @@ -43,7 +43,7 @@ def make_consts(dtype, shapes): def original_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -76,7 +76,7 @@ def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float1 def original_mod_let_bound(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -109,7 +109,7 @@ def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float1 def expected_outlined_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -143,7 +143,7 @@ def @tvmgen_default_cutlass_main_0(%y_0_i0: Tensor[(1600, 768), float16], %y_0_i def expected_extern_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { @@ -177,7 +177,7 @@ def @tvmgen_default_cutlass_main_0(%y_0_i0: Tensor[(1600, 768), float16], %y_0_i def expected_inlined_mod(): - return tvm.parser.parse( + return tvm.relay.parse( """ #[version = "0.0.5"] def @main(%x0 : Tensor[(1600, 768), float16], %x3 : Tensor[(600, 32, 64), float16]) -> (Tensor[(1600, 2304), float16], Tensor[(600, 32, 32), float16]) { diff --git a/tests/python/relay/utils/assert_diagnostic.py b/tests/python/relay/utils/assert_diagnostic.py index ba73d8755e0cc..aea76fdd3eb2a 100644 --- a/tests/python/relay/utils/assert_diagnostic.py +++ b/tests/python/relay/utils/assert_diagnostic.py @@ -18,7 +18,7 @@ from tvm import register_func, get_global_func, IRModule from tvm import relay -from tvm.parser import SpanCheck +from tvm.relay import SpanCheck from tvm.relay.transform import AnnotateSpans from tvm.runtime import Object from tvm.ir.diagnostics import get_renderer, override_renderer diff --git a/tests/python/unittest/test_aot_legalize_packed_call.py b/tests/python/unittest/test_aot_legalize_packed_call.py index 3f6e3f776cff9..ad970d52c0824 100644 --- a/tests/python/unittest/test_aot_legalize_packed_call.py +++ b/tests/python/unittest/test_aot_legalize_packed_call.py @@ -26,10 +26,10 @@ class Module: @T.prim_func def tvm_test_cpacked( - A: T.Buffer[(1,), "float32"], - B: T.Buffer[(1,), "float32"], - C: T.Buffer[(1,), "float32"], - device_context: T.Buffer[(1,), "float32"], + A: 
T.Buffer((1,), "float32"), + B: T.Buffer((1,), "float32"), + C: T.Buffer((1,), "float32"), + device_context: T.Buffer((1,), "float32"), ) -> T.handle: T.evaluate(C.data) @@ -56,10 +56,10 @@ def tir_packed_call() -> None: class Expected: @T.prim_func def tvm_test_cpacked( - A: T.Buffer[(1,), "float32"], - B: T.Buffer[(1,), "float32"], - C: T.Buffer[(1,), "float32"], - device_context: T.Buffer[(1,), "float32"], + A: T.Buffer((1,), "float32"), + B: T.Buffer((1,), "float32"), + C: T.Buffer((1,), "float32"), + device_context: T.Buffer((1,), "float32"), ) -> T.handle: T.evaluate(C.data) diff --git a/tests/python/unittest/test_auto_scheduler_feature.py b/tests/python/unittest/test_auto_scheduler_feature.py index 8be6e0a8f2ed0..ddd86347c2ec9 100644 --- a/tests/python/unittest/test_auto_scheduler_feature.py +++ b/tests/python/unittest/test_auto_scheduler_feature.py @@ -203,9 +203,9 @@ def test_gpu_feature(): @T.prim_func def tir_matmul( - A: T.Buffer[(256, 256), "float32"], - B: T.Buffer[(256, 256), "float32"], - C: T.Buffer[(256, 256), "float32"], + A: T.Buffer((256, 256), "float32"), + B: T.Buffer((256, 256), "float32"), + C: T.Buffer((256, 256), "float32"), ) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) @@ -263,7 +263,7 @@ def test_dense_lowered(): @T.prim_func -def negative_extent(A: T.Buffer[(1,), "float32"]): +def negative_extent(A: T.Buffer((1,), "float32")): for j in range(0, -1): A[j] = A[j] + 1.0 @@ -275,8 +275,8 @@ def test_negative_extent(): @T.prim_func def zero_dim( - p2: T.Buffer[(), "float32"], - T_cast: T.Buffer[(T.int64(1), T.int64(768)), "int8"], + p2: T.Buffer((), "float32"), + T_cast: T.Buffer((T.int64(1), T.int64(768)), "int8"), ): # function attr dict T.func_attr( diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index 198a5816321e6..e51745d08be1f 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -118,7 +118,7 @@ def test_graph_executor(): """Test use of the graph executor with microTVM.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { @@ -166,7 +166,7 @@ def test_aot_executor(): """Test use of the AOT executor with microTVM.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { @@ -217,7 +217,7 @@ def test_aot_executor_usmp_const_pool(): """Test the AOT executor with microTVM using USMP to generate a constant data pool.""" temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8], %c : Tensor[(1,2), uint8]) { diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 70caa99c9bca1..594567eff3ae5 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -176,7 +176,7 @@ def _add_decl(name, dtype): "}", ] - mod = tvm.parser.fromtext("\n".join(mod_lines)) + mod = tvm.relay.fromtext("\n".join(mod_lines)) return mod, param_init diff --git a/tests/python/unittest/test_lower_build.py b/tests/python/unittest/test_lower_build.py index 4c188d2f834b1..e94a4f09ec565 100644 --- 
a/tests/python/unittest/test_lower_build.py +++ b/tests/python/unittest/test_lower_build.py @@ -54,9 +54,9 @@ def matmul(a: T.handle, b: T.handle, c: T.handle) -> None: class LoweredModule: @T.prim_func def main( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "from_legacy_te_schedule": True, "tir.noalias": True}) @@ -76,9 +76,9 @@ def main( class LoweredTIRModule: @T.prim_func def main( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py index 701e1826b38a4..88947962d69d3 100644 --- a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py +++ b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py @@ -31,9 +31,9 @@ @T.prim_func def matmul( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -56,7 +56,7 @@ def matmul( @tvm.script.ir_module class LayoutTransform: @T.prim_func - def main(placeholder: T.Buffer[(1, 16, 7, 7, 32), "float32"], placeholder_1: T.Buffer[(25088,), "float32"], T_layout_trans: T.Buffer[(1, 1, 7, 7, 512), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 16, 7, 7, 32), "float32"), placeholder_1: T.Buffer((25088,), "float32"), T_layout_trans: T.Buffer((1, 1, 7, 7, 512), "float32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -1595,7 +1595,7 @@ def test_cpu_layout_transform(): @T.prim_func -def negative_extent(A: T.Buffer[(1,), "float32"]): +def negative_extent(A: T.Buffer((1,), "float32")): for j in range(0, -1): A[j] = A[j] + 1.0 diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py index c82bc697c993d..9bb550e79e4ad 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_cooperative_fetch.py @@ -107,9 +107,9 @@ def main(var_A: T.handle, var_B: T.handle, var_C: T.handle) -> None: class WarpExecutionAfterRewrite: @T.prim_func def main( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py index 80ca954cca5ce..c03ba83c02291 100644 --- 
a/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_layout.py @@ -74,9 +74,9 @@ class TestTIRMatmul(BaseBeforeAfter): """ def before( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0, j, k0, i1, k1 in T.grid(4, 16, 4, 4, 4): @@ -89,9 +89,9 @@ def before( C[vi, vj] = C[vi, vj] + A[vi, vk] * B[vk, vj] def expected( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_reindex = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -114,7 +114,7 @@ class TestRewrittenBuffersMustOccurWithinBlock(BaseBeforeAfter): """Buffers must occur within a Block""" def before( - A: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [0]}) for i, j in T.grid(16, 16): @@ -131,7 +131,7 @@ class TestExtentOne(BaseBeforeAfter): """ def before( - A: T.Buffer[(16, 1), "float32"], + A: T.Buffer((16, 1), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [0]}) for i, j in T.grid(16, 1): @@ -139,7 +139,7 @@ def before( vi, vj = T.axis.remap("SS", [i, j]) T.evaluate(A[vi, vj]) - def expected(A: T.Buffer[(16, 1), "float32"]): + def expected(A: T.Buffer((16, 1), "float32")): T.func_attr({"layout_free_buffers": [0]}) A_global = T.alloc_buffer([16], dtype="float32") @@ -157,9 +157,9 @@ def expected(A: T.Buffer[(16, 1), "float32"]): @T.prim_func def tir_matmul( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0, j, k0, i1, k1 in T.grid(4, 16, 4, 4, 4): @@ -174,9 +174,9 @@ def tir_matmul( @T.prim_func def rewritten_tir_matmul( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_reindex = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -208,7 +208,7 @@ def test_layout_rewrite(): @tvm.script.ir_module class Conv2dCacheRead: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -285,7 +285,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), @tvm.script.ir_module class Conv2dCacheReadRewritten: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 
3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -370,7 +370,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), @tvm.script.ir_module class Conv2dCacheReadMultipleRewritten: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(3, 3, 64, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float32"]): + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((3, 3, 64, 64), "float32"), conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float32")): T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) pad_temp = T.alloc_buffer([1, 58, 58, 64], dtype="float32") conv2d_nhwc_global = T.alloc_buffer([1, 56, 56, 64], dtype="float32") @@ -482,9 +482,9 @@ def test_layout_rewrite_cache_read_multiple(): class TestLayoutRewriteInt64Index(BaseBeforeAfter): def before( - p0: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - p1: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - T_batch_matmul_NT: T.Buffer[(T.int64(12), T.int64(197), T.int64(197)), "int32"], + p0: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + p1: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + T_batch_matmul_NT: T.Buffer((T.int64(12), T.int64(197), T.int64(197)), "int32"), ): T.func_attr({"layout_free_buffers": [1], "global_symbol": "main", "tir.noalias": True}) for b_0_i_0_fused in T.parallel(T.int64(394)): @@ -542,9 +542,9 @@ def before( ) def expected( - p0: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - p1: T.Buffer[(T.int64(12), T.int64(197), T.int64(64)), "int8"], - T_batch_matmul_NT: T.Buffer[(T.int64(12), T.int64(197), T.int64(197)), "int32"], + p0: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + p1: T.Buffer((T.int64(12), T.int64(197), T.int64(64)), "int8"), + T_batch_matmul_NT: T.Buffer((T.int64(12), T.int64(197), T.int64(197)), "int32"), ): T.func_attr({"tir.noalias": True, "global_symbol": "main", "layout_free_buffers": [1]}) p1_global = T.alloc_buffer( diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py index 44b0e79f0cc2a..a3b1cc5e01394 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_parallel_vectorize_unroll.py @@ -73,7 +73,7 @@ def Move_PUV0(a: T.handle, b: T.handle) -> None: @tvm.script.ir_module class Fused_NN_Dense: @T.prim_func - def main(placeholder: T.Buffer[(64, 768), "float32"], placeholder_1: T.Buffer[(768, 768), "float32"], T_matmul_NT: T.Buffer[(64, 768), "float32"]) -> None: + def main(placeholder: T.Buffer((64, 768), "float32"), placeholder_1: T.Buffer((768, 768), "float32"), T_matmul_NT: T.Buffer((64, 768), "float32")) -> None: for i0, i1, i2 in T.grid(64, 768, 768): with T.block("T_matmul_NT"): i, j, k = T.axis.remap("SSR", [i0, i1, i2]) @@ -85,9 +85,9 @@ def main(placeholder: T.Buffer[(64, 768), "float32"], placeholder_1: T.Buffer[(7 @T.prim_func def before_matmul_vectorize( - placeholder: T.Buffer[(64, 768), "float32"], - placeholder_1: T.Buffer[(768, 768), "float32"], - T_matmul_NT: T.Buffer[(64, 768), "float32"], + placeholder: 
T.Buffer((64, 768), "float32"), + placeholder_1: T.Buffer((768, 768), "float32"), + T_matmul_NT: T.Buffer((64, 768), "float32"), ) -> None: with T.block("root"): T.reads() @@ -115,9 +115,9 @@ def before_matmul_vectorize( @T.prim_func def after_matmul_vectorize( - placeholder: T.Buffer[(64, 768), "float32"], - placeholder_1: T.Buffer[(768, 768), "float32"], - T_matmul_NT: T.Buffer[(64, 768), "float32"], + placeholder: T.Buffer((64, 768), "float32"), + placeholder_1: T.Buffer((768, 768), "float32"), + T_matmul_NT: T.Buffer((64, 768), "float32"), ) -> None: T_matmul_NT_global = T.alloc_buffer([64, 768], dtype="float32") for i0_0, i1_0, i0_1, i1_1 in T.grid(1, 16, 1, 3): diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py index 7e499424058df..347b773b7ed0f 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_reduction_block.py @@ -158,7 +158,7 @@ def main(var_A: T.handle, var_B: T.handle, var_C: T.handle) -> None: @tvm.script.ir_module class Softmax_cross_thread_reduction: @T.prim_func - def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") for i0 in T.serial(256): diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py index 21755e1338eb5..a8ce704bd0ce1 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_tensorize.py @@ -25,9 +25,9 @@ class Conv2dNCHWcVNNIModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for ( @@ -145,9 +145,9 @@ def main( class Conv2dNCHWcVNNIModuleTensorized: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -248,9 +248,9 @@ def main( class DenseDP4ATiled: @T.prim_func def main( - X: T.Buffer[(128, 128), "int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) compute_local = T.alloc_buffer([128, 128], dtype="int32", scope="local") @@ -334,9 +334,9 @@ def main( class DenseDP4ATensorized: @T.prim_func def main( - X: T.Buffer[(128, 128), 
"int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py b/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py index b01447ad4a9ef..963f660ffb674 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py +++ b/tests/python/unittest/test_meta_schedule_postproc_rewrite_unbound_block.py @@ -73,7 +73,7 @@ def main(var_A: T.handle, var_B: T.handle) -> None: @tvm.script.ir_module class Before_norm_bmn: @T.prim_func - def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> None: + def main(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer((1,), "float32")) -> None: C = T.alloc_buffer([1], dtype="float32") for i0, i1, i2 in T.grid(1, 256, 256): with T.block("C"): @@ -90,7 +90,7 @@ def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> @tvm.script.ir_module class After_norm_bmn: @T.prim_func - def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> None: + def main(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer((1,), "float32")) -> None: C = T.alloc_buffer([1], dtype="float32") for i0_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for i0_fused_1 in T.thread_binding(1, thread="threadIdx.x"): @@ -112,7 +112,7 @@ def main(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[(1,), "float32"]) -> class Bert_fused_reshape_transpose_reshape: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0, i0_i1_fused_1 in T.grid(1536, 32): with T.block("T_reshape_1"): @@ -131,7 +131,7 @@ def main( class Bert_fused_reshape_transpose_reshape_large: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0, i0_i1_fused_1 in T.grid(1536000, 32): with T.block("T_reshape_1"): @@ -150,7 +150,7 @@ def main( class Bert_fused_reshape_transpose_reshape_after_rub: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: for i0_i1_fused_0_i0_i1_fused_1_fused_0 in T.thread_binding(48, thread="blockIdx.x"): for i0_i1_fused_0_i0_i1_fused_1_fused_1 in T.thread_binding(1024, thread="threadIdx.x"): @@ -202,7 +202,7 @@ def main( class Bert_fused_reshape_transpose_reshape_after_rub_large: @T.prim_func def main( - placeholder: T.Buffer[(12, 64, 64), "float32"], T_reshape: T.Buffer[(64, 768), "float32"] + placeholder: T.Buffer((12, 64, 64), "float32"), T_reshape: T.Buffer((64, 768), "float32") ) -> None: # body # with T.block("root") @@ -269,7 +269,7 @@ def main( @T.prim_func def before_unrolled_loop( - placeholder: T.Buffer[(1, 56, 56, 64), "float32"], + placeholder: T.Buffer((1, 56, 56, 64), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -294,7 +294,7 @@ def before_unrolled_loop( @T.prim_func def 
after_unrolled_loop( - placeholder: T.Buffer[(1, 56, 56, 64), "float32"], + placeholder: T.Buffer((1, 56, 56, 64), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py b/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py index 86a88af403092..59de0b0c570a9 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py +++ b/tests/python/unittest/test_meta_schedule_postproc_verify_gpu_code.py @@ -220,7 +220,7 @@ def main(a: T.handle, b: T.handle) -> None: B[blockIdx_z * 131072 + blockIdx_y * 16384 + threadIdx_y * 2048 + ff_inner_inner_inner * 256 + blockIdx_x * 64 + threadIdx_x * 8 + nn_inner_inner_inner] = B_local[ff_inner_inner_inner * 8 + nn_inner_inner_inner]# fmt: on @T.prim_func -def GmmCuda0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -274,7 +274,7 @@ def GmmCuda0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " Z[v0, v1, v2] = Z_local[v0, v1, v2] @T.prim_func -def GmmCuda1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda1(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -333,7 +333,7 @@ def GmmCuda1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " @T.prim_func -def GmmCuda2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: +def GmmCuda2(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: Z_local = T.alloc_buffer([1, 128, 128], dtype="float32", scope="local") X_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") Y_shared = T.alloc_buffer([1, 128, 128], dtype="float32", scope="shared") @@ -393,9 +393,9 @@ def GmmCuda2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), " @T.prim_func def GMMCUDATensorCore( - X: T.Buffer[(1024, 1024), "float16"], - Y: T.Buffer[(1024, 1024), "float16"], - Z: T.Buffer[(1024, 1024), "float32"], + X: T.Buffer((1024, 1024), "float16"), + Y: T.Buffer((1024, 1024), "float16"), + Z: T.Buffer((1024, 1024), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py b/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py index 55ea0a6ed80f4..cb47672219154 100644 --- a/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py +++ b/tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py @@ -42,7 +42,7 @@ def _create_context(mod, target) -> ms.TuneContext: @tvm.script.ir_module class Conv2dNCHWcVTCM: 
@T.prim_func - def main(p0: T.Buffer[(T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)), "uint8"], p1: T.Buffer[(T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)), "uint8"], conv2d_NCHWc_int8: T.Buffer[(T.int64(1), T.int64(2), T.int64(54), T.int64(54), T.int64(32)), "int32"]): + def main(p0: T.Buffer((T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)), "uint8"), p1: T.Buffer((T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)), "uint8"), conv2d_NCHWc_int8: T.Buffer((T.int64(1), T.int64(2), T.int64(54), T.int64(54), T.int64(32)), "int32")): T.func_attr({"tir.noalias": True, "global_symbol": "main"}) p0_global_vtcm = T.alloc_buffer([T.int64(1), T.int64(2), T.int64(56), T.int64(56), T.int64(32)], dtype="uint8", scope="global.vtcm") p1_global_vtcm = T.alloc_buffer([T.int64(2), T.int64(2), T.int64(3), T.int64(3), T.int64(8), T.int64(32), T.int64(4)], dtype="uint8", scope="global.vtcm") diff --git a/tests/python/unittest/test_meta_schedule_relay_integration.py b/tests/python/unittest/test_meta_schedule_relay_integration.py index 8cd58e5a6f363..90be1ec0a1e9d 100644 --- a/tests/python/unittest/test_meta_schedule_relay_integration.py +++ b/tests/python/unittest/test_meta_schedule_relay_integration.py @@ -408,8 +408,8 @@ def test_meta_schedule_te2primfunc_argument_order_and_lowering(): class _fused_layout_transform: @T.prim_func def main( # type: ignore - placeholder: T.Buffer[(T.int64(1), T.int64(3), T.int64(16), T.int64(16)), "float32"], # type: ignore - T_layout_trans: T.Buffer[(T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"], # type: ignore + placeholder: T.Buffer((T.int64(1), T.int64(3), T.int64(16), T.int64(16)), "float32"), # type: ignore + T_layout_trans: T.Buffer((T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"), # type: ignore ) -> None: # type: ignore # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -430,7 +430,7 @@ def main( # type: ignore @tvm.script.ir_module class _fused_layout_transform_1: @T.prim_func - def main(placeholder: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"], T_layout_trans: T.Buffer[(T.int64(1), T.int64(8), T.int64(16), T.int64(16)), "float32"]) -> None: # type: ignore + def main(placeholder: T.Buffer((T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"), T_layout_trans: T.Buffer((T.int64(1), T.int64(8), T.int64(16), T.int64(16)), "float32")) -> None: # type: ignore # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -445,7 +445,7 @@ def main(placeholder: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16) @tvm.script.ir_module class _fused_nn_contrib_conv2d_NCHWc: @T.prim_func - def main(placeholder: T.Buffer[(T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"], placeholder_1: T.Buffer[(T.int64(2), T.int64(1), T.int64(5), T.int64(5), T.int64(3), T.int64(4)), "float32"], conv2d_NCHWc: T.Buffer[(T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32"]) -> None: # type: ignore + def main(placeholder: T.Buffer((T.int64(1), T.int64(1), T.int64(16), T.int64(16), T.int64(3)), "float32"), placeholder_1: T.Buffer((T.int64(2), T.int64(1), T.int64(5), T.int64(5), T.int64(3), T.int64(4)), "float32"), conv2d_NCHWc: T.Buffer((T.int64(1), T.int64(2), T.int64(16), T.int64(16), T.int64(4)), "float32")) -> None: # type: ignore # function attr dict 
T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py b/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py index 7f56683588ba8..b21a4e0f7ec8e 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_add_rfactor.py @@ -29,9 +29,9 @@ def test_cpu_matmul(): @T.prim_func def cpu_matmul_0( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2 in T.grid(4, 4, 512): @@ -45,9 +45,9 @@ def cpu_matmul_0( @T.prim_func def cpu_matmul_1( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_rf = T.alloc_buffer([4, 4, 128], dtype="float32") @@ -73,9 +73,9 @@ def cpu_matmul_1( @T.prim_func def cpu_matmul_2( - A: T.Buffer[(4, 512), "float32"], - B: T.Buffer[(512, 4), "float32"], - C: T.Buffer[(4, 4), "float32"], + A: T.Buffer((4, 512), "float32"), + B: T.Buffer((512, 4), "float32"), + C: T.Buffer((4, 4), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_rf = T.alloc_buffer([4, 4, 4], dtype="float32") @@ -124,10 +124,10 @@ def cpu_matmul_2( def test_cpu_argmax(): @T.prim_func def argmax( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -147,10 +147,10 @@ def argmax( @T.prim_func def argmax_0( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -169,10 +169,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: argmax_v0_rf = T.alloc_buffer([128, 16], dtype="int32") argmax_v1_rf = T.alloc_buffer([128, 16], dtype="float32") @@ -216,10 +216,10 @@ def argmax_1( @T.prim_func def argmax_2( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py 
b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py index f0eee4138daaa..a8219ca01a68e 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_bind.py @@ -36,9 +36,9 @@ def element_wise(var_A: T.handle, var_B: T.handle) -> None: @T.prim_func def reduction_loop_only( - A: T.Buffer[2, "float32"], - B: T.Buffer[2, "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer(2, "float32"), + B: T.Buffer(2, "float32"), + C: T.Buffer((), "float32"), ) -> None: for i0 in T.serial(2): with T.block("C"): @@ -52,9 +52,9 @@ def reduction_loop_only( @T.prim_func def zero_dim_add( - A: T.Buffer[(), "float32"], - B: T.Buffer[(), "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer((), "float32"), + B: T.Buffer((), "float32"), + C: T.Buffer((), "float32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -64,8 +64,8 @@ def zero_dim_add( def test_cuda_element_wise(): @T.prim_func def elementwise_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), ) -> None: # body # with T.block("root") @@ -99,9 +99,9 @@ def elementwise_0( def test_cuda_reduction_loop_only(): @T.prim_func def reduction_loop_only_0( - A: T.Buffer[2, "float32"], - B: T.Buffer[2, "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer(2, "float32"), + B: T.Buffer(2, "float32"), + C: T.Buffer((), "float32"), ) -> None: for u_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for u_fused_1 in T.thread_binding(1, thread="threadIdx.x"): @@ -132,9 +132,9 @@ def reduction_loop_only_0( def test_cuda_zero_dim_add(): @T.prim_func def zero_dim_add_0( - A: T.Buffer[(), "float32"], - B: T.Buffer[(), "float32"], - C: T.Buffer[(), "float32"], + A: T.Buffer((), "float32"), + B: T.Buffer((), "float32"), + C: T.Buffer((), "float32"), ) -> None: for u_fused_0 in T.thread_binding(1, thread="blockIdx.x"): for u_fused_1 in T.thread_binding(1, thread="threadIdx.x"): diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py index 1baa13793f38b..0b2e7fc086be2 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_auto_inline.py @@ -164,7 +164,7 @@ def main(var_X: T.handle, var_W: T.handle, var_B: T.handle, var_bn_scale: T.hand @tvm.script.ir_module class MultiLevelTiledConv2DAfterInline: @T.prim_func - def main(X: T.Buffer[(1, 512, 56, 56), "float32"], W: T.Buffer[(512, 512, 3, 3), "float32"], B: T.Buffer[(512, 1, 1), "float32"], bn_scale: T.Buffer[(512, 1, 1), "float32"], bn_offset: T.Buffer[(512, 1, 1), "float32"], compute: T.Buffer[(1, 512, 56, 56), "float32"]) -> None: + def main(X: T.Buffer((1, 512, 56, 56), "float32"), W: T.Buffer((512, 512, 3, 3), "float32"), B: T.Buffer((512, 1, 1), "float32"), bn_scale: T.Buffer((512, 1, 1), "float32"), bn_offset: T.Buffer((512, 1, 1), "float32"), compute: T.Buffer((1, 512, 56, 56), "float32")) -> None: compute_local = T.alloc_buffer([1, 512, 56, 56], dtype="float32", scope="local") for i0_0_i1_0_i2_0_i3_0_fused in T.thread_binding(224, thread="blockIdx.x"): for i0_1_i1_1_i2_1_i3_1_fused in T.thread_binding(2, thread="vthread.x"): @@ -192,7 +192,7 @@ def main(X: T.Buffer[(1, 512, 56, 56), "float32"], W: T.Buffer[(512, 512, 3, 3), @tvm.script.ir_module class SoftmaxBeforeInline: @T.prim_func - def main(A: T.Buffer[(256, 
256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_exp = T.alloc_buffer([256, 256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -221,7 +221,7 @@ def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256) @tvm.script.ir_module class SoftmaxAfterInline: @T.prim_func - def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def main(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") for i0, i1 in T.grid(256, 256): @@ -246,10 +246,10 @@ def main(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256) class BeforePureSpatial: @T.prim_func def main( - placeholder: T.Buffer[(1, 384), "int64"], - placeholder_1: T.Buffer[(30522, 768), "float32"], - placeholder_2: T.Buffer[(1, 384, 768), "float32"], - T_add: T.Buffer[(1, 384, 768), "float32"], + placeholder: T.Buffer((1, 384), "int64"), + placeholder_1: T.Buffer((30522, 768), "float32"), + placeholder_2: T.Buffer((1, 384, 768), "float32"), + T_add: T.Buffer((1, 384, 768), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) compile_engine_const = T.alloc_buffer([], dtype="int64") @@ -310,7 +310,7 @@ def main( @tvm.script.ir_module class AfterPureSpatial: @T.prim_func - def main(placeholder: T.Buffer[(1, 384), "int64"], placeholder_1: T.Buffer[(30522, 768), "float32"], placeholder_2: T.Buffer[(1, 384, 768), "float32"], T_add: T.Buffer[(1, 384, 768), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 384), "int64"), placeholder_1: T.Buffer((30522, 768), "float32"), placeholder_2: T.Buffer((1, 384, 768), "float32"), T_add: T.Buffer((1, 384, 768), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -325,7 +325,7 @@ def main(placeholder: T.Buffer[(1, 384), "int64"], placeholder_1: T.Buffer[(3052 @tvm.script.ir_module class ConstConsumer: @T.prim_func - def main(T_full: T.Buffer[(1, 12, 4096), "int64"]) -> None: + def main(T_full: T.Buffer((1, 12, 4096), "int64")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -341,7 +341,7 @@ def main(T_full: T.Buffer[(1, 12, 4096), "int64"]) -> None: @tvm.script.ir_module class Conv2dInt8: @T.prim_func - def main(p0: T.Buffer[(16, 14, 14, 256), "int8"], p1: T.Buffer[(1024, 1, 1, 256), "int8"], p2: T.Buffer[(1, 1, 1, 1024), "int32"], p3: T.Buffer[(1, 1, 1, 1024), "int32"], p4: T.Buffer[1024, "int32"], p5: T.Buffer[1024, "int32"], p6: T.Buffer[1024, "int32"], p7: T.Buffer[1, "int32"], p8: T.Buffer[(16, 14, 14, 1024), "int32"], compute: T.Buffer[(16, 14, 14, 1024), "int32"]) -> None: + def main(p0: T.Buffer((16, 14, 14, 256), "int8"), p1: T.Buffer((1024, 1, 1, 256), "int8"), p2: T.Buffer((1, 1, 1, 1024), "int32"), p3: T.Buffer((1, 1, 1, 1024), "int32"), p4: T.Buffer(1024, "int32"), p5: T.Buffer(1024, "int32"), p6: T.Buffer(1024, "int32"), p7: T.Buffer(1, "int32"), p8: T.Buffer((16, 14, 14, 1024), "int32"), compute: T.Buffer((16, 14, 14, 1024), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git 
a/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py b/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py index c851c9bec3b5f..489b0ddef0e43 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_cross_thread_reduction.py @@ -32,7 +32,7 @@ class Softmax_mn_after_inline: @T.prim_func def main( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -63,8 +63,8 @@ def main( def test_gpu_softmax_mn(): @T.prim_func def softmax_mn_0( - A: T.Buffer[(256, 256), "float32"], - T_softmax_norm: T.Buffer[(256, 256), "float32"], + A: T.Buffer((256, 256), "float32"), + T_softmax_norm: T.Buffer((256, 256), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -107,7 +107,7 @@ def softmax_mn_0( @T.prim_func def softmax_mn_1( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -159,7 +159,7 @@ def softmax_mn_1( @T.prim_func def softmax_mn_2( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -211,7 +211,7 @@ def softmax_mn_2( @T.prim_func def softmax_mn_3( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -299,7 +299,7 @@ def softmax_mn_3( def test_gpu_softmax_mn_after_inline(): @T.prim_func def softmax_mn_after_inline_0( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -334,7 +334,7 @@ def softmax_mn_after_inline_0( @T.prim_func def softmax_mn_after_inline_1( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum = T.alloc_buffer([256], dtype="float32") @@ -371,7 +371,7 @@ def softmax_mn_after_inline_1( @T.prim_func def softmax_mn_after_inline_2( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem = T.alloc_buffer([256], dtype="float32") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -415,7 +415,7 @@ def softmax_mn_after_inline_2( @T.prim_func def softmax_mn_after_inline_3( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), 
"float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -498,7 +498,7 @@ def softmax_mn_after_inline_3( def test_gpu_batch_norm_bmn(): @T.prim_func - def batch_norm_bmn_0(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "float32"]) -> None: + def batch_norm_bmn_0(A: T.Buffer((1, 512, 512), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -520,7 +520,7 @@ def batch_norm_bmn_0(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "floa D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def batch_norm_bmn_1(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "float32"]) -> None: + def batch_norm_bmn_1(A: T.Buffer((1, 512, 512), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -568,10 +568,10 @@ def batch_norm_bmn_1(A: T.Buffer[(1, 512, 512), "float32"], D: T.Buffer[1, "floa @T.prim_func def argmax( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1 in T.grid(128, 128): with T.block("argmax"): @@ -590,10 +590,10 @@ def argmax( @T.prim_func def argmax_32( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: for i0, i1 in T.grid(1, 32): with T.block("argmax"): @@ -613,10 +613,10 @@ def argmax_32( def test_gpu_argmax(): @T.prim_func def argmax_0( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") @@ -637,10 +637,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[128, "int32"], - argmax_v1: T.Buffer[128, "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer(128, "int32"), + argmax_v1: T.Buffer(128, "float32"), ) -> None: # body # with T.block("root") @@ -686,10 +686,10 @@ def argmax_1( def test_gpu_argmax_32(): @T.prim_func def argmax_0( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: # body # with T.block("root") @@ -710,10 +710,10 @@ def argmax_0( @T.prim_func def argmax_1( - idx: T.Buffer[(1, 32), "int32"], - val: T.Buffer[(1, 32), "float32"], - argmax_v0: T.Buffer[(1,), "int32"], - argmax_v1: T.Buffer[(1,), "float32"], + idx: T.Buffer((1, 32), "int32"), + val: T.Buffer((1, 32), "float32"), + 
argmax_v0: T.Buffer((1,), "int32"), + argmax_v1: T.Buffer((1,), "float32"), ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py index f40d9427490d1..66eb819122932 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt.py @@ -31,9 +31,9 @@ def test_cpu_matmul(): @T.prim_func def cpu_matmul_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -62,9 +62,9 @@ def cpu_matmul_0( @T.prim_func def cpu_matmul_1( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -93,9 +93,9 @@ def cpu_matmul_1( @T.prim_func def cpu_matmul_2( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -149,9 +149,9 @@ def cpu_matmul_2( def test_cpu_matmul_relu(): @T.prim_func def cpu_matmul_relu_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -180,9 +180,9 @@ def cpu_matmul_relu_0( @T.prim_func def cpu_matmul_relu_1( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -211,9 +211,9 @@ def cpu_matmul_relu_1( @T.prim_func def cpu_matmul_relu_2( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -273,9 +273,9 @@ def cpu_matmul_relu_2( def test_cuda_matmul(): @T.prim_func def cuda_matmul_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -379,9 +379,9 @@ def cuda_matmul_0( def test_cuda_matmul_relu(): @T.prim_func def cuda_matmul_relu_0( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - compute: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: 
T.Buffer((512, 512), "float32"), + compute: T.Buffer((512, 512), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -497,8 +497,8 @@ def cuda_matmul_relu_0( def test_cuda_sum_with_trivial_block_iter(): @T.prim_func def sum_with_trivial_block_iter( - A: T.Buffer[(1, 64, 768), "float32"], - B: T.Buffer[(1, 64, 1), "float32"], + A: T.Buffer((1, 64, 768), "float32"), + B: T.Buffer((1, 64, 1), "float32"), ) -> None: for i0, i1, i2, i3 in T.grid(1, 64, 1, 768): with T.block("sum"): @@ -523,9 +523,9 @@ def sum_with_trivial_block_iter( def test_multi_level_tiling_hexagon(): @T.prim_func def cpu_conv2d_nhwc( - inputs: T.Buffer[(1, 56, 56, 64), "float16"], - weight: T.Buffer[(3, 3, 64, 64), "float16"], - conv2d_nhwc: T.Buffer[(1, 56, 56, 64), "float16"], + inputs: T.Buffer((1, 56, 56, 64), "float16"), + weight: T.Buffer((3, 3, 64, 64), "float16"), + conv2d_nhwc: T.Buffer((1, 56, 56, 64), "float16"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -738,8 +738,8 @@ def test_max_pool_blocked(): # fmt off @T.prim_func def pool_blocked_cache_read_write( - X: T.Buffer[(1, 2, 8, 8, 8, 8, 32), "uint8"], - pool: T.Buffer[(1, 2, 4, 4, 8, 8, 32), "uint8"], + X: T.Buffer((1, 2, 8, 8, 8, 8, 32), "uint8"), + pool: T.Buffer((1, 2, 4, 4, 8, 8, 32), "uint8"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) pool_global = T.alloc_buffer([1, 2, 4, 4, 8, 8, 32], dtype="uint8") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py index 4667626f1706f..a1c024d287adf 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_intrin.py @@ -32,9 +32,9 @@ def test_x86_conv2d_nchwc(intrin=VNNI_INTRIN, target="llvm -mcpu=cascadelake -num-cores=4"): @T.prim_func def conv2d_nchwc( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): @@ -69,7 +69,7 @@ def conv2d_nchwc( # fmt: off @T.prim_func - def x86_conv2d_nchwc_0(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def x86_conv2d_nchwc_0(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_NCHWc_int8_global = T.alloc_buffer([1, 16, 56, 56, 16], dtype="int32") for i0_0, i1_0, i2_0, i3_0, i4_0_0, i0_1, i1_1, i2_1, i3_1, i4_0_1 in T.grid(1, 8, 28, 56, 1, 1, 2, 1, 1, 1): @@ -114,7 +114,7 @@ def x86_conv2d_nchwc_0(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], place conv2d_NCHWc_int8[v0, v1, v2, v3, v4] = conv2d_NCHWc_int8_global[v0, v1, v2, v3, v4] @T.prim_func - def x86_conv2d_nchwc_1(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: 
T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def x86_conv2d_nchwc_1(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_NCHWc_int8_global = T.alloc_buffer([1, 16, 56, 56, 16], dtype="int32") for i0_0, i1_0, i2_0, i3_0, i4_0_0 in T.grid(1, 8, 28, 56, 1): @@ -159,7 +159,7 @@ def x86_conv2d_nchwc_1(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], place conv2d_NCHWc_int8[v0, v1, v2, v3, v4] = conv2d_NCHWc_int8_global[v0, v1, v2, v3, v4] @T.prim_func - def x86_conv2d_nchwc_2(placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"]) -> None: + def x86_conv2d_nchwc_2(placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0_0, i1_0, i2_0, i3_0, i4_0_0, i0_1, i1_1, i2_1, i3_1, i4_0_1, i5_0, i6_0, i7_0, i8_0, i9_0_0, i0_2, i1_2, i2_2, i3_2, i4_0_2, i5_1, i6_1, i7_1, i8_1, i9_0_1, i0_3, i1_3, i2_3, i3_3, i4_0_3 in T.grid(1, 8, 28, 56, 1, 1, 2, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1): with T.block("conv2d_NCHWc_int8_o"): @@ -298,9 +298,9 @@ def _dense(m, n, k, in_dtype, out_dtype): def test_dp4a_dense(): @T.prim_func def dp4a_dense_0( - X: T.Buffer[(128, 128), "int8"], - W: T.Buffer[(128, 128), "int8"], - compute: T.Buffer[(128, 128), "int32"], + X: T.Buffer((128, 128), "int8"), + W: T.Buffer((128, 128), "int8"), + compute: T.Buffer((128, 128), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py index 0647699159551..9b869b4436c05 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_mlt_tc.py @@ -81,7 +81,7 @@ def test_matmul_relu(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - def matmul_relu_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) C_reindex_shared = T.alloc_buffer([128, 128], dtype="float32", scope=shared_scope) C_reindex_shared_wmma_accumulator = T.alloc_buffer([128, 128], dtype="float32", scope="wmma.accumulator") @@ -222,7 +222,7 @@ def matmul_relu_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "f def test_matmul_relu_with_fallback(): # fmt: off @T.prim_func - def matmul_relu_fallback_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_fallback_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -371,7 +371,7 @@ def test_conv2d(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - 
def conv2d_0(inputs: T.Buffer[(1, 16, 16, 32), "float16"], weight: T.Buffer[(3, 3, 32, 32), "float16"], conv2d_nhwc: T.Buffer[(1, 16, 16, 32), "float32"]) -> None: + def conv2d_0(inputs: T.Buffer((1, 16, 16, 32), "float16"), weight: T.Buffer((3, 3, 32, 32), "float16"), conv2d_nhwc: T.Buffer((1, 16, 16, 32), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) PadInput = T.alloc_buffer([1, 18, 18, 32], dtype="float16") conv2d_nhwc_reindex_shared = T.alloc_buffer([256, 32], dtype="float32", scope=shared_scope) @@ -546,7 +546,7 @@ def test_matmul_relu_pipeline(shared_scope): intrin_suffix = shared_scope.replace(".", "_") # fmt: off @T.prim_func - def matmul_relu_pipeline_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_pipeline_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -696,7 +696,7 @@ def matmul_relu_pipeline_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, def test_matmul_relu_global(): # fmt: off @T.prim_func - def matmul_relu_global_0(A: T.Buffer[(128, 128), "float16"], B: T.Buffer[(128, 128), "float16"], compute: T.Buffer[(128, 128), "float32"]) -> None: + def matmul_relu_global_0(A: T.Buffer((128, 128), "float16"), B: T.Buffer((128, 128), "float16"), compute: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -851,7 +851,7 @@ def test_matmul_relu_non_tensorizable(): def test_padded_matmul_relu(): # fmt: off @T.prim_func - def padded_matmul_relu_0(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 127), "float16"], compute: T.Buffer[(127, 127), "float32"]) -> None: + def padded_matmul_relu_0(A: T.Buffer((127, 127), "float16"), B: T.Buffer((127, 127), "float16"), compute: T.Buffer((127, 127), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -992,7 +992,7 @@ def padded_matmul_relu_0(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 1 def test_conv_1x1(): # fmt: off @T.prim_func - def conv2d_1x1_0(inputs: T.Buffer[(1, 16, 16, 64), "float16"], weight: T.Buffer[(1, 1, 64, 64), "float16"], conv2d_nhwc: T.Buffer[(1, 16, 16, 64), "float32"]) -> None: + def conv2d_1x1_0(inputs: T.Buffer((1, 16, 16, 64), "float16"), weight: T.Buffer((1, 1, 64, 64), "float16"), conv2d_nhwc: T.Buffer((1, 16, 16, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) conv2d_nhwc_reindex_shared = T.alloc_buffer([256, 64], dtype="float32", scope="shared") conv2d_nhwc_reindex_shared_wmma_accumulator = T.alloc_buffer([256, 64], dtype="float32", scope="wmma.accumulator") diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py b/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py index 520dfbfb1cc57..2a0a67d4c7869 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_parallel_vectorize_unroll.py @@ -67,7 +67,7 @@ def main(a: T.handle, b: T.handle, c: T.handle) -> None: @tvm.script.ir_module class PureSpatial: @T.prim_func - def main(placeholder: T.Buffer[(1, 13, 13, 3, 85), "float32"], placeholder_1: T.Buffer[(1, 26, 26, 3, 85), "float32"], placeholder_2: T.Buffer[(1, 52, 
52, 3, 85), "float32"], T_expand_dims: T.Buffer[(1, 80, 10647), "float32"]) -> None: + def main(placeholder: T.Buffer((1, 13, 13, 3, 85), "float32"), placeholder_1: T.Buffer((1, 26, 26, 3, 85), "float32"), placeholder_2: T.Buffer((1, 52, 52, 3, 85), "float32"), T_expand_dims: T.Buffer((1, 80, 10647), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) T_strided_slice_with_axes = T.alloc_buffer([1, 52, 52, 3, 1], dtype="float32") T_sigmoid = T.alloc_buffer([1, 52, 52, 3, 1], dtype="float32") @@ -224,9 +224,9 @@ def main(placeholder: T.Buffer[(1, 13, 13, 3, 85), "float32"], placeholder_1: T. def test_parallel_vectorize_unroll(): @T.prim_func def Matmul_0( - A: T.Buffer[(1024, 1024), "float32"], - B: T.Buffer[(1024, 1024), "float32"], - C: T.Buffer[(1024, 1024), "float32"], + A: T.Buffer((1024, 1024), "float32"), + B: T.Buffer((1024, 1024), "float32"), + C: T.Buffer((1024, 1024), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main"}) diff --git a/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py b/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py index 7c9433cedf50b..2e912af18a6ab 100644 --- a/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py +++ b/tests/python/unittest/test_meta_schedule_schedule_rule_random_compute_location.py @@ -59,8 +59,8 @@ def main(a: T.handle, b: T.handle) -> None: def test_random_compute_location(): @T.prim_func def add_0( - A: T.Buffer[(2048, 2048, 2048), "float32"], - B: T.Buffer[(2048, 2048, 2048), "float32"], + A: T.Buffer((2048, 2048, 2048), "float32"), + B: T.Buffer((2048, 2048, 2048), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main"}) diff --git a/tests/python/unittest/test_meta_schedule_space_cpu.py b/tests/python/unittest/test_meta_schedule_space_cpu.py index 47f3e6d4cc51b..93e1bdad44384 100644 --- a/tests/python/unittest/test_meta_schedule_space_cpu.py +++ b/tests/python/unittest/test_meta_schedule_space_cpu.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cpu_c1d(): # fmt: off @T.prim_func - def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_0(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -80,7 +80,7 @@ def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 T.writes(conv1d_nlc[v0, v1, v2]) conv1d_nlc[v0, v1, v2] = conv1d_nlc_global[v0, v1, v2] @T.prim_func - def c1d_1(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_1(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -122,7 +122,7 @@ def c1d_1(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 conv1d_nlc[v0, v1, v2] = conv1d_nlc_global[v0, v1, v2] @T.prim_func - def c1d_2(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_2(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), 
"float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -186,7 +186,7 @@ def c1d_2(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 def test_cpu_c2d(): # fmt: off @T.prim_func - def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -232,7 +232,7 @@ def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def c2d_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_1(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -274,7 +274,7 @@ def c2d_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def c2d_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_2(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -357,7 +357,7 @@ def c2d_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cpu_c3d(): # fmt: off @T.prim_func - def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_0(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -407,7 +407,7 @@ def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 T.writes(conv3d_ndhwc[v0, v1, v2, v3, v4]) conv3d_ndhwc[v0, v1, v2, v3, v4] = conv3d_ndhwc_global[v0, v1, v2, v3, v4] @T.prim_func - def c3d_1(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_1(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -457,7 +457,7 @@ def c3d_1(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 T.writes(conv3d_ndhwc[v0, v1, v2, v3, v4]) conv3d_ndhwc[v0, v1, v2, v3, v4] = conv3d_ndhwc_global[v0, v1, v2, v3, v4] @T.prim_func - def c3d_2(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: 
T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), "float32"]) -> None: + def c3d_2(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -549,7 +549,7 @@ def c3d_2(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 def test_cpu_cap(): # fmt: off @T.prim_func - def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_0(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -600,7 +600,7 @@ def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( T.writes(conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5]) conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5] = conv2d_capsule_nhwijc_global[v0, v1, v2, v3, v4, v5] @T.prim_func - def cap_1(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_1(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -648,7 +648,7 @@ def cap_1(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( T.writes(conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5]) conv2d_capsule_nhwijc[v0, v1, v2, v3, v4, v5] = conv2d_capsule_nhwijc_global[v0, v1, v2, v3, v4, v5] @T.prim_func - def cap_2(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_2(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -737,7 +737,7 @@ def cap_2(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( def test_cpu_dep(): # fmt: off @T.prim_func - def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_0(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -778,7 +778,7 @@ def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. 
T.writes(depth_conv2d_nhwc[v0, v1, v2, v3]) depth_conv2d_nhwc[v0, v1, v2, v3] = depth_conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dep_1(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_1(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -816,7 +816,7 @@ def dep_1(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. T.writes(depth_conv2d_nhwc[v0, v1, v2, v3]) depth_conv2d_nhwc[v0, v1, v2, v3] = depth_conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dep_2(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_2(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -892,7 +892,7 @@ def dep_2(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. def test_cpu_dil(): # fmt: off @T.prim_func - def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -937,7 +937,7 @@ def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dil_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_1(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -983,7 +983,7 @@ def dil_1(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def dil_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_2(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1065,7 +1065,7 @@ def dil_2(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cpu_gmm(): # fmt: off @T.prim_func - def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr 
dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1096,7 +1096,7 @@ def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo T.writes(Z[v0, v1, v2]) Z[v0, v1, v2] = Z_global[v0, v1, v2] @T.prim_func - def gmm_1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_1(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1127,7 +1127,7 @@ def gmm_1(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo T.writes(Z[v0, v1, v2]) Z[v0, v1, v2] = Z_global[v0, v1, v2] @T.prim_func - def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_2(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1182,7 +1182,7 @@ def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo def test_cpu_grp(): # fmt: off @T.prim_func - def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_0(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1228,7 +1228,7 @@ def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def grp_1(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_1(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1270,7 +1270,7 @@ def grp_1(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, T.writes(conv2d_nhwc[v0, v1, v2, v3]) conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] @T.prim_func - def grp_2(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_2(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1351,7 +1351,7 @@ def grp_2(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, def test_cpu_t2d(): # fmt: off @T.prim_func - def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_0(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", 
"tir.noalias": True}) # body @@ -1393,7 +1393,7 @@ def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 T.writes(conv2d_transpose_nhwc[v0, v1, v2, v3]) conv2d_transpose_nhwc[v0, v1, v2, v3] = conv2d_transpose_nhwc_global[v0, v1, v2, v3] @T.prim_func - def t2d_1(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_1(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1436,7 +1436,7 @@ def t2d_1(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 T.writes(conv2d_transpose_nhwc[v0, v1, v2, v3]) conv2d_transpose_nhwc[v0, v1, v2, v3] = conv2d_transpose_nhwc_global[v0, v1, v2, v3] @T.prim_func - def t2d_2(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_2(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1507,7 +1507,7 @@ def t2d_2(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 def test_cpu_nrm(): # fmt: off @T.prim_func - def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_0(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1540,7 +1540,7 @@ def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_1(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1573,7 +1573,7 @@ def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_2(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_2(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1626,7 +1626,7 @@ def nrm_2(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N def test_cpu_sfm(): # fmt: off @T.prim_func - def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_0(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1679,7 +1679,7 @@ def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_7, i1_2] = T.exp(A[i0_7, i1_2] - T_softmax_maxelem[i0_7], dtype="float32") / T_softmax_expsum[i0_7] @T.prim_func - def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_1(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 
256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1742,7 +1742,7 @@ def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_9, i1_2] = T_softmax_exp[i0_9, i1_2] / T_softmax_expsum[i0_9] @T.prim_func - def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_2(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1785,7 +1785,7 @@ def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_2] = T.exp(A[i0_6, i1_2] - T_softmax_maxelem[i0_6], dtype="float32") / T_softmax_expsum[i0_6] @T.prim_func - def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_3(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1852,7 +1852,7 @@ def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_2] = T_softmax_exp[i0_6, i1_2] / T_softmax_expsum[i0_6] @T.prim_func - def sfm_4(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_4(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1914,7 +1914,7 @@ def sfm_4(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_10, i1_4] = T_softmax_exp[i0_10, i1_4] / T_softmax_expsum[i0_10] @T.prim_func - def sfm_5(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_5(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1971,7 +1971,7 @@ def sfm_5(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_5, i1_1] = T_softmax_exp[i0_5, i1_1] / T_softmax_expsum[i0_5] @T.prim_func - def sfm_6(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_6(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2017,7 +2017,7 @@ def sfm_6(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_6, i1_1] = T.exp(A[i0_6, i1_1] - T_softmax_maxelem[i0_6], dtype="float32") / T_softmax_expsum[i0_6] @T.prim_func - def sfm_7(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_7(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2061,7 +2061,7 @@ def sfm_7(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0_5, i1_2] = T.exp(A[i0_5, i1_2] - T_softmax_maxelem[i0_5], dtype="float32") / 
T_softmax_expsum[i0_5] @T.prim_func - def sfm_8(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_8(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2205,7 +2205,7 @@ def sfm_8(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 def test_cpu_cbr(): # fmt: off @T.prim_func - def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_0(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2236,7 +2236,7 @@ def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 T.writes(compute[i0_4, i1_4, i2_4, i3_4]) compute[i0_4, i1_4, i2_4, i3_4] = T.max((Conv2dOutput[i0_4, i1_4, i2_4, i3_4] + bias[i3_4]) * bn_scale[i3_4] + bn_offset[i3_4], T.float32(0)) @T.prim_func - def cbr_1(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_1(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2282,7 +2282,7 @@ def cbr_1(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 T.writes(compute[i0, i1, i2, i3]) compute[i0, i1, i2, i3] = T.max((Conv2dOutput[i0, i1, i2, i3] + bias[i3]) * bn_scale[i3] + bn_offset[i3], T.float32(0)) @T.prim_func - def cbr_2(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_2(data: T.Buffer((1, 224, 224, 3), "float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2374,7 +2374,7 @@ def cbr_2(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 def test_cpu_tbg(): # fmt: off @T.prim_func - def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_0(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2428,7 +2428,7 @@ def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, T.writes(C[v0, v1, 
v2, v3]) C[v0, v1, v2, v3] = C_global[v0, v1, v2, v3] @T.prim_func - def tbg_1(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_1(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -2477,7 +2477,7 @@ def tbg_1(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, T.writes(C[v0, v1, v2, v3]) C[v0, v1, v2, v3] = C_global[v0, v1, v2, v3] @T.prim_func - def tbg_2(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_2(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py b/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py index 135304286b4b3..1da2794a4cc63 100644 --- a/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py +++ b/tests/python/unittest/test_meta_schedule_space_cpu_winograd.py @@ -41,7 +41,7 @@ def _design_space(mod): def test_cpu_nhwc(): # fmt: off @T.prim_func - def cpu_nhwc_0(X: T.Buffer[(1, 14, 14, 128), "float32"], W: T.Buffer[(6, 6, 128, 128), "float32"], conv2d_winograd: T.Buffer[(1, 12, 12, 128), "float32"]) -> None: + def cpu_nhwc_0(X: T.Buffer((1, 14, 14, 128), "float32"), W: T.Buffer((6, 6, 128, 128), "float32"), conv2d_winograd: T.Buffer((1, 12, 12, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body diff --git a/tests/python/unittest/test_meta_schedule_space_cuda.py b/tests/python/unittest/test_meta_schedule_space_cuda.py index 0a518c840d110..241fe63e1da00 100644 --- a/tests/python/unittest/test_meta_schedule_space_cuda.py +++ b/tests/python/unittest/test_meta_schedule_space_cuda.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cuda_c1d(): # fmt: off @T.prim_func - def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 128), "float32"], conv1d_nlc: T.Buffer[(1, 128, 128), "float32"]) -> None: + def c1d_0(inputs: T.Buffer((1, 256, 64), "float32"), weight: T.Buffer((3, 64, 128), "float32"), conv1d_nlc: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -122,7 +122,7 @@ def c1d_0(inputs: T.Buffer[(1, 256, 64), "float32"], weight: T.Buffer[(3, 64, 12 def test_cuda_c2d(): # fmt: off @T.prim_func - def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def c2d_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() @@ -206,7 +206,7 @@ def c2d_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cuda_c3d(): # fmt: off @T.prim_func - def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 7, 3, 64), "float32"], conv3d_ndhwc: T.Buffer[(1, 8, 112, 112, 64), 
"float32"]) -> None: + def c3d_0(inputs: T.Buffer((1, 16, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 7, 3, 64), "float32"), conv3d_ndhwc: T.Buffer((1, 8, 112, 112, 64), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() @@ -296,7 +296,7 @@ def c3d_0(inputs: T.Buffer[(1, 16, 224, 224, 3), "float32"], weight: T.Buffer[(7 def test_cuda_cap(): # fmt: off @T.prim_func - def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[(3, 3, 4, 4, 32, 32), "float32"], conv2d_capsule_nhwijc: T.Buffer[(1, 8, 8, 4, 4, 32), "float32"]) -> None: + def cap_0(inputs: T.Buffer((1, 16, 16, 4, 4, 32), "float32"), weight: T.Buffer((3, 3, 4, 4, 32, 32), "float32"), conv2d_capsule_nhwijc: T.Buffer((1, 8, 8, 4, 4, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -392,7 +392,7 @@ def cap_0(inputs: T.Buffer[(1, 16, 16, 4, 4, 32), "float32"], weight: T.Buffer[( def test_cuda_dep(): # fmt: off @T.prim_func - def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T.Buffer[(1, 3, 3, 32), "float32"], depth_conv2d_nhwc: T.Buffer[(1, 112, 112, 32), "float32"]) -> None: + def dep_0(placeholder: T.Buffer((1, 112, 112, 32), "float32"), placeholder_1: T.Buffer((1, 3, 3, 32), "float32"), depth_conv2d_nhwc: T.Buffer((1, 112, 112, 32), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -475,7 +475,7 @@ def dep_0(placeholder: T.Buffer[(1, 112, 112, 32), "float32"], placeholder_1: T. def test_cuda_dil(): # fmt: off @T.prim_func - def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, 3, 64), "float32"], conv2d_nhwc: T.Buffer[(1, 109, 109, 64), "float32"]) -> None: + def dil_0(inputs: T.Buffer((1, 224, 224, 3), "float32"), weight: T.Buffer((7, 7, 3, 64), "float32"), conv2d_nhwc: T.Buffer((1, 109, 109, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -558,7 +558,7 @@ def dil_0(inputs: T.Buffer[(1, 224, 224, 3), "float32"], weight: T.Buffer[(7, 7, def test_cuda_gmm(): # fmt: off @T.prim_func - def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "float32"], Z: T.Buffer[(1, 128, 128), "float32"]) -> None: + def gmm_0(X: T.Buffer((1, 128, 128), "float32"), Y: T.Buffer((1, 128, 128), "float32"), Z: T.Buffer((1, 128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -634,7 +634,7 @@ def gmm_0(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo def test_cuda_grp(): # fmt: off @T.prim_func - def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + def grp_0(inputs: T.Buffer((1, 56, 56, 64), "float32"), weight: T.Buffer((3, 3, 16, 128), "float32"), conv2d_nhwc: T.Buffer((1, 28, 28, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -718,7 +718,7 @@ def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, def test_cuda_t2d(): # fmt: off @T.prim_func - def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def t2d_0(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), 
"float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -804,7 +804,7 @@ def t2d_0(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 5 def test_cuda_nrm(): # fmt: off @T.prim_func - def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_0(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -832,7 +832,7 @@ def nrm_0(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N T.writes(D[b]) D[b] = T.sqrt(C[b], dtype="float32") @T.prim_func - def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> None: + def nrm_1(A: T.Buffer((1, 256, 256), "float32"), D: T.Buffer(1, "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -881,7 +881,7 @@ def nrm_1(A: T.Buffer[(1, 256, 256), "float32"], D: T.Buffer[1, "float32"]) -> N def test_cuda_sfm(): # fmt: off @T.prim_func - def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_0(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -923,7 +923,7 @@ def sfm_0(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum[i0] @T.prim_func - def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_1(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -965,7 +965,7 @@ def sfm_1(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum[i0] @T.prim_func - def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_2(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1009,7 +1009,7 @@ def sfm_2(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 T.block_attr({"axis":1}) T_softmax_norm[i0, i1] = T.exp(A[i0, i1] - T_softmax_maxelem[i0], dtype="float32") / T_softmax_expsum_shared[i0] @T.prim_func - def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"]) -> None: + def sfm_3(A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1088,7 +1088,7 @@ def sfm_3(A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256 def test_cuda_cbr(): # fmt: off @T.prim_func - def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3, 64), "float32"], bias: T.Buffer[64, "float32"], bn_offset: T.Buffer[64, "float32"], bn_scale: T.Buffer[64, "float32"], compute: T.Buffer[(1, 112, 112, 64), "float32"]) -> None: + def cbr_0(data: T.Buffer((1, 224, 224, 3), 
"float32"), kernel: T.Buffer((7, 7, 3, 64), "float32"), bias: T.Buffer(64, "float32"), bn_offset: T.Buffer(64, "float32"), bn_scale: T.Buffer(64, "float32"), compute: T.Buffer((1, 112, 112, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1173,7 +1173,7 @@ def cbr_0(data: T.Buffer[(1, 224, 224, 3), "float32"], kernel: T.Buffer[(7, 7, 3 def test_cuda_tbg(): # fmt: off @T.prim_func - def tbg_0(query: T.Buffer[(1, 128, 12, 64), "float32"], value: T.Buffer[(1, 128, 12, 64), "float32"], C: T.Buffer[(1, 12, 128, 128), "float32"]) -> None: + def tbg_0(query: T.Buffer((1, 128, 12, 64), "float32"), value: T.Buffer((1, 128, 12, 64), "float32"), C: T.Buffer((1, 12, 128, 128), "float32")) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): T.reads() diff --git a/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py b/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py index 53a153b905226..87a8fcac98006 100644 --- a/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py +++ b/tests/python/unittest/test_meta_schedule_space_cuda_winograd.py @@ -42,7 +42,7 @@ def _design_space(mod): def test_cuda_nhwc(): # fmt: off @T.prim_func - def cuda_nhwc_0(data: T.Buffer[(1, 14, 14, 128), "float32"], weight: T.Buffer[(6, 6, 128, 128), "float32"], conv2d_winograd: T.Buffer[(1, 12, 12, 128), "float32"]) -> None: + def cuda_nhwc_0(data: T.Buffer((1, 14, 14, 128), "float32"), weight: T.Buffer((6, 6, 128, 128), "float32"), conv2d_winograd: T.Buffer((1, 12, 12, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -200,7 +200,7 @@ def cuda_nhwc_0(data: T.Buffer[(1, 14, 14, 128), "float32"], weight: T.Buffer[(6 def test_cuda_nchw(): # fmt: off @T.prim_func - def cuda_nchw_0(data: T.Buffer[(1, 64, 56, 56), "float32"], weight: T.Buffer[(6, 6, 64, 64), "float32"], conv2d_winograd: T.Buffer[(1, 64, 56, 56), "float32"]) -> None: + def cuda_nchw_0(data: T.Buffer((1, 64, 56, 56), "float32"), weight: T.Buffer((6, 6, 64, 64), "float32"), conv2d_winograd: T.Buffer((1, 64, 56, 56), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -353,7 +353,7 @@ def cuda_nchw_0(data: T.Buffer[(1, 64, 56, 56), "float32"], weight: T.Buffer[(6, def test_cuda_nchw_add_relu(): # fmt: off @T.prim_func - def nchw_add_relu(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4, 2048, 2048), "float32"], p2: T.Buffer[(1, 2048, 1, 1), "float32"], T_relu: T.Buffer[(2, 2048, 50, 75), "float32"]): + def nchw_add_relu(p0: T.Buffer((2, 2048, 50, 75), "float32"), p1: T.Buffer((4, 4, 2048, 2048), "float32"), p2: T.Buffer((1, 2048, 1, 1), "float32"), T_relu: T.Buffer((2, 2048, 50, 75), "float32")): # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -440,7 +440,7 @@ def nchw_add_relu(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4 T_relu[ax0, ax1, ax2, ax3] = T.max(T_add[ax0, ax1, ax2, ax3], T.float32(0)) @T.prim_func - def nchw_add_relu_scheduled(p0: T.Buffer[(2, 2048, 50, 75), "float32"], p1: T.Buffer[(4, 4, 2048, 2048), "float32"], p2: T.Buffer[(1, 2048, 1, 1), "float32"], T_relu: T.Buffer[(2, 2048, 50, 75), "float32"]): + def nchw_add_relu_scheduled(p0: T.Buffer((2, 2048, 50, 75), "float32"), p1: T.Buffer((4, 4, 2048, 2048), "float32"), p2: T.Buffer((1, 2048, 
1, 1), "float32"), T_relu: T.Buffer((2, 2048, 50, 75), "float32")): # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) # body diff --git a/tests/python/unittest/test_meta_schedule_trace_apply.py b/tests/python/unittest/test_meta_schedule_trace_apply.py index c242f63b98ea4..ae65cc1a815bd 100644 --- a/tests/python/unittest/test_meta_schedule_trace_apply.py +++ b/tests/python/unittest/test_meta_schedule_trace_apply.py @@ -33,9 +33,9 @@ class Dense: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_matmul_NT: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_matmul_NT: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) @@ -56,9 +56,9 @@ def main( class DenseAdd: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -92,9 +92,9 @@ def main( class DenseAdd_scheduled_cpu: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -174,7 +174,7 @@ def main( @tvm.script.ir_module class DenseAdd_cpu_no_write_cache: @T.prim_func - def main(p0: T.Buffer[(128, 128), "float32"], p1: T.Buffer[(128, 128), "float32"], T_add: T.Buffer[(128, 128), "float32"]) -> None: + def main(p0: T.Buffer((128, 128), "float32"), p1: T.Buffer((128, 128), "float32"), T_add: T.Buffer((128, 128), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -221,9 +221,9 @@ def main(p0: T.Buffer[(128, 128), "float32"], p1: T.Buffer[(128, 128), "float32" class DenseAdd_scheduled_gpu: @T.prim_func def main( - p0: T.Buffer[(128, 128), "float32"], - p1: T.Buffer[(128, 128), "float32"], - T_add: T.Buffer[(128, 128), "float32"], + p0: T.Buffer((128, 128), "float32"), + p1: T.Buffer((128, 128), "float32"), + T_add: T.Buffer((128, 128), "float32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) @@ -374,7 +374,7 @@ def main( @tvm.script.ir_module class Conv2dInt8: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), 
"int32"), p8: T.Buffer(1, "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -490,7 +490,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_target: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "uint8"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "uint8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -634,7 +634,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_tensorcore_scheduled: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[(1, 1, 1, 256), "int64"], p5: T.Buffer[(1, 1, 1, 256), "int64"], p6: T.Buffer[(1, 1, 1, 256), "int64"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "uint8"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer((1, 1, 1, 256), "int64"), p5: T.Buffer((1, 1, 1, 256), "int64"), p6: T.Buffer((1, 1, 1, 256), "int64"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "uint8")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) A_s0 = T.var("int32") @@ -756,7 +756,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_NCHWc: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "int32"], compute: T.Buffer[(1, 128, 7, 7, 16), "uint8"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), "int32"), compute: T.Buffer((1, 128, 7, 7, 16), "uint8")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -919,7 +919,7 @@ def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, @tvm.script.ir_module class Conv2dInt8_NCHWc_target: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], 
p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "uint8"], T_cast: T.Buffer[(1, 128, 7, 7, 16), "int32"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), "uint8"), T_cast: T.Buffer((1, 128, 7, 7, 16), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1137,7 +1137,7 @@ def get_conv2d_vnni_mod(intrin_id): @tvm.script.ir_module class Conv2dInt8_NCHWc_scheduled: @T.prim_func - def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, 4, 16, 4), "int8"], p2: T.Buffer[(1, 128, 1, 1, 16), "int32"], p3: T.Buffer[(1, 128, 1, 1, 16), "float32"], p4: T.Buffer[1, "float32"], p5: T.Buffer[(1, 128, 7, 7, 16), "uint8"], T_cast: T.Buffer[(1, 128, 7, 7, 16), "int32"]) -> None: + def main(p0: T.Buffer((1, 32, 7, 7, 16), "uint8"), p1: T.Buffer((128, 32, 1, 1, 4, 16, 4), "int8"), p2: T.Buffer((1, 128, 1, 1, 16), "int32"), p3: T.Buffer((1, 128, 1, 1, 16), "float32"), p4: T.Buffer(1, "float32"), p5: T.Buffer((1, 128, 7, 7, 16), "uint8"), T_cast: T.Buffer((1, 128, 7, 7, 16), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1200,7 +1200,7 @@ def main(p0: T.Buffer[(1, 32, 7, 7, 16), "uint8"], p1: T.Buffer[(128, 32, 1, 1, @tvm.script.ir_module class Conv2dWinogradAddRelu: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), "float32")) -> None: # function attr dict T.func_attr({"layout_free_buffers": [1], "tir.noalias": True, "global_symbol": "main"}) # body @@ -1292,7 +1292,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dWinogradAddResidualRelu: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], p3: T.Buffer[(1, 56, 56, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), p3: T.Buffer((1, 56, 56, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -1391,7 +1391,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dWinogradAddResidualRelu_scheduled: @T.prim_func - def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), "float32"], p2: T.Buffer[(1, 1, 1, 64), "float32"], p3: T.Buffer[(1, 56, 56, 64), "float32"], T_relu: T.Buffer[(1, 56, 56, 64), "float32"]) -> None: + def main(p0: T.Buffer((1, 56, 56, 64), "float32"), p1: T.Buffer((6, 6, 64, 64), "float32"), p2: T.Buffer((1, 1, 1, 64), "float32"), p3: T.Buffer((1, 56, 56, 64), "float32"), T_relu: T.Buffer((1, 56, 56, 64), 
"float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) # body @@ -1531,7 +1531,7 @@ def main(p0: T.Buffer[(1, 56, 56, 64), "float32"], p1: T.Buffer[(6, 6, 64, 64), @tvm.script.ir_module class Conv2dInt8_with_predicate: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"tir.noalias": True, "global_symbol": "main"}) # body @@ -1605,7 +1605,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_with_predicate_target: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -1700,7 +1700,7 @@ def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), " @tvm.script.ir_module class Conv2dInt8_with_predicate_scheduled: @T.prim_func - def main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]) -> None: + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index 39919f3371976..734404fb34501 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ 
b/tests/python/unittest/test_micro_model_library_format.py @@ -160,7 +160,7 @@ def test_export_model_library_format_c( target = tvm.target.target.micro("host") with utils.TempDirectory.set_keep_for_debug(True): with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { @@ -254,7 +254,7 @@ def test_export_model_library_format_llvm(): assert str(target)[:2] == "c " target = tvm.target.Target("llvm " + str(target)[2:]) with tvm.transform.PassContext(opt_level=3): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { @@ -339,7 +339,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ def test_export_model_library_format_workspace(executor, runtime): target = tvm.target.target.micro("host") with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - relay_mod = tvm.parser.fromtext( + relay_mod = tvm.relay.fromtext( """ #[version = "0.0.5"] def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int16], %p2: Tensor[(1, 1, 1, 128), int32]){ diff --git a/tests/python/unittest/test_slice_tir.py b/tests/python/unittest/test_slice_tir.py index 03cd8f67d6b2e..fea2ce480e48f 100644 --- a/tests/python/unittest/test_slice_tir.py +++ b/tests/python/unittest/test_slice_tir.py @@ -122,7 +122,7 @@ class TestAnnotateAndSliceTIR(tvm.testing.CompareBeforeAfter): # @tvm.script.ir_module # class irmod_before: # @T.prim_func - # def main(A: T.Buffer[(1,), "int8"): + # def main(A: T.Buffer((1,), "int8")): # #A = T.match_buffer(a, (1,), "int8") # A[0] = 0 # with T.block("block_foo"): # optional: give this block a name, perhaps for testing? 
diff --git a/tests/python/unittest/test_target_codegen_llvm.py b/tests/python/unittest/test_target_codegen_llvm.py index e179d17101a31..d8a853ff5dbf2 100644 --- a/tests/python/unittest/test_target_codegen_llvm.py +++ b/tests/python/unittest/test_target_codegen_llvm.py @@ -920,7 +920,7 @@ def test_llvm_scalar_concat(): def test_raise_exception_during_codegen(): @T.prim_func def threadpool_nested_parallel_loop( - A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"] + A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i in T.parallel(4): diff --git a/tests/python/unittest/test_target_codegen_vulkan.py b/tests/python/unittest/test_target_codegen_vulkan.py index 7b71f4d4ab17c..bfb10ca85a38e 100644 --- a/tests/python/unittest/test_target_codegen_vulkan.py +++ b/tests/python/unittest/test_target_codegen_vulkan.py @@ -578,7 +578,7 @@ def test_negative_operand_divmod(target, dev): divisor = 5 @T.prim_func - def func(A: T.Buffer[(N, 2), "int32"]): + def func(A: T.Buffer((N, 2), "int32")): for i in T.serial(N): with T.block("A"): v_i = T.axis.spatial(N, i) diff --git a/tests/python/unittest/test_target_codegen_x86.py b/tests/python/unittest/test_target_codegen_x86.py index af91ed4520fd1..8ff9dbb3ddc8f 100644 --- a/tests/python/unittest/test_target_codegen_x86.py +++ b/tests/python/unittest/test_target_codegen_x86.py @@ -81,7 +81,7 @@ def @main(%inp : Tensor[(3), float32], %cst : Tensor[(3), float32]) { """ ) - ir_mod = tvm.parser.fromtext(relay_model) + ir_mod = tvm.relay.fromtext(relay_model) arch = "i386" if machine == "i386" else "x86_64" aot_factory = tvm.relay.build( diff --git a/tests/python/unittest/test_te_create_primfunc.py b/tests/python/unittest/test_te_create_primfunc.py index 4b8d857e86192..0b6f87b833a32 100644 --- a/tests/python/unittest/test_te_create_primfunc.py +++ b/tests/python/unittest/test_te_create_primfunc.py @@ -78,9 +78,9 @@ def tir_matmul(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def tir_matmul_int64( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, j0, k0 in T.grid(T.int64(128), T.int64(128), T.int64(128)): @@ -396,9 +396,9 @@ def test_tensor_attr(): @T.prim_func def expected_layout_attr( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - D: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + D: T.Buffer((128, 128), "float32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) C = T.alloc_buffer([128, 128], dtype="float32") @@ -417,9 +417,9 @@ def expected_layout_attr( @T.prim_func def expected_layout_attr_int64( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - D: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + D: T.Buffer((T.int64(128), T.int64(128)), "float32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True, "layout_free_buffers": [1]}) C = T.alloc_buffer([T.int64(128), 
T.int64(128)], dtype="float32") @@ -586,9 +586,9 @@ def te_func(): @T.prim_func def expected( - a: T.Buffer[(), "int32"], - b: T.Buffer[(), "int32"], - c: T.Buffer[(), "int32"], + a: T.Buffer((), "int32"), + b: T.Buffer((), "int32"), + c: T.Buffer((), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -612,8 +612,8 @@ def te_reshape(): @T.prim_func def tir_reshape( - A: T.Buffer[(T.int64(2), T.int64(4)), "float32"], - T_reshape: T.Buffer[(T.int64(4), T.int64(2)), "float32"], + A: T.Buffer((T.int64(2), T.int64(4)), "float32"), + T_reshape: T.Buffer((T.int64(4), T.int64(2)), "float32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1 in T.grid(T.int64(4), T.int64(2)): @@ -638,8 +638,8 @@ def test_reshape(): @T.prim_func def argmax_expected( - p0: T.Buffer[(T.int64(1), T.int64(64), T.int64(56), T.int64(56)), "uint8"], - p0_red: T.Buffer[(T.int64(1), T.int64(56), T.int64(56)), "int32"], + p0: T.Buffer((T.int64(1), T.int64(64), T.int64(56), T.int64(56)), "uint8"), + p0_red: T.Buffer((T.int64(1), T.int64(56), T.int64(56)), "int32"), ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) p0_red_temp_v0 = T.alloc_buffer([T.int64(1), T.int64(56), T.int64(56)], dtype="int32") @@ -707,7 +707,7 @@ def te_resize2d_symbolic(): @T.prim_func def tir_resize2d_symbolic( - A: T.Buffer[(T.int64(2), T.int64(3), T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(2), T.int64(3), T.int64(128), T.int64(128)), "float32"), var_resize: T.handle, ): T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py b/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py index 1a2d50ef5d7fc..2311bfbbef3ce 100644 --- a/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py +++ b/tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py @@ -22,14 +22,14 @@ @T.prim_func -def scale_by_two(a: T.Buffer[(128,), "int8"], c: T.Buffer[(128,), "int8"]): +def scale_by_two(a: T.Buffer((128,), "int8"), c: T.Buffer((128,), "int8")): for i in T.serial(128): with T.block("C"): c[i] = a[i] * T.int8(2) @T.prim_func -def scale_by_two_three(a: T.Buffer[(128,), "int8"], c: T.Buffer[(128,), "int8"]): +def scale_by_two_three(a: T.Buffer((128,), "int8"), c: T.Buffer((128,), "int8")): B = T.alloc_buffer([128], dtype="int8", scope="global.vtcm") for i in T.serial(128): with T.block("B"): diff --git a/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py b/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py index d438427e1fe16..a1808c8413035 100644 --- a/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py +++ b/tests/python/unittest/test_tir_analysis_detect_buffer_access_lca.py @@ -95,7 +95,7 @@ def match_buffer_func(a: T.handle, b: T.handle) -> None: @T.prim_func def global_buffer_with_blockidx( - a: T.Buffer[(1, 32), "int32"], b: T.Buffer[(1, 32), "int32"] + a: T.Buffer((1, 32), "int32"), b: T.Buffer((1, 32), "int32") ) -> None: for i0 in T.thread_binding(0, 1, thread="blockIdx.x"): for i1 in T.thread_binding(0, 32, thread="threadIdx.x"): diff --git a/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py b/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py index 8c16c81388edc..06f6fe31278dd 100644 --- a/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py +++ b/tests/python/unittest/test_tir_analysis_estimate_tir_flops.py @@ -51,7 +51,7 @@ def 
test_te_workload(workload, flops): @T.prim_func -def flops_with_let(a: T.Buffer[16, "float32"]): +def flops_with_let(a: T.Buffer(16, "float32")): for i in range(8): j = i + 8 a[j] = a[i] @@ -63,7 +63,7 @@ def test_flops_with_let(): @T.prim_func -def flops_with_if(a: T.Buffer[16, "float32"], b: T.Buffer[16, "float32"]): +def flops_with_if(a: T.Buffer(16, "float32"), b: T.Buffer(16, "float32")): for i in range(16): if i % 2 == 0: a[i] = b[i] diff --git a/tests/python/unittest/test_tir_analysis_oob.py b/tests/python/unittest/test_tir_analysis_oob.py index f910ca503be20..83c0294176243 100644 --- a/tests/python/unittest/test_tir_analysis_oob.py +++ b/tests/python/unittest/test_tir_analysis_oob.py @@ -21,29 +21,29 @@ @T.prim_func -def bad_load(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_load(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): B[0, 0] = A[2, 2] @T.prim_func -def bad_load_loop(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_load_loop(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): for i in range(3): B[i, 0] = A[i, 2] @T.prim_func -def bad_store(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_store(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): B[0, 3] = A[1, 2] @T.prim_func -def bad_store_loop(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def bad_store_loop(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): for i in range(3): B[0, i] = A[1, i] @T.prim_func -def unknown_bounds(A: T.Buffer[(2, 3), "float32"], B: T.Buffer[(3, 2), "float32"]): +def unknown_bounds(A: T.Buffer((2, 3), "float32"), B: T.Buffer((3, 2), "float32")): N = T.var("int32") for i in range(3): B[0, N] = A[1, i] diff --git a/tests/python/unittest/test_tir_analysis_verify_well_formed.py b/tests/python/unittest/test_tir_analysis_verify_well_formed.py index b3028a0148aa2..023d5f5f315c3 100644 --- a/tests/python/unittest/test_tir_analysis_verify_well_formed.py +++ b/tests/python/unittest/test_tir_analysis_verify_well_formed.py @@ -22,8 +22,8 @@ def test_pass_simple(): @T.prim_func def element_wise( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ): B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -41,8 +41,8 @@ def element_wise( def test_fail_use_out_loop_var(): @T.prim_func def element_wise( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ): for i, j in T.grid(128, 128): with T.block("B"): diff --git a/tests/python/unittest/test_tir_imm_values.py b/tests/python/unittest/test_tir_imm_values.py index a2a19a09ad870..416943c85da66 100644 --- a/tests/python/unittest/test_tir_imm_values.py +++ b/tests/python/unittest/test_tir_imm_values.py @@ -254,19 +254,19 @@ def test_tir_floatimm_const_fold(): """Behavior check: folding fp32 match platform f32 arithmetic""" @T.prim_func - def float_imm_multiply(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_multiply(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x * y @T.prim_func - def float_imm_add(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_add(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x + y @T.prim_func - def float_imm_sub(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def 
float_imm_sub(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x - y @T.prim_func - def float_imm_div(x: T.float32, y: T.float32, z: T.Buffer[(), "float32"]): + def float_imm_div(x: T.float32, y: T.float32, z: T.Buffer((), "float32")): z[()] = x / y def __wrap_build(f): diff --git a/tests/python/unittest/test_tir_ptx_cp_async.py b/tests/python/unittest/test_tir_ptx_cp_async.py index dc521f3c471a8..0e61f6d1b4f9e 100644 --- a/tests/python/unittest/test_tir_ptx_cp_async.py +++ b/tests/python/unittest/test_tir_ptx_cp_async.py @@ -21,7 +21,7 @@ @T.prim_func -def ptx_cp_async(A: T.Buffer[(32, 128), "float16"], B: T.Buffer[(32, 128), "float16"]) -> None: +def ptx_cp_async(A: T.Buffer((32, 128), "float16"), B: T.Buffer((32, 128), "float16")) -> None: T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") tx = T.env_thread("threadIdx.x") diff --git a/tests/python/unittest/test_tir_ptx_ldmatrix.py b/tests/python/unittest/test_tir_ptx_ldmatrix.py index f652be4421334..615d33ae004ec 100644 --- a/tests/python/unittest/test_tir_ptx_ldmatrix.py +++ b/tests/python/unittest/test_tir_ptx_ldmatrix.py @@ -23,7 +23,7 @@ @T.prim_func def ptx_ldmatrix( - A: T.Buffer[(16, 16), "float16"], B: T.Buffer[(16, 16), "float16"], num: T.int32, trans: T.uint8 + A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16"), num: T.int32, trans: T.uint8 ) -> None: T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") diff --git a/tests/python/unittest/test_tir_renew_defs.py b/tests/python/unittest/test_tir_renew_defs.py index 65f81499bdfd6..e14cd5a89832c 100644 --- a/tests/python/unittest/test_tir_renew_defs.py +++ b/tests/python/unittest/test_tir_renew_defs.py @@ -53,7 +53,7 @@ def _check_block_signature_remap(lhs: Block, rhs: Block): def test_simple(): @T.prim_func # Buffer A should be remapped - def elementwise(A: T.Buffer[(128, 128), "float32"]): + def elementwise(A: T.Buffer((128, 128), "float32")): # Buffer B should be remapped B = T.alloc_buffer((128, 128), "float32") # i, j should be remapped @@ -86,7 +86,7 @@ def _get_block(f): def test_match_buffer(): @T.prim_func # A and B should be remapped - def func_match_buffer(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]): + def func_match_buffer(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")): with T.block("root"): s = T.var("int32") e = T.var("int32") diff --git a/tests/python/unittest/test_tir_schedule_analysis.py b/tests/python/unittest/test_tir_schedule_analysis.py index 349c4734c9ee8..0002de38794bf 100644 --- a/tests/python/unittest/test_tir_schedule_analysis.py +++ b/tests/python/unittest/test_tir_schedule_analysis.py @@ -149,9 +149,9 @@ def test_suggest_index_map_winograd(): class DenseTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -173,9 +173,9 @@ def main( class Conv2dNCHWcTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + 
placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): diff --git a/tests/python/unittest/test_tir_schedule_blockize.py b/tests/python/unittest/test_tir_schedule_blockize.py index a68170009bb5b..cd4ce663e58e5 100644 --- a/tests/python/unittest/test_tir_schedule_blockize.py +++ b/tests/python/unittest/test_tir_schedule_blockize.py @@ -26,7 +26,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,line-too-long,redefined-outer-name,unexpected-keyword-arg,too-many-nested-blocks @T.prim_func -def single_elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]): +def single_elementwise(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")): for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -39,8 +39,8 @@ def single_elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128 def test_blockize_outer(): @T.prim_func def after_blockize_outer( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ) -> None: with T.block("blockized_B"): vio = T.axis.spatial(1, 0) @@ -61,8 +61,8 @@ def after_blockize_outer( def test_blockize_inner(): @T.prim_func def after_blockize_inner( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), ) -> None: for i in T.serial(128): with T.block("blockized_B"): @@ -84,8 +84,8 @@ def after_blockize_inner( def test_two_elementwise_blockize_reverse_compute_at(): @T.prim_func def before_blockize_rca( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(8, 8): @@ -109,8 +109,8 @@ def before_blockize_rca( @T.prim_func def after_blockize_rca( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(8, 8): @@ -146,8 +146,8 @@ def after_blockize_rca( def test_two_elementwise_blockize_compute_at(): @T.prim_func def before_blockize_compute_at( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: # body # with T.block("root") @@ -175,8 +175,8 @@ def before_blockize_compute_at( @T.prim_func def after_blockize_compute_at( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0 in T.grid(8, 8): @@ -215,7 +215,7 @@ def after_blockize_compute_at( def test_blockize_init_loops(): @T.prim_func - def rowsum(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128,), "float32"]) -> None: + def rowsum(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128,), "float32")) -> None: for k, i in T.grid(128, 128): with T.block("B"): vk, vi = T.axis.remap("RS", [k, i]) @@ -225,8 +225,8 @@ def rowsum(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128,), "float32"]) - @T.prim_func def 
after_rowsum_blockize( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128,), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128,), "float32"), ) -> None: with T.block("blockized_B"): vko = T.axis.R(1, 0) @@ -252,8 +252,8 @@ def after_rowsum_blockize( def test_blockize_outer_int64_shape(preserve_unit_iters): @T.prim_func def single_elementwise_int64( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0, i1, j1 in T.grid(T.int64(1), T.int64(8), T.int64(16), T.int64(16)): with T.block("B"): @@ -263,8 +263,8 @@ def single_elementwise_int64( @T.prim_func def after_single_elementwise_int64_blockize( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0 in T.grid(T.int64(1), T.int64(8)): with T.block("B_o"): @@ -279,8 +279,8 @@ def after_single_elementwise_int64_blockize( @T.prim_func def after_single_elementwise_int64_blockize_preserve_unit_iters( - A: T.Buffer[(T.int64(16), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(16), T.int64(128)), "float32"], + A: T.Buffer((T.int64(16), T.int64(128)), "float32"), + B: T.Buffer((T.int64(16), T.int64(128)), "float32"), ) -> None: for i0, j0 in T.grid(T.int64(1), T.int64(8)): with T.block("B_o"): diff --git a/tests/python/unittest/test_tir_schedule_cache_index.py b/tests/python/unittest/test_tir_schedule_cache_index.py index d446249e018e0..a509c02b37f3c 100644 --- a/tests/python/unittest/test_tir_schedule_cache_index.py +++ b/tests/python/unittest/test_tir_schedule_cache_index.py @@ -41,7 +41,7 @@ def resize(a: T.handle, b: T.handle) -> None: @T.prim_func def resize_cache_index( - A: T.Buffer[(1, 3, 40, 40), "float32"], B: T.Buffer[(1, 3, 80, 80), "float32"] + A: T.Buffer((1, 3, 40, 40), "float32"), B: T.Buffer((1, 3, 80, 80), "float32") ) -> None: index_var_0 = T.alloc_buffer([80, 80], dtype="int32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) @@ -67,7 +67,7 @@ def resize_cache_index( @T.prim_func def bilinear_resize( - x: T.Buffer[(1, 3, 40, 40), "float16"], resize: T.Buffer[(1, 3, 80, 80), "float16"] + x: T.Buffer((1, 3, 40, 40), "float16"), resize: T.Buffer((1, 3, 80, 80), "float16") ): for i0, i1, i2, i3 in T.grid(1, 3, 80, 80): with T.block("resize"): @@ -336,7 +336,7 @@ def bilinear_resize( @T.prim_func def cached_bilinear_resize( - x: T.Buffer[(1, 3, 40, 40), "float16"], resize: T.Buffer[(1, 3, 80, 80), "float16"] + x: T.Buffer((1, 3, 40, 40), "float16"), resize: T.Buffer((1, 3, 80, 80), "float16") ): index_var_0 = T.alloc_buffer([80], dtype="float32", strides=[1]) index_var_1 = T.alloc_buffer([80], dtype="int32", strides=[1]) diff --git a/tests/python/unittest/test_tir_schedule_cache_read_write.py b/tests/python/unittest/test_tir_schedule_cache_read_write.py index bcb214594cb8c..be91505f3d154 100644 --- a/tests/python/unittest/test_tir_schedule_cache_read_write.py +++ b/tests/python/unittest/test_tir_schedule_cache_read_write.py @@ -251,7 +251,7 @@ def func_with_block_predicate() -> None: @T.prim_func -def inplace_func(data_io: T.Buffer[(64), "int32"]): +def inplace_func(data_io: T.Buffer((64), "int32")): data_1d = T.alloc_buffer([64], dtype="int32") for i0 in T.serial(64): with T.block("copy_in"): @@ -269,7 +269,7 @@ 
def inplace_func(data_io: T.Buffer[(64), "int32"]): @T.prim_func -def inplace_call(data_io: T.Buffer[(64), "int32"]): +def inplace_call(data_io: T.Buffer((64), "int32")): for i0 in T.serial(1): with T.block("ext_call"): T.reads(data_io[:64]) @@ -279,7 +279,7 @@ def inplace_call(data_io: T.Buffer[(64), "int32"]): @T.prim_func def cache_read_nested_seq_target( - B: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + B: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: A = T.alloc_buffer([128, 128], dtype="float32") A_global = T.alloc_buffer([128, 128], dtype="float32") @@ -597,7 +597,7 @@ def cache_read_shape_int64(var_A: T.handle, var_C: T.handle) -> None: @T.prim_func -def cache_read_inplace(data_io: T.Buffer[64, "int32"]) -> None: +def cache_read_inplace(data_io: T.Buffer(64, "int32")) -> None: data_1d = T.alloc_buffer([64], dtype="int32") data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") for ax0 in T.serial(64): @@ -626,7 +626,7 @@ def cache_read_inplace(data_io: T.Buffer[64, "int32"]) -> None: @T.prim_func -def cache_inplace_buffer(data_io: T.Buffer[64, "int32"]) -> None: +def cache_inplace_buffer(data_io: T.Buffer(64, "int32")) -> None: data_io_local = T.alloc_buffer([64], dtype="int32", scope="local") data_io_global = T.alloc_buffer([64], dtype="int32") data_io_global_1 = T.alloc_buffer([64], dtype="int32") @@ -1007,7 +1007,7 @@ def block_predicate_cache_write_output_buf() -> None: @T.prim_func def cache_write_allocate_const( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float16"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float16") ): B = T.alloc_buffer([128, 128], dtype="float32") const = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) @@ -1031,7 +1031,7 @@ def cache_write_allocate_const( @T.prim_func def cache_write_allocate_const_output( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float16"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float16") ): B = T.alloc_buffer([128, 128], dtype="float32") A_global = T.alloc_buffer([128, 128], dtype="float32") diff --git a/tests/python/unittest/test_tir_schedule_compute_at.py b/tests/python/unittest/test_tir_schedule_compute_at.py index 34ca937cc2bad..f94347409a6b5 100644 --- a/tests/python/unittest/test_tir_schedule_compute_at.py +++ b/tests/python/unittest/test_tir_schedule_compute_at.py @@ -793,7 +793,7 @@ def read_out_of_bound_after_compute_at(a: T.handle, c: T.handle) -> None: @T.prim_func -def multi_reduction(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(), "float32"]): +def multi_reduction(A: T.Buffer((16, 16), "float32"), C: T.Buffer((), "float32")): B = T.alloc_buffer((16, ), dtype="float32") for i, k in T.grid(16, 16): with T.block("B"): @@ -811,8 +811,8 @@ def multi_reduction(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(), "float32"] @T.prim_func def multi_reduction_after_compute_at( - A: T.Buffer[(16, 16), "float32"], - C:T.Buffer[(), "float32"], + A: T.Buffer((16, 16), "float32"), + C:T.Buffer((), "float32"), ): B = T.alloc_buffer((16, ), dtype="float32") for k in T.grid(16): @@ -879,9 +879,9 @@ def tiled_pooling_read_cache_after_compute_at(a: T.handle, b: T.handle) -> None: cache[h + kh - 1, w + kw - 1], 0.0, dtype="float32")) @T.prim_func -def non_uniform_tiled_conv(x: T.Buffer[(1, 3, 100, 100), "float32"], - w: T.Buffer[(16, 3, 3, 3), "float32"], - y: T.Buffer[(1, 16, 98, 98), "float32"]) -> None: +def non_uniform_tiled_conv(x: T.Buffer((1, 3, 100, 100), 
"float32"), + w: T.Buffer((16, 3, 3, 3), "float32"), + y: T.Buffer((1, 16, 98, 98), "float32")) -> None: x_global = T.alloc_buffer([1, 3, 100, 100], dtype="float32") for ax0, ax1, ax2, ax3 in T.grid(1, 3, 100, 100): with T.block("cache"): @@ -901,9 +901,9 @@ def non_uniform_tiled_conv(x: T.Buffer[(1, 3, 100, 100), "float32"], x_global[nn, cc // 16 * 3 + rc, hh + rh, ww + rw] * w[cc, rc, rh, rw] @T.prim_func -def non_uniform_tiled_conv_after_compute_at(x: T.Buffer[(1, 3, 100, 100), "float32"], - w: T.Buffer[(16, 3, 3, 3), "float32"], - y: T.Buffer[(1, 16, 98, 98), "float32"]) -> None: +def non_uniform_tiled_conv_after_compute_at(x: T.Buffer((1, 3, 100, 100), "float32"), + w: T.Buffer((16, 3, 3, 3), "float32"), + y: T.Buffer((1, 16, 98, 98), "float32")) -> None: x_global = T.alloc_buffer([1, 3, 100, 100], dtype="float32") for h_o, w_o in T.grid(7, 7): for ax0, ax1, ax2 in T.grid(3, 17, 17): @@ -928,9 +928,9 @@ def non_uniform_tiled_conv_after_compute_at(x: T.Buffer[(1, 3, 100, 100), "float x_global[nn, cc // 16 * 3 + rc, hh + rh, ww + rw] * w[cc, rc, rh, rw] @T.prim_func -def concat_two_elemwise(x: T.Buffer[(16,), "float32"], - y: T.Buffer[(8,), "float32"], - T_concat: T.Buffer[(24,), "float32"]) -> None: +def concat_two_elemwise(x: T.Buffer((16,), "float32"), + y: T.Buffer((8,), "float32"), + T_concat: T.Buffer((24,), "float32")) -> None: T_add_1 = T.alloc_buffer([16], dtype="float32") T_add_2 = T.alloc_buffer([8], dtype="float32") for i in T.serial(16): @@ -947,9 +947,9 @@ def concat_two_elemwise(x: T.Buffer[(16,), "float32"], T_concat[ax] = T.if_then_else(16 <= ax, T_add_2[ax - 16], T_add_1[ax], dtype="float32") @T.prim_func -def concat_two_elemwise_after_compute_at(x: T.Buffer[(16,), "float32"], - y: T.Buffer[(8,), "float32"], - T_concat: T.Buffer[(24,), "float32"]) -> None: +def concat_two_elemwise_after_compute_at(x: T.Buffer((16,), "float32"), + y: T.Buffer((8,), "float32"), + T_concat: T.Buffer((24,), "float32")) -> None: T_add_1 = T.alloc_buffer([16], dtype="float32") T_add_2 = T.alloc_buffer([8], dtype="float32") for i in T.serial(24): @@ -996,7 +996,7 @@ def floordiv_and_floormod_indices_after_reverse_compute_at(a: T.handle, b: T.han @T.prim_func -def tiled_repeat_op(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "float32"]) -> None: +def tiled_repeat_op(x: T.Buffer((4,), "float32"), T_repeat: T.Buffer((64,), "float32")) -> None: T_add = T.alloc_buffer([4], dtype="float32") for i0 in T.serial(4): with T.block("T_add"): @@ -1008,7 +1008,7 @@ def tiled_repeat_op(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "flo T_repeat[ax0] = T_add[ax0 // 16] @T.prim_func -def tiled_repeat_op_after_compute_at(x: T.Buffer[(4,), "float32"], T_repeat: T.Buffer[(64,), "float32"]) -> None: +def tiled_repeat_op_after_compute_at(x: T.Buffer((4,), "float32"), T_repeat: T.Buffer((64,), "float32")) -> None: T_add = T.alloc_buffer([4], dtype="float32") for i0_0 in T.serial(8): with T.block("T_add"): @@ -1020,7 +1020,7 @@ def tiled_repeat_op_after_compute_at(x: T.Buffer[(4,), "float32"], T_repeat: T.B T_repeat[ax0] = T_add[ax0 // 16] @T.prim_func -def static_bound(A: T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32"]) -> None: +def static_bound(A: T.Buffer((32, 1), "float32"), C: T.Buffer((32, 1), "float32")) -> None: B = T.alloc_buffer((32, 1), "float32") for i, j in T.grid(32, 1): with T.block("B"): @@ -1035,7 +1035,7 @@ def static_bound(A: T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32" C[vi, vj] = B[vi, vj] + 1.0 @T.prim_func -def static_bound_after_compute_at(A: 
T.Buffer[(32, 1), "float32"], C: T.Buffer[(32, 1), "float32"]) -> None: +def static_bound_after_compute_at(A: T.Buffer((32, 1), "float32"), C: T.Buffer((32, 1), "float32")) -> None: B = T.alloc_buffer((32, 1), "float32") for i in range(32): for ax0, ax1 in T.grid(1, 1): @@ -1251,7 +1251,7 @@ def test_compute_at_simplify_static_bound(use_block_name): def test_compute_at_non_perfect_channel_group(use_block_name): @T.prim_func def grouped_channel_bias( - X: T.Buffer[(720, 8, 8), "float32"], Y: T.Buffer[(720, 8, 8), "float32"] + X: T.Buffer((720, 8, 8), "float32"), Y: T.Buffer((720, 8, 8), "float32") ): B = T.alloc_buffer([45], dtype="float32", scope="") for i in T.grid(45): @@ -1266,7 +1266,7 @@ def grouped_channel_bias( @T.prim_func def grouped_channel_bias_non_perfect_tiled( - X: T.Buffer[(720, 8, 8), "float32"], Y: T.Buffer[(720, 8, 8), "float32"] + X: T.Buffer((720, 8, 8), "float32"), Y: T.Buffer((720, 8, 8), "float32") ): B = T.alloc_buffer([45], dtype="float32") for c_o in range(2): @@ -1356,9 +1356,9 @@ def _create_prim_func(): def test_compute_at_to_index(): @T.prim_func def multi_producers_conv( - data: T.Buffer[(1, 3, 224, 224), "int8"], - w: T.Buffer[(16, 3, 7, 7), "int8"], - conv: T.Buffer[(1, 16, 112, 112), "int32"], + data: T.Buffer((1, 3, 224, 224), "int8"), + w: T.Buffer((16, 3, 7, 7), "int8"), + conv: T.Buffer((1, 16, 112, 112), "int32"), ) -> None: pad = T.alloc_buffer([1, 3, 230, 230], dtype="int8") wbuf = T.alloc_buffer([16, 3, 7, 7], dtype="int8") @@ -1395,9 +1395,9 @@ def multi_producers_conv( @T.prim_func def multi_producers_after_compute_at( - data: T.Buffer[(1, 3, 224, 224), "int8"], - w: T.Buffer[(16, 3, 7, 7), "int8"], - conv: T.Buffer[(1, 16, 112, 112), "int32"], + data: T.Buffer((1, 3, 224, 224), "int8"), + w: T.Buffer((16, 3, 7, 7), "int8"), + conv: T.Buffer((1, 16, 112, 112), "int32"), ) -> None: pad = T.alloc_buffer([1, 3, 230, 230], dtype="int8") wbuf = T.alloc_buffer([16, 3, 7, 7], dtype="int8") @@ -1444,7 +1444,7 @@ def multi_producers_after_compute_at( def test_reverse_compute_at_to_index(): @T.prim_func - def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"]) -> None: + def main(A: T.Buffer((128, 128), "float32"), D: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") C = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0, i_1 in T.grid(8, 8, 16): @@ -1471,7 +1471,7 @@ def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"]) @T.prim_func def main_reverse_compute_at( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer([128, 128], dtype="float32") C = T.alloc_buffer([128, 128], dtype="float32") @@ -1507,7 +1507,7 @@ def main_reverse_compute_at( def test_reverse_compute_at_with_unit_loop(): @T.prim_func - def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"]) -> None: + def main(A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 2, 1), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i_0, j_0, i_1 in T.grid(T.int64(8), T.int64(8), T.int64(16)): for j_1 in T.serial(T.int64(16)): @@ -1526,7 +1526,7 @@ def main(A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"]) @T.prim_func def main_reverse_compute_at( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 2, 1), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 2, 1), "float32") ): B = T.alloc_buffer([128, 128], 
dtype="float32") for i_0, j_0, i_1 in T.grid(T.int64(8), T.int64(8), T.int64(16)): diff --git a/tests/python/unittest/test_tir_schedule_compute_inline.py b/tests/python/unittest/test_tir_schedule_compute_inline.py index bd46e10efaea8..ee5e85e4f05b1 100644 --- a/tests/python/unittest/test_tir_schedule_compute_inline.py +++ b/tests/python/unittest/test_tir_schedule_compute_inline.py @@ -172,7 +172,7 @@ def elementwise_multi_reverse_loads_inlined(a: T.handle, c: T.handle) -> None: @T.prim_func def elementwise_reverse_affine_load( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 32, 8, 8), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 32, 8, 8), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -190,7 +190,7 @@ def elementwise_reverse_affine_load( @T.prim_func def elementwise_reverse_affine_load_inlined( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 32, 8, 8), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 32, 8, 8), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -207,9 +207,9 @@ def elementwise_reverse_affine_load_inlined( @T.prim_func def elementwise_reverse_affine_load_unit_iter( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: C = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -224,9 +224,9 @@ def elementwise_reverse_affine_load_unit_iter( @T.prim_func def elementwise_reverse_affine_load_unit_iter_inlined( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -236,9 +236,9 @@ def elementwise_reverse_affine_load_unit_iter_inlined( @T.prim_func def elementwise_reverse_affine_load_unit_iter_simplified( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: C = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -253,9 +253,9 @@ def elementwise_reverse_affine_load_unit_iter_simplified( @T.prim_func def elementwise_reverse_affine_load_unit_iter_simplified_inlined( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(8, 16, 1), "float32"], - D: T.Buffer[(1, 8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((8, 16, 1), "float32"), + D: T.Buffer((1, 8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -265,7 +265,7 @@ def elementwise_reverse_affine_load_unit_iter_simplified_inlined( @T.prim_func def elementwise_reverse_affine_chain( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 8, 16, 128), "float32") ): B = T.alloc_buffer((128, 128)) C = T.alloc_buffer((8, 16, 128)) @@ -285,7 +285,7 @@ def elementwise_reverse_affine_chain( @T.prim_func def elementwise_reverse_affine_chain_inlined( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(1, 8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((1, 8, 16, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with 
T.block("B"): @@ -295,8 +295,8 @@ def elementwise_reverse_affine_chain_inlined( @T.prim_func def elementwise_multi_reverse_affine_load( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((8, 16, 128), "float32"), ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -311,8 +311,8 @@ def elementwise_multi_reverse_affine_load( @T.prim_func def elementwise_multi_reverse_affine_load_inlined( - A: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(8, 16, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + C: T.Buffer((8, 16, 128), "float32"), ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -322,7 +322,7 @@ def elementwise_multi_reverse_affine_load_inlined( @T.prim_func def elementwise_reverse_non_affine_load( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 16, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 16, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -505,8 +505,8 @@ def matmul_relu(var_A: T.handle, var_B: T.handle, var_compute: T.handle) -> None @T.prim_func def inline_block_with_init( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: B_rf = T.alloc_buffer([1, 512, 1, 1, 49], dtype="float32") for i0, i1, i2, i3, i4, i5 in T.grid(1, 512, 1, 1, 49, 1): @@ -542,9 +542,9 @@ def inline_block_with_init( @T.prim_func def exp_exp_opaque_access_with_tvm_access_ptr( - lookup_table: T.Buffer[(1024,), "int8"], - x: T.Buffer[(16,), "float16"], - compute: T.Buffer[(16,), "float16"], + lookup_table: T.Buffer((1024,), "int8"), + x: T.Buffer((16,), "float16"), + compute: T.Buffer((16,), "float16"), ) -> None: compute_1 = T.alloc_buffer([16], dtype="float16") for i0 in T.serial(16): @@ -567,9 +567,9 @@ def exp_exp_opaque_access_with_tvm_access_ptr( @T.prim_func def exp_exp_opaque_access_with_tvm_access_ptr_inlined( - lookup_table: T.Buffer[(1024,), "int8"], - x: T.Buffer[(16,), "float16"], - compute: T.Buffer[(16,), "float16"], + lookup_table: T.Buffer((1024,), "int8"), + x: T.Buffer((16,), "float16"), + compute: T.Buffer((16,), "float16"), ) -> None: for i0 in T.serial(16): with T.block("compute_1"): @@ -587,7 +587,7 @@ def exp_exp_opaque_access_with_tvm_access_ptr_inlined( @T.prim_func def elementwise_overcomputed_producer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(127, 127), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((127, 127), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -602,7 +602,7 @@ def elementwise_overcomputed_producer( @T.prim_func def elementwise_overcomputed_producer_reverse_inlined( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(127, 127), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((127, 127), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -613,7 +613,7 @@ def elementwise_overcomputed_producer_reverse_inlined( @T.prim_func def elementwise_producer_not_cover_consumer( - A: T.Buffer[(128, 128), "float32"], D: T.Buffer[(256, 128), "float32"] + A: T.Buffer((128, 128), "float32"), D: T.Buffer((256, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128)) for i, j in T.grid(128, 128): @@ -659,7 +659,7 @@ def elementwise_predicate_producer_inlined(a: T.handle, c: T.handle) -> None: @tvm.script.ir_module class Conv2dInt8_TensorCore_with_predicate: @T.prim_func - def 
main(p0: T.Buffer[(16, 56, 56, 64), "int8"], p1: T.Buffer[(256, 1, 1, 64), "int8"], p2: T.Buffer[(1, 1, 1, 256), "int32"], p3: T.Buffer[(1, 1, 1, 256), "int32"], p4: T.Buffer[256, "int32"], p5: T.Buffer[256, "int32"], p6: T.Buffer[256, "int32"], p7: T.Buffer[(), "int32"], p8: T.Buffer[1, "int32"], p9: T.Buffer[(16, 56, 56, 256), "int32"], compute: T.Buffer[(16, 56, 56, 256), "int32"]): + def main(p0: T.Buffer((16, 56, 56, 64), "int8"), p1: T.Buffer((256, 1, 1, 64), "int8"), p2: T.Buffer((1, 1, 1, 256), "int32"), p3: T.Buffer((1, 1, 1, 256), "int32"), p4: T.Buffer(256, "int32"), p5: T.Buffer(256, "int32"), p6: T.Buffer(256, "int32"), p7: T.Buffer((), "int32"), p8: T.Buffer(1, "int32"), p9: T.Buffer((16, 56, 56, 256), "int32"), compute: T.Buffer((16, 56, 56, 256), "int32")): # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_schedule_decompose_padding.py b/tests/python/unittest/test_tir_schedule_decompose_padding.py index ead8b0b332622..51e82394e42e2 100644 --- a/tests/python/unittest/test_tir_schedule_decompose_padding.py +++ b/tests/python/unittest/test_tir_schedule_decompose_padding.py @@ -43,14 +43,14 @@ def check_decompose_padding(origin, scheduled, expected, check_run=False): def test_1d_decompose_padding(): @T.prim_func - def before_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def before_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in range(140): with T.block("block"): vi = T.axis.remap("S", [i]) y[vi] = T.if_then_else(vi >= 6 and vi < 134, x[vi - 6], 0, dtype="int32") @T.prim_func - def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): + def after_decompose(x: T.Buffer(128, "int32"), y: T.Buffer(140, "int32")): for i in T.serial(140): with T.block("block_pad_const"): vi = T.axis.spatial(140, i) @@ -72,7 +72,7 @@ def after_decompose(x: T.Buffer[128, "int32"], y: T.Buffer[140, "int32"]): @T.prim_func def sum_pool_2d( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ): pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): @@ -99,7 +99,7 @@ def test_decompose_hw_padding_direct(): @T.prim_func def pooling_decompose_0( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ): pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): @@ -130,7 +130,7 @@ def test_decompose_hw_padding_tiled(): @T.prim_func def pooling_decompose_1( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0 in T.grid(1, 3, 3): @@ -190,7 +190,7 @@ def test_decompose_hw_padding_tiled_and_lift_pad(): @T.prim_func def pooling_decompose_2( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0, ax0, ax1, ax2 in T.grid(1, 3, 3, 16, 81, 81): @@ -250,7 +250,7 @@ def test_decompose_hw_padding_non_perfect_tiled(): @T.prim_func def 
pooling_decompose_3( - x: T.Buffer[(1, 16, 225, 225), "int8"], tensor: T.Buffer[(1, 16, 225, 225), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), tensor: T.Buffer((1, 16, 225, 225), "int8") ) -> None: pad_temp = T.alloc_buffer([1, 16, 231, 231], dtype="int8") for i0, i2_0, i3_0 in T.grid(1, 3, 3): @@ -314,7 +314,7 @@ def test_decompose_wrt_single_child_subtree(): @T.prim_func def pad_op( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer([1, 16, 231, 231], dtype="int8") + x: T.Buffer((1, 16, 225, 225), "int8"), y: T.Buffer([1, 16, 231, 231], dtype="int8") ): for i0, i1, i2, i3 in T.grid(1, 16, 231, 231): with T.block("pad_temp"): @@ -328,7 +328,7 @@ def pad_op( @T.prim_func def pad_op_after( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer[(1, 16, 231, 231), "int8"] + x: T.Buffer((1, 16, 225, 225), "int8"), y: T.Buffer((1, 16, 231, 231), "int8") ): for i0, i1 in T.grid(1, 16): for i2, i3 in T.grid(231, 231): @@ -354,7 +354,7 @@ def test_not_to_decompose_trivial_predicate(): @T.prim_func def trivial_pad( - x: T.Buffer[(1, 16, 225, 225), "int8"], y: T.Buffer([1, 16, 225, 225], dtype="int8") + x: T.Buffer((1, 16, 225, 225), "int8"), y: T.Buffer([1, 16, 225, 225], dtype="int8") ): for i0, i1, i2, i3 in T.grid(1, 16, 225, 225): with T.block("pad_temp"): diff --git a/tests/python/unittest/test_tir_schedule_for_kind.py b/tests/python/unittest/test_tir_schedule_for_kind.py index 132e8b8b3fa51..8994f9de0ed4e 100644 --- a/tests/python/unittest/test_tir_schedule_for_kind.py +++ b/tests/python/unittest/test_tir_schedule_for_kind.py @@ -279,9 +279,9 @@ def thread_bound_block_inside_init(a: T.handle, b: T.handle) -> None: @T.prim_func def decomposed_gemm( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): local = T.alloc_buffer((16, 16), "float32") for i, j in T.grid(4, 4): @@ -305,9 +305,9 @@ def decomposed_gemm( @T.prim_func def decomposed_gemm_after_vectorize( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): local = T.alloc_buffer((16, 16), "float32") for i, j in T.grid(4, 4): @@ -332,7 +332,7 @@ def decomposed_gemm_after_vectorize( @T.prim_func def nested_block_bind( - A: T.Buffer[(16, 16, 16, 16), "float32"], B: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16, 16), "float32"), B: T.Buffer((16, 16, 16), "float32") ): for i, j in T.grid(16, 16): with T.block("outer"): @@ -347,7 +347,7 @@ def nested_block_bind( @T.prim_func def thread_bound_nested_block( - A: T.Buffer[(16, 16, 16, 16), "float32"], B: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16, 16), "float32"), B: T.Buffer((16, 16, 16), "float32") ) -> None: for i in T.serial(16): for j in T.thread_binding(16, thread="blockIdx.x"): @@ -364,7 +364,7 @@ def thread_bound_nested_block( @T.prim_func def nested_block_bind_after_cache_read( - A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 16), "float32"), B: T.Buffer((16,), "float32") ) -> None: for i in T.serial(16): with T.block("outer"): @@ -385,7 +385,7 @@ def nested_block_bind_after_cache_read( @T.prim_func def thread_bound_nested_block_after_cache_read( - A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 16), "float32"), B: T.Buffer((16,), "float32") ) -> None: for 
i in T.thread_binding(16, thread="blockIdx.x"): with T.block("outer"): @@ -406,9 +406,9 @@ def thread_bound_nested_block_after_cache_read( @T.prim_func def decomposed_gemm_parallelize_init( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: local = T.alloc_buffer([16, 16], dtype="float32") for i, j in T.grid(4, 4): @@ -438,7 +438,7 @@ def decomposed_gemm_parallelize_init( @T.prim_func -def scatter_compute(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]): +def scatter_compute(A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32")): for i in T.grid(8): with T.block("first_half"): vi = T.axis.spatial(16, 8 + i) @@ -452,7 +452,7 @@ def scatter_compute(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"] @T.prim_func def scatter_compute_parallelize( - A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"] + A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32") ) -> None: # body # with T.block("root") diff --git a/tests/python/unittest/test_tir_schedule_pad_einsum.py b/tests/python/unittest/test_tir_schedule_pad_einsum.py index 89628db4ff745..ec4d000655abe 100644 --- a/tests/python/unittest/test_tir_schedule_pad_einsum.py +++ b/tests/python/unittest/test_tir_schedule_pad_einsum.py @@ -31,9 +31,9 @@ @T.prim_func def matmul_before( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared = T.alloc_buffer((128, 127), "float32", scope="shared") B_shared = T.alloc_buffer((127, 127), "float32", scope="shared") @@ -60,9 +60,9 @@ def matmul_before( @T.prim_func def matmul_expected( - A: T.Buffer[(128, 127), "float32"], - B: T.Buffer[(127, 127), "float32"], - C: T.Buffer[(128, 127), "float32"], + A: T.Buffer((128, 127), "float32"), + B: T.Buffer((127, 127), "float32"), + C: T.Buffer((128, 127), "float32"), ) -> None: A_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") B_shared_padded = T.alloc_buffer([128, 128], dtype="float32", scope="shared") diff --git a/tests/python/unittest/test_tir_schedule_reindex.py b/tests/python/unittest/test_tir_schedule_reindex.py index b5e66943019fb..60e3f004f59cb 100644 --- a/tests/python/unittest/test_tir_schedule_reindex.py +++ b/tests/python/unittest/test_tir_schedule_reindex.py @@ -26,7 +26,7 @@ @T.prim_func def transpose_elementwise( - A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32") ) -> None: for i, j in T.grid(128, 128): with T.block("B"): @@ -36,7 +36,7 @@ def transpose_elementwise( @T.prim_func def transpose_elementwise_reindex_read( - A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32") ) -> None: A_reindex = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -51,9 +51,9 @@ def transpose_elementwise_reindex_read( @T.prim_func def conv2d_nhwc( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) 
-> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -78,9 +78,9 @@ def conv2d_nhwc( @T.prim_func def conv2d_nhwc_reindex_data( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") ReindexInput = T.alloc_buffer([1, 112, 112, 7, 7, 3], dtype="float32") @@ -152,9 +152,9 @@ def conv2d_nhwc_reindex_weight( @T.prim_func def matmul( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: for i0, i1, i2 in T.grid(512, 512, 512): with T.block("matmul"): @@ -168,9 +168,9 @@ def matmul( @T.prim_func def matmul_reindex_write( - A: T.Buffer[(512, 512), "float32"], - B: T.Buffer[(512, 512), "float32"], - C: T.Buffer[(512, 512), "float32"], + A: T.Buffer((512, 512), "float32"), + B: T.Buffer((512, 512), "float32"), + C: T.Buffer((512, 512), "float32"), ) -> None: C_reindex = T.alloc_buffer([512, 512], dtype="float32") for i0, i1, i2 in T.grid(512, 512, 512): @@ -190,7 +190,7 @@ def matmul_reindex_write( @T.prim_func -def multiple_read(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def multiple_read(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -199,9 +199,9 @@ def multiple_read(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "f @T.prim_func def mixed_dtype( - p0: T.Buffer[(T.int64(2), 1280), "float16"], - p1: T.Buffer[(1280, 1280), "float16"], - T_matmul_NT: T.Buffer[(T.int64(2), 1280), "float16"], + p0: T.Buffer((T.int64(2), 1280), "float16"), + p1: T.Buffer((1280, 1280), "float16"), + T_matmul_NT: T.Buffer((T.int64(2), 1280), "float16"), ) -> None: for i0, i1, i2 in T.grid(T.int64(2), 1280, 1280): with T.block("T_matmul_NT"): @@ -216,9 +216,9 @@ def mixed_dtype( @T.prim_func def mixed_dtype_reindex_write( - p0: T.Buffer[(T.int64(2), 1280), "float16"], - p1: T.Buffer[(1280, 1280), "float16"], - T_matmul_NT: T.Buffer[(T.int64(2), 1280), "float16"], + p0: T.Buffer((T.int64(2), 1280), "float16"), + p1: T.Buffer((1280, 1280), "float16"), + T_matmul_NT: T.Buffer((T.int64(2), 1280), "float16"), ) -> None: T_matmul_NT_reindex = T.alloc_buffer([T.int64(2), 1280], dtype="float16") for i0, i1, i2 in T.grid(T.int64(2), 1280, 1280): @@ -241,9 +241,9 @@ def mixed_dtype_reindex_write( @T.prim_func def matmul_unit_dim( - A: T.Buffer[(1, 512), "float32"], - B: T.Buffer[(512, 1), "float32"], - C: T.Buffer[(1, 1), "float32"], + A: T.Buffer((1, 512), "float32"), + B: T.Buffer((512, 1), "float32"), + C: T.Buffer((1, 1), "float32"), ) -> None: for i0, i1, i2 in T.grid(1, 1, 512): with T.block("matmul"): @@ -257,9 +257,9 @@ def matmul_unit_dim( @T.prim_func def matmul_unit_dim_reindex_write( - A: T.Buffer[(1, 512), "float32"], - B: T.Buffer[(512, 1), "float32"], - C: T.Buffer[(1, 1), "float32"], + A: T.Buffer((1, 512), "float32"), + B: T.Buffer((512, 1), "float32"), + C: T.Buffer((1, 1), "float32"), ) -> None: C_reindex = T.alloc_buffer([1, 1], dtype="float32") for i0, i1, i2 in T.grid(1, 1, 512): diff 
--git a/tests/python/unittest/test_tir_schedule_reorder.py b/tests/python/unittest/test_tir_schedule_reorder.py index b859b655efc84..763ce8c36ef01 100644 --- a/tests/python/unittest/test_tir_schedule_reorder.py +++ b/tests/python/unittest/test_tir_schedule_reorder.py @@ -216,7 +216,7 @@ def test_reorder_with_opaque_access(): def test_reorder_overlapped_access(): @T.prim_func - def overlapped_access(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def overlapped_access(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): # example to write first axis multiple times for v0, v1, v2 in T.grid(6, 4, 4): with T.block("block"): @@ -225,7 +225,7 @@ def overlapped_access(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "flo B[i, j] = A[i, j] + 1.0 @T.prim_func - def overlapped_access_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def overlapped_access_reorder(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): # example to write first axis multiple times for v0, v2, v1 in T.grid(6, 4, 4): with T.block("block"): @@ -242,7 +242,7 @@ def overlapped_access_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, def test_reorder_with_partial_affineness(): @T.prim_func - def non_affine_func(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def non_affine_func(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): for v0, v1, v2 in T.grid(6, 4, 4): with T.block("block"): i = T.axis.spatial(14, v0 * v0 + v1) @@ -250,7 +250,7 @@ def non_affine_func(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float B[i, j] = A[i, j] + 1.0 @T.prim_func - def non_affine_func_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4), "float32"]): + def non_affine_func_reorder(A: T.Buffer((14, 4), "float32"), B: T.Buffer((14, 4), "float32")): for v0, v2, v1 in T.grid(6, 4, 4): with T.block("block"): i = T.axis.spatial(14, v0 * v0 + v1) @@ -270,7 +270,7 @@ def non_affine_func_reorder(A: T.Buffer[(14, 4), "float32"], B: T.Buffer[(14, 4) def test_reorder_with_cascade_tiled_ops(): @T.prim_func def cascade_pool_ops( - x: T.Buffer[(1, 16, 112, 112), "float32"], y2: T.Buffer[(1, 16, 108, 108), "float32"] + x: T.Buffer((1, 16, 112, 112), "float32"), y2: T.Buffer((1, 16, 108, 108), "float32") ) -> None: y1 = T.alloc_buffer([1, 16, 110, 110], dtype="float32") for n, c, h, w, kh, kw in T.grid(1, 16, 110, 110, 3, 3): @@ -288,7 +288,7 @@ def cascade_pool_ops( @T.prim_func def cascade_pool_ops_tile_reordered( - x: T.Buffer[(1, 16, 112, 112), "float32"], y2: T.Buffer[(1, 16, 108, 108), "float32"] + x: T.Buffer((1, 16, 112, 112), "float32"), y2: T.Buffer((1, 16, 108, 108), "float32") ) -> None: y1 = T.alloc_buffer([1, 16, 110, 110], dtype="float32") for n, c, h_o in T.grid(1, 16, 27): diff --git a/tests/python/unittest/test_tir_schedule_rfactor.py b/tests/python/unittest/test_tir_schedule_rfactor.py index 964fe772d8af5..766cc3f8671c0 100644 --- a/tests/python/unittest/test_tir_schedule_rfactor.py +++ b/tests/python/unittest/test_tir_schedule_rfactor.py @@ -252,7 +252,7 @@ def transformed_square_sum_square_root_factor_one_1(a: T.handle, d: T.handle) -> @T.prim_func def square_sum_square_root_factor_one_1_rfactor( - A: T.Buffer[(16, 256, 256), "float32"], D: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 256, 256), "float32"), D: T.Buffer((16,), "float32") ) -> None: C = T.alloc_buffer([16], dtype="float32") C_rf = T.alloc_buffer([1, 16], dtype="float32") @@ -299,7 +299,7 @@ def 
transformed_square_sum_square_root_factor_one_2(a: T.handle, d: T.handle) -> @T.prim_func def square_sum_square_root_factor_one_2_rfactor( - A: T.Buffer[(16, 256, 256), "float32"], D: T.Buffer[(16,), "float32"] + A: T.Buffer((16, 256, 256), "float32"), D: T.Buffer((16,), "float32") ) -> None: C = T.alloc_buffer([16], dtype="float32") C_rf = T.alloc_buffer([16, 1], dtype="float32") @@ -636,8 +636,8 @@ def multiple_reduction_blocks_rfactor(a: T.handle, f: T.handle) -> None: @T.prim_func def rfactor_spatial_only( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: for _i0, i1, _i2, _i3, i4, _i5 in T.grid(1, 512, 1, 1, 49, 1): with T.block("acc"): @@ -658,8 +658,8 @@ def rfactor_spatial_only( @T.prim_func def rfactor_spatial_only_after( - A: T.Buffer[(1, 512, 7, 7), "float32"], - B: T.Buffer[(1, 512, 1, 1), "float32"], + A: T.Buffer((1, 512, 7, 7), "float32"), + B: T.Buffer((1, 512, 1, 1), "float32"), ) -> None: # body # with T.block("root") @@ -686,10 +686,10 @@ def rfactor_spatial_only_after( @T.prim_func def argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -708,10 +708,10 @@ def argmax_split( @T.prim_func def argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmin"): @@ -730,10 +730,10 @@ def argmin_split_init_update_reordered( @T.prim_func def argmax_split_different_shape( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(256,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((256,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -752,10 +752,10 @@ def argmax_split_different_shape( @T.prim_func def argmax_split_different_indices( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -774,10 +774,10 @@ def argmax_split_different_indices( @T.prim_func def argmax_split_init_not_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with 
T.block("argmax"): @@ -797,10 +797,10 @@ def argmax_split_init_not_bufferstore( @T.prim_func def argmax_split_init_buffer_duplicate( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -819,10 +819,10 @@ def argmax_split_init_buffer_duplicate( @T.prim_func def argmax_split_letstmt_fewer_than_init( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -840,10 +840,10 @@ def argmax_split_letstmt_fewer_than_init( @T.prim_func def argmax_split_letstmt_more_than_init( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -861,10 +861,10 @@ def argmax_split_letstmt_more_than_init( @T.prim_func def argmax_split_let_body_neither_seqstmt_nor_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -882,10 +882,10 @@ def argmax_split_let_body_neither_seqstmt_nor_bufferstore( @T.prim_func def argmax_split_init_update_inconsistent_bufferstore_number( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -905,10 +905,10 @@ def argmax_split_init_update_inconsistent_bufferstore_number( @T.prim_func def argmax_split_body_seq_not_bufferstore( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -927,10 +927,10 @@ def argmax_split_body_seq_not_bufferstore( @T.prim_func def argmax_split_body_bufferstore_value_not_var( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + 
argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -949,10 +949,10 @@ def argmax_split_body_bufferstore_value_not_var( @T.prim_func def argmax_split_body_bufferstore_value_unbound_var( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: v_unbound = T.var("int32") for i0, i1_0, i1_1 in T.grid(128, 4, 32): @@ -972,10 +972,10 @@ def argmax_split_body_bufferstore_value_unbound_var( @T.prim_func def argmax_split_one_let_var_used_multi_times( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "int32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "int32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "int32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "int32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -994,10 +994,10 @@ def argmax_split_one_let_var_used_multi_times( @T.prim_func def argmax_split_body_one_buffer_updated_multi_times( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "int32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "int32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "int32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "int32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -1016,11 +1016,11 @@ def argmax_split_body_one_buffer_updated_multi_times( @T.prim_func def argmax_split_init_buffer_not_match( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v0_1: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v0_1: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0, i1_1 in T.grid(128, 4, 32): with T.block("argmax"): @@ -1039,10 +1039,10 @@ def argmax_split_init_buffer_not_match( @T.prim_func def argmax_split_rfactor( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: argmax_v0_rf = T.alloc_buffer([128, 32], dtype="int32") argmax_v1_rf = T.alloc_buffer([128, 32], dtype="float32") @@ -1086,10 +1086,10 @@ def argmax_split_rfactor( @T.prim_func def argmin_split_rfactor( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: argmin_v0_rf = T.alloc_buffer([128, 32], dtype="int32") argmin_v1_rf = T.alloc_buffer([128, 32], dtype="float32") @@ -1133,7 +1133,7 @@ def argmin_split_rfactor( @T.prim_func def argmax_topi_rfactor( - placeholder: T.Buffer[(1, 
32), "int32"], placeholder_red: T.Buffer[1, "int32"] + placeholder: T.Buffer((1, 32), "int32"), placeholder_red: T.Buffer(1, "int32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_red_temp_v0 = T.alloc_buffer([1], dtype="int32") @@ -1194,7 +1194,7 @@ def argmax_topi_rfactor( @T.prim_func def argmin_topi_rfactor( - placeholder: T.Buffer[(1, 32), "int32"], placeholder_red: T.Buffer[1, "int32"] + placeholder: T.Buffer((1, 32), "int32"), placeholder_red: T.Buffer(1, "int32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_red_temp_v0 = T.alloc_buffer([1], dtype="int32") diff --git a/tests/python/unittest/test_tir_schedule_rolling_buffer.py b/tests/python/unittest/test_tir_schedule_rolling_buffer.py index c55c41e451cc3..9597a5db72fc0 100644 --- a/tests/python/unittest/test_tir_schedule_rolling_buffer.py +++ b/tests/python/unittest/test_tir_schedule_rolling_buffer.py @@ -61,7 +61,7 @@ def _tile_nd(s, tile, block_name): def test_1d_rolling_buffer(): @T.prim_func - def before(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): + def before(A: T.Buffer((4, 12), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer((4, 10), "int32") for c in T.serial(4): for i in T.serial(0, 10): @@ -80,7 +80,7 @@ def before(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): C[cc, vi] = C[cc, vi] + B[cc, vi + vk] @T.prim_func - def expected(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): + def expected(A: T.Buffer((4, 12), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer([4, 6], dtype="int32") for c, i_0 in T.grid(4, 2): for ax0, ax1 in T.grid(6, 3): @@ -114,7 +114,7 @@ def expected(A: T.Buffer[(4, 12), "int32"], C: T.Buffer[(4, 8), "int32"]): @T.prim_func -def cascade_2_max_pool2d(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): +def cascade_2_max_pool2d(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 10, 10, 16], dtype="int8") for i0, i1, i2, i3, i4, i5 in T.grid(1, 10, 10, 16, 3, 3): with T.block("B"): @@ -132,7 +132,7 @@ def cascade_2_max_pool2d(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8 @T.prim_func def cascade_3_max_pool2d_with_stride( - A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"] + A: T.Buffer((1, 24, 24, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8") ): B_0 = T.alloc_buffer([1, 22, 22, 16], dtype="int8") B_1 = T.alloc_buffer([1, 10, 10, 16], dtype="int8") @@ -164,7 +164,7 @@ def cascade_3_max_pool2d_with_stride( def test_cascade_max_pool2d_w_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 10, 6, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 1, 2, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(10, 6, 16, 3, 3): @@ -205,7 +205,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_max_pool2d_h_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 1, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(6, 10, 16, 3, 3): @@ -246,7 +246,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i 
def test_cascade_max_pool2d_h_w_c_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]): + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")): B = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 2): for ax0, ax1, ax2, ax3, ax4 in T.grid(6, 6, 8, 3, 3): @@ -288,7 +288,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_max_pool2d_non_perfect_tiled(): @T.prim_func - def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def expected(A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")) -> None: B = T.alloc_buffer([1, 8, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(8, 8, 16, 3, 3): @@ -335,7 +335,7 @@ def expected(A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_cascade_3_max_pool2d_with_stride(): @T.prim_func - def expected(A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: + def expected(A: T.Buffer((1, 24, 24, 16), "int8"), C: T.Buffer((1, 8, 8, 16), "int8")) -> None: B_0 = T.alloc_buffer([1, 13, 22, 16], dtype="int8") B_1 = T.alloc_buffer([1, 6, 10, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 2, 2, 1): @@ -396,7 +396,7 @@ def expected(A: T.Buffer[(1, 24, 24, 16), "int8"], C: T.Buffer[(1, 8, 8, 16), "i def test_upscale(): @T.prim_func - def before(A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "int8"]) -> None: + def before(A: T.Buffer((1, 16, 16, 16), "int8"), C: T.Buffer((1, 24, 24, 16), "int8")) -> None: B = T.alloc_buffer([1, 14, 14, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 5, 5, 1): for ax0, ax1, ax2, ax3, ax4 in T.grid(5, 5, 16, 3, 3): @@ -432,7 +432,7 @@ def before(A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "i @T.prim_func def expected( - A: T.Buffer[(1, 16, 16, 16), "int8"], C: T.Buffer[(1, 24, 24, 16), "int8"] + A: T.Buffer((1, 16, 16, 16), "int8"), C: T.Buffer((1, 24, 24, 16), "int8") ) -> None: B = T.alloc_buffer([1, 5, 14, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 5, 5, 1): @@ -480,7 +480,7 @@ def expected( def test_fail_rolling_buffer_multi_writers(): @T.prim_func def func_multi_writers( - A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 12, 12, 16), "int8"] + A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 12, 12, 16), "int8") ): B = T.alloc_buffer([1, 12, 12, 16], dtype="int8") for i0, i1, i2, i3 in T.grid(1, 3, 3, 1): @@ -525,7 +525,7 @@ def func_multi_writers( def test_fail_rolling_buffer_not_match(): @T.prim_func def func_non_overlap( - A: T.Buffer[(1, 12, 12, 16), "int8"], C: T.Buffer[(1, 12, 12, 16), "int8"] + A: T.Buffer((1, 12, 12, 16), "int8"), C: T.Buffer((1, 12, 12, 16), "int8") ): B = T.alloc_buffer([1, 12, 12, 16], dtype="int8") for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 3, 3, 1): diff --git a/tests/python/unittest/test_tir_schedule_sampling.py b/tests/python/unittest/test_tir_schedule_sampling.py index 0c2a3d27ffdb2..c2f3f89e6e12d 100644 --- a/tests/python/unittest/test_tir_schedule_sampling.py +++ b/tests/python/unittest/test_tir_schedule_sampling.py @@ -41,9 +41,9 @@ def elementwise(a: T.handle, b: T.handle) -> None: @T.prim_func def tiled_conv2d_with_padding( - inputs: T.Buffer[(1, 224, 224, 3), "float32"], - weight: T.Buffer[(7, 7, 3, 64), "float32"], - conv2d_nhwc: T.Buffer[(1, 112, 112, 
64), "float32"], + inputs: T.Buffer((1, 224, 224, 3), "float32"), + weight: T.Buffer((7, 7, 3, 64), "float32"), + conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): diff --git a/tests/python/unittest/test_tir_schedule_set_axis_separator.py b/tests/python/unittest/test_tir_schedule_set_axis_separator.py index 327df33408f26..75c650733ae0a 100644 --- a/tests/python/unittest/test_tir_schedule_set_axis_separator.py +++ b/tests/python/unittest/test_tir_schedule_set_axis_separator.py @@ -27,7 +27,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,unexpected-keyword-arg @T.prim_func -def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -41,7 +41,7 @@ def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "fl @T.prim_func -def element_wise_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_axis_separator(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) for i, j in T.grid(128, 128): @@ -55,7 +55,7 @@ def element_wise_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buf @T.prim_func -def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), dtype="float32", axis_separators=(1,)), C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), dtype="float32", axis_separators=(1,)), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32") for i, j in T.grid(128, 128): @@ -69,7 +69,7 @@ def element_wise_set_axis_separator_input_buffer(A: T.Buffer(shape=(128, 128), d @T.prim_func -def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -85,7 +85,7 @@ def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer @T.prim_func -def element_wise_subregion_match_set_axis_separator(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match_set_axis_separator(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer([128, 128], dtype="float32", axis_separators=[1]) for i, j in T.grid(128, 128): diff --git a/tests/python/unittest/test_tir_schedule_set_scope.py b/tests/python/unittest/test_tir_schedule_set_scope.py index adac81e629469..e5fa25fbc3628 100644 --- a/tests/python/unittest/test_tir_schedule_set_scope.py +++ b/tests/python/unittest/test_tir_schedule_set_scope.py @@ -26,7 +26,7 @@ # pylint: disable=no-member,invalid-name,unused-variable,unexpected-keyword-arg @T.prim_func -def element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -40,7 +40,7 @@ def 
element_wise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "fl @T.prim_func -def element_wise_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_set_scope(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") for i, j in T.grid(128, 128): @@ -54,7 +54,7 @@ def element_wise_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, @T.prim_func -def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), dtype="float32") for i, j in T.grid(128, 128): @@ -70,7 +70,7 @@ def element_wise_subregion_match(A: T.Buffer[(128, 128), "float32"], C: T.Buffer @T.prim_func -def element_wise_subregion_match_set_scope(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def element_wise_subregion_match_set_scope(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B_shared = T.alloc_buffer([128, 128], dtype="float32", scope="shared") for i, j in T.grid(128, 128): diff --git a/tests/python/unittest/test_tir_schedule_split_fuse.py b/tests/python/unittest/test_tir_schedule_split_fuse.py index 3ae88e0abba57..f6373fa727a18 100644 --- a/tests/python/unittest/test_tir_schedule_split_fuse.py +++ b/tests/python/unittest/test_tir_schedule_split_fuse.py @@ -526,9 +526,9 @@ def test_fuse_not_affine(): def test_add_unit_loop_above_block(): @T.prim_func def zero_dim( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: with T.block("C"): vi = T.axis.spatial(1, 0) @@ -536,9 +536,9 @@ def zero_dim( @T.prim_func def zero_dim_added( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in range(1): with T.block("C"): @@ -554,9 +554,9 @@ def zero_dim_added( def test_add_unit_loop_above_loop(): @T.prim_func def zero_dim( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u in range(1): with T.block("C"): @@ -565,9 +565,9 @@ def zero_dim( @T.prim_func def zero_dim_added( - A: T.Buffer[(), "int32"], - B: T.Buffer[(), "int32"], - C: T.Buffer[(), "int32"], + A: T.Buffer((), "int32"), + B: T.Buffer((), "int32"), + C: T.Buffer((), "int32"), ) -> None: for u1, u2 in T.grid(1, 1): with T.block("C"): diff --git a/tests/python/unittest/test_tir_schedule_state_cached_flags.py b/tests/python/unittest/test_tir_schedule_state_cached_flags.py index 70935814ba407..8120aa2aea31f 100644 --- a/tests/python/unittest/test_tir_schedule_state_cached_flags.py +++ b/tests/python/unittest/test_tir_schedule_state_cached_flags.py @@ -355,7 +355,7 @@ def non_perfect_tiling_cache(a: T.handle, b: T.handle) -> None: @T.prim_func -def uncovered_producer_region(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): +def uncovered_producer_region(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")): for i in range(120): with T.block("producer"): vi = T.axis.S((0, 120), i) @@ -367,7 +367,7 @@ def uncovered_producer_region(A: 
T.Buffer[(128,), "float32"], B: T.Buffer[(128,) @T.prim_func -def matmul_relu_padding(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 127), "float16"], compute: T.Buffer[(127, 127), "float32"]) -> None: +def matmul_relu_padding(A: T.Buffer((127, 127), "float16"), B: T.Buffer((127, 127), "float16"), compute: T.Buffer((127, 127), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -440,7 +440,7 @@ def matmul_relu_padding(A: T.Buffer[(127, 127), "float16"], B: T.Buffer[(127, 12 @T.prim_func def splitted_square_sum_with_predicate( - A: T.Buffer[(1, 7, 7, 512), "float32"], B: T.Buffer[(1, 1, 1, 512), "float32"] + A: T.Buffer((1, 7, 7, 512), "float32"), B: T.Buffer((1, 1, 1, 512), "float32") ) -> None: for i0_i1_i2_i3_0_fused, ax0, ax1, ax2, ax3 in T.grid(2, 1, 1, 1, 256): for ax4_ax5_fused_0, ax4_ax5_fused_1 in T.grid(1, 256): diff --git a/tests/python/unittest/test_tir_schedule_tensorize.py b/tests/python/unittest/test_tir_schedule_tensorize.py index 4847f261a32c0..143cf87d04e14 100644 --- a/tests/python/unittest/test_tir_schedule_tensorize.py +++ b/tests/python/unittest/test_tir_schedule_tensorize.py @@ -160,9 +160,9 @@ def outer_product_intrin(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def matmul( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i, j, k in T.grid(128, 128, 128): with T.block("update"): @@ -230,9 +230,9 @@ def tensorized_matmul(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def batch_matmul( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -247,9 +247,9 @@ def batch_matmul( @T.prim_func def tensorized_batch_matmul_mma( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -302,9 +302,9 @@ def tensorized_batch_matmul_mma( @T.prim_func def tensorized_batch_matmul_dot_product( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -342,9 +342,9 @@ def tensorized_batch_matmul_dot_product( @T.prim_func def tensorized_batch_matmul_outer_product( - A: T.Buffer[(16, 128, 128), "float32"], - B: T.Buffer[(16, 128, 128), "float32"], - C: T.Buffer[(16, 128, 128), "float32"], + A: T.Buffer((16, 128, 128), "float32"), + B: T.Buffer((16, 128, 128), "float32"), + C: T.Buffer((16, 128, 128), "float32"), ) -> None: for n, i, j in T.grid(16, 128, 128): with T.block("init"): @@ -392,9 +392,9 @@ def annotated_mma_desc(a: T.handle, b: T.handle, c: T.handle) -> None: @T.prim_func def annotated_matmul( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), 
"float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i, j, k in T.grid(128, 128, 128): with T.block("update"): @@ -705,9 +705,9 @@ def test_tensorize_matmul_mixed_dtype(): # fmt: off @T.prim_func def matmul_int64_shape( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"] + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32") ) -> None: for i_0, j_0 in T.grid(T.int64(8), T.int64(8)): for i_1_init, j_1_init in T.grid(T.int64(16), T.int64(16)): @@ -724,9 +724,9 @@ def matmul_int64_shape( @T.prim_func def tensorized_matmul_int64_shape( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - C: T.Buffer[(T.int64(128), T.int64(128)), "float32"] + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), + C: T.Buffer((T.int64(128), T.int64(128)), "float32") ) -> None: for i_outer, j_outer in T.grid(T.int64(8), T.int64(8)): for i_inner_init, j_inner_init in T.grid(T.int64(16), T.int64(16)): diff --git a/tests/python/unittest/test_tir_schedule_transform.py b/tests/python/unittest/test_tir_schedule_transform.py index c068385f0a46f..b189d3c39e5b9 100644 --- a/tests/python/unittest/test_tir_schedule_transform.py +++ b/tests/python/unittest/test_tir_schedule_transform.py @@ -25,9 +25,9 @@ class DenseTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) with T.block("root"): @@ -49,9 +49,9 @@ def main( class DenseTIRModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1024, 1024), "uint8"], - placeholder_1: T.Buffer[(64, 256, 16, 4), "int8"], - compute: T.Buffer[(1024, 1024), "int32"], + placeholder: T.Buffer((1024, 1024), "uint8"), + placeholder_1: T.Buffer((64, 256, 16, 4), "int8"), + compute: T.Buffer((1024, 1024), "int32"), ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) @@ -75,9 +75,9 @@ def main( class Conv2dNCHWcTIRModule: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) for i0, i1, i2, i3, i4, i5, i6, i7, i8, i9 in T.grid(1, 16, 56, 56, 16, 1, 1, 4, 4, 4): @@ -116,9 +116,9 @@ def main( class Conv2dNCHWcTIRModuleTiled: @T.prim_func def main( - placeholder: T.Buffer[(1, 4, 56, 56, 16), "uint8"], - placeholder_1: T.Buffer[(16, 4, 1, 1, 4, 16, 4), "int8"], - conv2d_NCHWc_int8: T.Buffer[(1, 16, 56, 56, 16), "int32"], + placeholder: T.Buffer((1, 4, 56, 56, 16), "uint8"), + placeholder_1: T.Buffer((16, 4, 1, 1, 4, 16, 4), "int8"), + conv2d_NCHWc_int8: T.Buffer((1, 16, 56, 56, 16), "int32"), ) -> None: # function 
attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) diff --git a/tests/python/unittest/test_tir_schedule_transform_layout.py b/tests/python/unittest/test_tir_schedule_transform_layout.py index b4e49316f1239..c4a3ce0312801 100644 --- a/tests/python/unittest/test_tir_schedule_transform_layout.py +++ b/tests/python/unittest/test_tir_schedule_transform_layout.py @@ -33,7 +33,7 @@ def packed_index_map_func(m, n): @T.prim_func -def two_elementwise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"]) -> None: +def two_elementwise(A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32")) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): with T.block("B"): @@ -47,7 +47,7 @@ def two_elementwise(A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), @T.prim_func def two_elementwise_transformed_intermediate_buffer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((8, 8, 16, 16), "float32") for i, j in T.grid(128, 128): @@ -62,7 +62,7 @@ def two_elementwise_transformed_intermediate_buffer( @T.prim_func def two_elementwise_transformed_input_buffer( - A: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32"] + A: T.Buffer((8, 8, 16, 16), "float32"), C: T.Buffer((128, 128), "float32") ) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -77,7 +77,7 @@ def two_elementwise_transformed_input_buffer( @T.prim_func def two_elementwise_transformed_output_buffer( - A: T.Buffer[(128, 128), "float32"], C: T.Buffer[(8, 8, 16, 16), "float32"] + A: T.Buffer((128, 128), "float32"), C: T.Buffer((8, 8, 16, 16), "float32") ) -> None: B = T.alloc_buffer((128, 128), "float32") for i, j in T.grid(128, 128): @@ -91,7 +91,7 @@ def two_elementwise_transformed_output_buffer( @T.prim_func -def elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def elementwise(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i, j in T.grid(128, 128): with T.block("B"): vi, vj = T.axis.remap("SS", [i, j]) @@ -99,7 +99,7 @@ def elementwise(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "flo @T.prim_func -def elementwise_transformed(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128, 128), "float32"]) -> None: +def elementwise_transformed(A: T.Buffer((128, 128), "float32"), B: T.Buffer((128, 128), "float32")) -> None: for i in range(16384): with T.block("B"): vi = T.axis.remap("S", [i]) @@ -108,9 +108,9 @@ def elementwise_transformed(A: T.Buffer[(128, 128), "float32"], B: T.Buffer[(128 @T.prim_func def conv2d_nhwc( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -135,9 +135,9 @@ def conv2d_nhwc( @T.prim_func def conv2d_nhwc_transformed( - Input: T.Buffer[(1, 224, 224, 3), "float32"], - Weight: T.Buffer[(7, 7, 3, 64), "float32"], - Conv2d_nhwc: T.Buffer[(1, 112, 112, 64), "float32"], + Input: T.Buffer((1, 224, 224, 3), "float32"), + Weight: T.Buffer((7, 7, 3, 64), "float32"), + Conv2d_nhwc: T.Buffer((1, 112, 112, 64), "float32"), ) -> 
None: PadInput = T.alloc_buffer([1, 230, 230, 3], dtype="float32") for i0, i1, i2, i3 in T.grid(1, 230, 230, 3): @@ -162,7 +162,7 @@ def conv2d_nhwc_transformed( @T.prim_func -def two_elementwise_unit_dim(A: T.Buffer[(1, 128), "float32"], C: T.Buffer[(1, 128), "float32"]) -> None: +def two_elementwise_unit_dim(A: T.Buffer((1, 128), "float32"), C: T.Buffer((1, 128), "float32")) -> None: B = T.alloc_buffer((1, 128), "float32") for i, j in T.grid(1, 128): with T.block("B"): @@ -268,7 +268,7 @@ def test_simplify(): sch.transform_layout(B, ("write", 0), lambda i, j: (i // 16, j // 16, i % 16, j % 16)) @T.prim_func - def ref(B: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32"]): + def ref(B: T.Buffer((8, 8, 16, 16), "float32"), C: T.Buffer((128, 128), "float32")): for i_0, j_0 in T.grid(8, 8): with T.block("C_o"): vi_o, vj_o = T.axis.remap("SS", [i_0, j_0]) @@ -291,7 +291,7 @@ def ref(B: T.Buffer[(8, 8, 16, 16), "float32"], C: T.Buffer[(128, 128), "float32 def test_var_args_sugar(): @T.prim_func def summation_3d( - A: T.Buffer[(1024, 1024, 32), "float32"], B: T.Buffer[(1,), "float32"] + A: T.Buffer((1024, 1024, 32), "float32"), B: T.Buffer((1,), "float32") ) -> None: B[0] = 0 for i, j, k in T.grid(1024, 1024, 32): @@ -301,7 +301,7 @@ def summation_3d( @T.prim_func def summation_3d_split( - A: T.Buffer[(1024, 1024, 8, 4), "float32"], B: T.Buffer[(1,), "float32"] + A: T.Buffer((1024, 1024, 8, 4), "float32"), B: T.Buffer((1,), "float32") ) -> None: B[0] = 0 for i, j, k in T.grid(1024, 1024, 32): @@ -342,7 +342,7 @@ def test_transform_block_layout_unit_dim(use_block_name): @T.prim_func def two_elementwise_unit_dim_transformed( - A: T.Buffer[(1, 128), "float32"], C: T.Buffer[(1, 128), "float32"] + A: T.Buffer((1, 128), "float32"), C: T.Buffer((1, 128), "float32") ) -> None: B = T.alloc_buffer((1, 128), "float32") for j, i in T.grid(128, 1): @@ -378,8 +378,8 @@ def test_transform_block_layout_fail_mixed_iter_type(use_block_name): def test_transform_block_layout_int64_extent(use_block_name): @T.prim_func def elementwise_int64_extent( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: for i, j in T.grid(T.int64(128), T.int64(128)): with T.block("B"): @@ -388,8 +388,8 @@ def elementwise_int64_extent( @T.prim_func def elementwise_int64_extent_transformed( - A: T.Buffer[(T.int64(128), T.int64(128)), "float32"], - B: T.Buffer[(T.int64(128), T.int64(128)), "float32"], + A: T.Buffer((T.int64(128), T.int64(128)), "float32"), + B: T.Buffer((T.int64(128), T.int64(128)), "float32"), ) -> None: for i in range(T.int64(16384)): with T.block("B"): @@ -566,7 +566,7 @@ class TestPaddedTransformIfThenElse(BasePaddingCompare): @tvm.testing.fixture def before(self, dtype): @T.prim_func - def func(A: T.Buffer[14, dtype]): + def func(A: T.Buffer(14, dtype)): B = T.alloc_buffer(14, dtype) for i in T.serial(14): with T.block("block"): @@ -580,7 +580,7 @@ def expected(self, dtype, pad_value): pad_value = tir.IntImm(dtype, pad_value) @T.prim_func - def func(A: T.Buffer[14, dtype]): + def func(A: T.Buffer(14, dtype)): B = T.alloc_buffer([4, 4], dtype) for i, j in T.grid(4, 4): with T.block("block"): @@ -601,14 +601,14 @@ class TestPaddedTransformWithoutLoop(BasePaddingCompare): pad_value = tvm.testing.parameter(0) - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): with T.block("root"): T.reads() 
T.writes() with T.block("block"): A[0] = 0 - def expected(A: T.Buffer[(4, 4), "int32"]): + def expected(A: T.Buffer((4, 4), "int32")): with T.block("block"): A[0, 0] = 0 @@ -625,7 +625,7 @@ class TestPaddedTransformIfThenElseReduction(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[(14, 32), "int32"]): + def before(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer(14, "int32") for i, k in T.grid(14, 32): with T.block("block"): @@ -634,7 +634,7 @@ def before(A: T.Buffer[(14, 32), "int32"]): B[vi] = 0 B[vi] = B[vi] + A[vi, vk] - def expected(A: T.Buffer[(14, 32), "int32"]): + def expected(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j, k in T.grid(4, 4, 32): with T.block("block"): @@ -652,7 +652,7 @@ class TestPaddedTransformIfThenElseReductionOpaque(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[(14, 32), "int32"]): + def before(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): B[i] = 0 @@ -660,7 +660,7 @@ def before(A: T.Buffer[(14, 32), "int32"]): with T.block("block"): B[i] = B[i] + A[i, k] - def expected(A: T.Buffer[(14, 32), "int32"]): + def expected(A: T.Buffer((14, 32), "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j in T.grid(4, 4): B[i, j] = T.if_then_else(i == 3 and 2 <= j, 0, 0, dtype="int32") @@ -681,7 +681,7 @@ class TestPaddedTransformPostProcIfRequiredDueToSideEffects(BasePaddingCompare): pad_value = tvm.testing.parameter(0) transformed_buffer = tvm.testing.parameter("B") - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") C = T.alloc_buffer(14, "int32") for i in T.serial(14): @@ -690,7 +690,7 @@ def before(A: T.Buffer[14, "int32"]): B[vi] = A[vi] C[vi] = 0 - def expected(A: T.Buffer[14, "int32"]): + def expected(A: T.Buffer(14, "int32")): B = T.alloc_buffer([4, 4], "int32") C = T.alloc_buffer(14, "int32") for i in T.serial(14): @@ -711,13 +711,13 @@ class TestPaddedTransformOfInputCreatesAssumption(BasePaddingCompare): pad_value = tvm.testing.parameter(42) - def before(A: T.Buffer[14, "int32"], B: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32"), B: T.Buffer(14, "int32")): for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[(4, 4), "int32"], B: T.Buffer[14, "int32"]): + def expected(A: T.Buffer((4, 4), "int32"), B: T.Buffer(14, "int32")): for i, j in T.grid(4, 4): with T.block("buffer_A_assumption"): vi, vj = T.axis.remap("SS", [i, j]) @@ -750,14 +750,14 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[14, "int32"]): + def expected(A: T.Buffer(14, "int32")): B = T.alloc_buffer([4, 4], "int32") for i, j in T.grid(4, 4): with T.block("block"): @@ -793,14 +793,14 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[(4, 4), "int32"]): + def expected(A: T.Buffer((4, 4), "int32")): for i, j in T.grid(4, 4): with T.block("buffer_A_assumption"): vi, vj = T.axis.remap("SS", [i, j]) @@ 
-842,7 +842,7 @@ def transform(mod): return transform - def before(A: T.Buffer[14, "int32"]): + def before(A: T.Buffer(14, "int32")): B = T.alloc_buffer(14, "int32") for i in T.serial(14): with T.block("block"): @@ -872,14 +872,14 @@ def transform(mod): return transform - def before(A: T.Buffer[16, "int32"], n: T.int32): + def before(A: T.Buffer(16, "int32"), n: T.int32): B = T.alloc_buffer(16, "int32") for i in T.serial(16): with T.block("block"): vi = T.axis.remap("S", [i]) B[vi] = A[vi] - def expected(A: T.Buffer[16, "int32"], n: T.int32): + def expected(A: T.Buffer(16, "int32"), n: T.int32): B = T.alloc_buffer([(-16 % n + 16) // n, n], dtype="int32") for i, j in T.grid((-16 % n + 16) // n, n): with T.block("block"): @@ -942,7 +942,7 @@ def test_index_map_dtype_legalize(): """Test dtype legalization of the index map indices.""" @T.prim_func - def func(A: T.Buffer[T.int64(58), "int32"]): + def func(A: T.Buffer(T.int64(58), "int32")): for i in T.serial(T.int64(58)): with T.block("block"): vi = T.axis.remap("S", [i]) diff --git a/tests/python/unittest/test_tir_schedule_utilities.py b/tests/python/unittest/test_tir_schedule_utilities.py index 2f6c2f6a51205..53ee6a58cd9a4 100644 --- a/tests/python/unittest/test_tir_schedule_utilities.py +++ b/tests/python/unittest/test_tir_schedule_utilities.py @@ -105,8 +105,8 @@ def matmul_relu_ann2(a: T.handle, b: T.handle, d: T.handle) -> None: class ModuleWithMultipleFuncs: @T.prim_func def vector_add( - A: T.Buffer[128, "float32"], - B: T.Buffer[128, "float32"], + A: T.Buffer(128, "float32"), + B: T.Buffer(128, "float32"), ) -> None: for i in range(128): with T.block("init"): @@ -115,8 +115,8 @@ def vector_add( @T.prim_func def vector_add_2( - A: T.Buffer[128, "float32"], - B: T.Buffer[128, "float32"], + A: T.Buffer(128, "float32"), + B: T.Buffer(128, "float32"), ) -> None: for i in range(128): with T.block("init"): @@ -125,7 +125,7 @@ def vector_add_2( @T.prim_func -def tuple_reduction(data: T.Buffer[(4, 32), "float32"], T_add: T.Buffer[(4,), "float32"]) -> None: +def tuple_reduction(data: T.Buffer((4, 32), "float32"), T_add: T.Buffer((4,), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_te_extern_primfunc.py b/tests/python/unittest/test_tir_te_extern_primfunc.py index f6eb2e8a9b860..45ca7a1c72569 100644 --- a/tests/python/unittest/test_tir_te_extern_primfunc.py +++ b/tests/python/unittest/test_tir_te_extern_primfunc.py @@ -31,7 +31,7 @@ @T.prim_func -def func_1(A: T.Buffer[(16,), "float32"], C: T.Buffer[(1,), "float32"]): +def func_1(A: T.Buffer((16,), "float32"), C: T.Buffer((1,), "float32")): for i in T.serial( 0, 16, @@ -59,7 +59,7 @@ def verify_func_1(module): @T.prim_func def func_2( - C: T.Buffer[(1,), "float32"], A: T.Buffer[(16,), "float32"], D: T.Buffer[(2,), "float32"] + C: T.Buffer((1,), "float32"), A: T.Buffer((16,), "float32"), D: T.Buffer((2,), "float32") ): for i in T.serial( 0, @@ -89,11 +89,11 @@ def verify_func_2(module): @T.prim_func def func_3( - C: T.Buffer[(1,), "float32"], - A: T.Buffer[(16,), "float32"], - D: T.Buffer[(2,), "float32"], - E: T.Buffer[(16,), "float32"], - F: T.Buffer[(16,), "float32"], + C: T.Buffer((1,), "float32"), + A: T.Buffer((16,), "float32"), + D: T.Buffer((2,), "float32"), + E: T.Buffer((16,), "float32"), + F: T.Buffer((16,), "float32"), ): for i in T.serial( 0, @@ -131,11 +131,11 @@ def verify_func_3(module): @T.prim_func def func_4( - C: T.Buffer[(1,), "float32"], - A: T.Buffer[(16,), "float32"], - 
F: T.Buffer[(16,), "float32"], - D: T.Buffer[(2,), "float32"], - E: T.Buffer[(16,), "float32"], + C: T.Buffer((1,), "float32"), + A: T.Buffer((16,), "float32"), + F: T.Buffer((16,), "float32"), + D: T.Buffer((2,), "float32"), + E: T.Buffer((16,), "float32"), ): for i in T.serial( 0, diff --git a/tests/python/unittest/test_tir_transform_compact_buffer_region.py b/tests/python/unittest/test_tir_transform_compact_buffer_region.py index 34b3190b9aa27..1a2a47a17043e 100644 --- a/tests/python/unittest/test_tir_transform_compact_buffer_region.py +++ b/tests/python/unittest/test_tir_transform_compact_buffer_region.py @@ -447,7 +447,7 @@ def padding_pattern_inlined(a: T.handle, b: T.handle) -> None: @T.prim_func def compacted_padding_pattern_inlined( - X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"] + X: T.Buffer((224, 224), "float32"), Y: T.Buffer((224, 224), "float32") ) -> None: cache = T.alloc_buffer([224, 224], dtype="float32") for h, w in T.grid(224, 224): @@ -561,10 +561,10 @@ def compacted_opaque_access_annotated_func(a: T.handle) -> None: @T.prim_func def sparse_read_cache( - A_data: T.Buffer[(819,), "float32"], - B: T.Buffer[(128,), "float32"], - A_indptr: T.Buffer[(129,), "int32"], - A_indices: T.Buffer[(819,), "int32"], + A_data: T.Buffer((819,), "float32"), + B: T.Buffer((128,), "float32"), + A_indptr: T.Buffer((129,), "int32"), + A_indices: T.Buffer((819,), "int32"), ) -> None: for i in T.serial(128): with T.block("rowsum_outer"): @@ -594,10 +594,10 @@ def sparse_read_cache( @T.prim_func def compacted_sparse_read_cache( - A_data: T.Buffer[(819,), "float32"], - B: T.Buffer[(128,), "float32"], - A_indptr: T.Buffer[(129,), "int32"], - A_indices: T.Buffer[(819,), "int32"], + A_data: T.Buffer((819,), "float32"), + B: T.Buffer((128,), "float32"), + A_indptr: T.Buffer((129,), "int32"), + A_indices: T.Buffer((819,), "int32"), ) -> None: for i in T.serial(128): with T.block("rowsum_outer"): @@ -626,7 +626,7 @@ def compacted_sparse_read_cache( @T.prim_func -def narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) -> None: +def narrow_shape(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")) -> None: B_cache = T.alloc_buffer(10, "float32") for j in T.serial(3): for k in T.serial(4): @@ -638,7 +638,7 @@ def narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) - @T.prim_func -def compacted_narrow_shape(A: T.Buffer[(10,), "float32"], B: T.Buffer[(10,), "float32"]) -> None: +def compacted_narrow_shape(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")) -> None: # body # with T.block("root") B_cache = T.alloc_buffer([10], dtype="float32") @@ -756,7 +756,7 @@ def func_with_non_index_let_binding(): def test_compact_spatial_tiled_pad_and_pooling(): @T.prim_func def spatial_tiled_pad_and_pooling( - X: T.Buffer[(64, 112, 112), "int32"], Y: T.Buffer[(64, 56, 56), "int32"] + X: T.Buffer((64, 112, 112), "int32"), Y: T.Buffer((64, 56, 56), "int32") ) -> None: for h_o, w_o in T.grid(14, 14): with T.block(): @@ -796,7 +796,7 @@ def spatial_tiled_pad_and_pooling( @T.prim_func def compacted_spatial_tiled_pad_and_pooling( - X: T.Buffer[(64, 112, 112), "int32"], Y: T.Buffer[(64, 56, 56), "int32"] + X: T.Buffer((64, 112, 112), "int32"), Y: T.Buffer((64, 56, 56), "int32") ) -> None: for h_o, w_o in T.grid(14, 14): with T.block(): @@ -854,7 +854,7 @@ def test_complex_case_1(): # fmt: off @T.prim_func - def func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"], C: T.Buffer[(960, 2304), "float32"]) -> None: + 
def func(A: T.Buffer((960, 770), "float32"), B: T.Buffer((770, 2304), "float32"), C: T.Buffer((960, 2304), "float32")) -> None: for bx in T.thread_binding(144, thread="blockIdx.x"): for vx in T.thread_binding(2, thread="vthread.x"): for tx_p in T.thread_binding(256, thread="threadIdx.x"): @@ -880,7 +880,7 @@ def func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"] C[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] = C[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] + A_shared[(((bx // 18 + 0) * 8 + tx_p // 32) * 8 + i_3) * 2 + i_4, (k_0 + k_1) * 4 + k_2] * B_shared[(k_0 + k_1) * 4 + k_2, ((bx % 18 * 2 + vx % 2) * 32 + tx_p % 32 + j_3) * 2 + j_4] @T.prim_func - def compacted_func(A: T.Buffer[(960, 770), "float32"], B: T.Buffer[(770, 2304), "float32"], C: T.Buffer[(960, 2304), "float32"]) -> None: + def compacted_func(A: T.Buffer((960, 770), "float32"), B: T.Buffer((770, 2304), "float32"), C: T.Buffer((960, 2304), "float32")) -> None: for bx in T.thread_binding(144, thread="blockIdx.x"): for vx in T.thread_binding(2, thread="vthread.x"): for tx_p in T.thread_binding(256, thread="threadIdx.x"): @@ -940,9 +940,9 @@ def test_compact_dependent_buffer_indices_of_packed_matmul(): @T.prim_func def nonuniform_packed_matmul_write_cache( - A: T.Buffer[(1020, 64), "float32"], - B: T.Buffer[(1000, 64), "float32"], - C: T.Buffer[(1020, 1000), "float32"], + A: T.Buffer((1020, 64), "float32"), + B: T.Buffer((1000, 64), "float32"), + C: T.Buffer((1020, 1000), "float32"), ): for i0, i1 in T.grid(4, 1): with T.block(): @@ -977,9 +977,9 @@ def nonuniform_packed_matmul_write_cache( @T.prim_func def nonuniform_packed_matmul_write_cache_compacted( - A: T.Buffer[(1020, 64), "float32"], - B: T.Buffer[(1000, 64), "float32"], - C: T.Buffer[(1020, 1000), "float32"], + A: T.Buffer((1020, 64), "float32"), + B: T.Buffer((1000, 64), "float32"), + C: T.Buffer((1020, 1000), "float32"), ) -> None: for i0, i1 in T.grid(4, 1): with T.block(): diff --git a/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py b/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py index 297943bc13817..73b5203b56f0b 100644 --- a/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py +++ b/tests/python/unittest/test_tir_transform_convert_blocks_to_opaque.py @@ -85,7 +85,7 @@ def test_lower_te(): class TestErrorIfPredicateUsesBlockVariables(tvm.testing.CompareBeforeAfter): transform = tvm.tir.transform.ConvertBlocksToOpaque() - def before(A: T.Buffer[8, "int32"]): + def before(A: T.Buffer(8, "int32")): for i in T.serial(8): with T.block(): vi = T.axis.remap("S", [i]) diff --git a/tests/python/unittest/test_tir_transform_flatten_buffer.py b/tests/python/unittest/test_tir_transform_flatten_buffer.py index 12523fbdb2aec..c68dbd9ada6d1 100644 --- a/tests/python/unittest/test_tir_transform_flatten_buffer.py +++ b/tests/python/unittest/test_tir_transform_flatten_buffer.py @@ -32,7 +32,7 @@ class BaseCompare(tvm.testing.CompareBeforeAfter): class TestElementwise(BaseCompare): """2-d buffers are flattened to 1-d""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i in T.serial(0, 16): B_new = T.decl_buffer([1, 16], "float32") for j in T.serial(0, 16): @@ -40,7 +40,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), 
"float32"]): for j in T.serial(0, 16): C[i, j] = B_new[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): @@ -62,7 +62,7 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare): memory, and should be flattened to a 1-d allocation. """ - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i in T.serial(0, 16): B_new_data = T.allocate([1, 16], "float32", "global") B_new = T.Buffer([1, 16], "float32", data=B_new_data) @@ -71,7 +71,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for j in T.serial(0, 16): C[i, j] = B_new[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): @@ -86,7 +86,7 @@ def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), class TestGPU(BaseCompare): """Buffer flattening may have indices based on GPU thread vars""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): i0 = T.env_thread("blockIdx.x") i1 = T.env_thread("threadIdx.x") i2 = T.env_thread("vthread") @@ -100,7 +100,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for j in range(0, 16): C[i0 * 4 + i1 * 2 + i2, j] = B[0, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) @@ -151,7 +151,7 @@ def expected(a: T.handle, c: T.handle, n: T.int32, m: T.int32) -> None: class TestMultiAlloc(BaseCompare): """If multiple allocations occur, all are flattened.""" - def before(A: T.Buffer[(4, 32), "float32"], D: T.Buffer[(4, 32), "float32"]): + def before(A: T.Buffer((4, 32), "float32"), D: T.Buffer((4, 32), "float32")): for i, j in T.grid(4, 32): B = T.decl_buffer((4, 32), "float32", scope="global") C = T.decl_buffer((4, 32), "float32", scope="global") @@ -159,7 +159,7 @@ def before(A: T.Buffer[(4, 32), "float32"], D: T.Buffer[(4, 32), "float32"]): C[i, j] = A[i, j] + B[i, j] D[i, j] = C[i, j] * 2.0 - def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), "float32"]): + def expected(input_A: T.Buffer((4, 32), "float32"), input_D: T.Buffer((4, 32), "float32")): A = T.Buffer(128, "float32", data=input_A.data) D = T.Buffer(128, "float32", data=input_D.data) @@ -176,7 +176,7 @@ def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), " class TestStrided(BaseCompare): """Indices for flattened buffers use the specified striding.""" - def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for i0 in T.serial(4): B = T.decl_buffer([4, 17], "float32") B_1 = 
T.Buffer([4, 16], dtype="float32", data=B.data, strides=[17, 1]) @@ -185,7 +185,7 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for i1, j in T.grid(4, 16): C[i0 * 4 + i1, j] = B_1[i1, j] * 2.0 - def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): + def expected(input_A: T.Buffer((16, 16), "float32"), input_C: T.Buffer((16, 16), "float32")): A = T.Buffer(256, dtype="float32", data=input_A.data) C = T.Buffer(256, dtype="float32", data=input_C.data) for i0 in T.serial(0, 4): @@ -202,11 +202,11 @@ def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), class TestBoolean(BaseCompare): """Boolean buffers should be replaced by a backing int8 array""" - def before(A: T.Buffer[10, "bool"], B: T.Buffer[10, "bool"]) -> None: + def before(A: T.Buffer(10, "bool"), B: T.Buffer(10, "bool")) -> None: for i0 in T.serial(10): B[i0] = A[i0] - def expected(input_A: T.Buffer[10, "bool"], input_B: T.Buffer[10, "bool"]) -> None: + def expected(input_A: T.Buffer(10, "bool"), input_B: T.Buffer(10, "bool")) -> None: A = T.Buffer(10, dtype="int8", data=input_A.data) B = T.Buffer(10, dtype="int8", data=input_B.data) # body diff --git a/tests/python/unittest/test_tir_transform_helpers.py b/tests/python/unittest/test_tir_transform_helpers.py index 01496e0e0fc13..f8dc0f682d06b 100644 --- a/tests/python/unittest/test_tir_transform_helpers.py +++ b/tests/python/unittest/test_tir_transform_helpers.py @@ -25,7 +25,7 @@ def test_annotate_entry_func_single_primfunc(): @tvm.script.ir_module class MockModule: @T.prim_func - def func1(A: T.Buffer[(16,), "float32"]): + def func1(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: @@ -46,14 +46,14 @@ def func1(A: T.Buffer[(16,), "float32"]): @tvm.script.ir_module class MockModule: @T.prim_func - def func1(A: T.Buffer[(16,), "float32"]): + def func1(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: A[i] = 0.0 @T.prim_func - def func2(A: T.Buffer[(32,), "float32"]): + def func2(A: T.Buffer((32,), "float32")): for i in T.serial(32): if i == 15: if i == 15: diff --git a/tests/python/unittest/test_tir_transform_hoist_expression.py b/tests/python/unittest/test_tir_transform_hoist_expression.py index 8b7fc98bfdcf7..77862f64d6291 100644 --- a/tests/python/unittest/test_tir_transform_hoist_expression.py +++ b/tests/python/unittest/test_tir_transform_hoist_expression.py @@ -59,13 +59,13 @@ class TestHoistToTop(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if n != 0: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[(16,), "float32"], n: T.int32): + def expected(A: T.Buffer((16,), "float32"), n: T.int32): if n != 0: for i in T.serial(16): A[i] = 0.0 @@ -78,7 +78,7 @@ class TestSuppressHoistIfElse(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if n != 0: A[i] = 0.0 @@ -88,7 +88,7 @@ def before(A: T.Buffer[(16,), "float32"], n: T.int32): class TestHoistBlockVar(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): i = T.env_thread("threadIdx.x") T.launch_thread(i, 128) @@ -97,7 +97,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: 
T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): i = T.env_thread("threadIdx.x") T.launch_thread(i, 128) @@ -112,7 +112,7 @@ class TestSuppressHoistBlockVar(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -126,7 +126,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): class TestHoistAcrossBlockVar(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -136,7 +136,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") if n == 0: @@ -152,7 +152,7 @@ class TestSuppressHoistAcrossBlockVar(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def before(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -162,7 +162,7 @@ def before(A: T.Buffer[(128, 16), "float32"], n: T.int32): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): + def expected(A: T.Buffer((128, 16), "float32"), n: T.int32): thread_x = T.env_thread("threadIdx.x") T.launch_thread(thread_x, 128) @@ -174,14 +174,14 @@ def expected(A: T.Buffer[(128, 16), "float32"], n: T.int32): class TestHoistToMiddle(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -190,7 +190,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistWithLet(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): condition = i < 3 @@ -198,7 +198,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): condition = i < 3 if condition: @@ -216,7 +216,7 @@ class TestHoistDisableLet(BaseBeforeAfter): hoisted_let_bindings = tvm.testing.parameter(HoistedLetBindings.Never) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): condition = i < 3 @@ -228,7 +228,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): class TestHoistIfElse(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: @@ -237,7 +237,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 1.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -249,7 +249,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistSequentialAssign(BaseBeforeAfter): @T.prim_func - 
def before(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): if i < 3: @@ -260,7 +260,7 @@ def before(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): B[i, j] = 1.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32"), B: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 3: for j in T.serial(4): @@ -274,7 +274,7 @@ def expected(A: T.Buffer[(4, 4), "float32"], B: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIf(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -283,7 +283,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -294,13 +294,13 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistComplexConditional(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j, k in T.grid(4, 4, 4): if j < 3 and i < 2: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -315,13 +315,13 @@ class TestSuppressSplittingConditional(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j, k in T.grid(4, 4, 4): if j < 3 and i < 2: A[i, j] = 0.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): if j < 3 and i < 2: for k in T.serial(4): @@ -330,7 +330,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIfElse(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -346,7 +346,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 3.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -368,7 +368,7 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistMultiIfElseDifferentBranches(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): for j in T.serial(4): for k in T.serial(4): @@ -384,7 +384,7 @@ def before(A: T.Buffer[(4, 4), "float32"]): A[i, j] = 3.0 @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: if i < 1: @@ -415,12 +415,12 @@ def expected(A: T.Buffer[(4, 4), "float32"]): class TestHoistIfElseExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): A[i, j] = T.if_then_else(i < 2, 1.0, 2.0, dtype="float32") @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): if i < 2: for j in T.serial(4): @@ -436,7 +436,7 @@ class 
TestSuppressHoistIfElseExpr(TestHoistIfElseExpr): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): A[i, j] = T.if_then_else(i < 2, 1.0, 2.0, dtype="float32") @@ -445,13 +445,13 @@ def before(A: T.Buffer[(4, 4), "float32"]): class TestHoistLetExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): x = T.var("float32") A[i, j] = T.Let(x, T.cast(i + 1, "float32"), 5.0 * x + T.cast(j, "float32")) @T.prim_func - def expected(A: T.Buffer[(4, 4), "float32"]): + def expected(A: T.Buffer((4, 4), "float32")): for i in T.serial(4): x = T.cast(i + 1, "float32") for j in T.serial(4): @@ -464,7 +464,7 @@ class TestSuppressHoistLetExpr(BaseBeforeAfter): ) @T.prim_func - def before(A: T.Buffer[(4, 4), "float32"]): + def before(A: T.Buffer((4, 4), "float32")): for i, j in T.grid(4, 4): x = T.var("float32") A[i, j] = T.Let(x, T.cast(i + 1, "float32"), 5.0 * x + T.cast(j, "float32")) diff --git a/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py b/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py index adf3d9da05cea..fca88594c0e00 100644 --- a/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py +++ b/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py @@ -37,7 +37,7 @@ def generate_global_to_shared_vectorized_copy(dtype, vector_size): @T.prim_func def ptx_global_to_shared_copy( - A: T.Buffer[(32, 128), dtype], B: T.Buffer[(32, 128), dtype] + A: T.Buffer((32, 128), dtype), B: T.Buffer((32, 128), dtype) ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") @@ -65,7 +65,7 @@ def ptx_global_to_shared_copy( @T.prim_func def ptx_global_to_shared_copy_fp32x1( - A: T.Buffer[(32, 128), "float32"], B: T.Buffer[(32, 128), "float32"] + A: T.Buffer((32, 128), "float32"), B: T.Buffer((32, 128), "float32") ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") @@ -90,9 +90,9 @@ def ptx_global_to_shared_copy_fp32x1( @T.prim_func def ptx_global_to_shared_dyn_copy_fp16x8( - A: T.Buffer[(32, 128), "float16"], - B: T.Buffer[(32, 128), "float16"], - C: T.Buffer[(32, 128), "float16"], + A: T.Buffer((32, 128), "float16"), + B: T.Buffer((32, 128), "float16"), + C: T.Buffer((32, 128), "float16"), ) -> None: T.func_attr({"global_symbol": "main", "tir.noalias": True}) bx = T.env_thread("blockIdx.x") diff --git a/tests/python/unittest/test_tir_transform_inject_software_pipeline.py b/tests/python/unittest/test_tir_transform_inject_software_pipeline.py index cf01d7700725f..1e5fd8843ba31 100644 --- a/tests/python/unittest/test_tir_transform_inject_software_pipeline.py +++ b/tests/python/unittest/test_tir_transform_inject_software_pipeline.py @@ -50,7 +50,7 @@ def _check_error(func): @T.prim_func -def trivial_pipeline(A: T.Buffer[(16, 1), "float32"], C: T.Buffer[(16, 1), "float32"]): +def trivial_pipeline(A: T.Buffer((16, 1), "float32"), C: T.Buffer((16, 1), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, 1, annotations={"software_pipeline_stage": [0, 1], "software_pipeline_order": [0, 1]} @@ -71,7 +71,7 @@ def trivial_pipeline(A: T.Buffer[(16, 1), "float32"], C: T.Buffer[(16, 1), "floa @T.prim_func def transformed_trivial_pipeline( - A: T.Buffer[(16, 1), "float32"], C: T.Buffer[(16, 1), "float32"] + A: T.Buffer((16, 1), "float32"), C: T.Buffer((16, 1), "float32") 
) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -94,7 +94,7 @@ def transformed_trivial_pipeline( def gen_simple_compute(num_stages): @T.prim_func - def simple_compute(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def simple_compute(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, @@ -122,7 +122,7 @@ def simple_compute(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "floa @T.prim_func def transformed_simple_compute( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -153,7 +153,7 @@ def transformed_simple_compute( @T.prim_func def simple_compute_with_other_annotation( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -181,7 +181,7 @@ def simple_compute_with_other_annotation( @T.prim_func def transformed_simple_compute_with_other_annotation( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -215,7 +215,7 @@ def transformed_simple_compute_with_other_annotation( @T.prim_func -def three_stage_compute(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): +def three_stage_compute(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( 0, @@ -246,7 +246,7 @@ def three_stage_compute(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), @T.prim_func def transformed_three_stage_compute( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -300,9 +300,9 @@ def transformed_three_stage_compute( @T.prim_func def dag_interleaving( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -344,9 +344,9 @@ def dag_interleaving( @T.prim_func def transformed_dag_interleaving( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -409,7 +409,7 @@ def transformed_dag_interleaving( @T.prim_func def nested_pipeline_simple( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -453,7 +453,7 @@ def nested_pipeline_simple( @T.prim_func def transformed_nested_pipeline_simple( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 
16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -530,7 +530,7 @@ def transformed_nested_pipeline_simple( @T.prim_func def nested_pipeline_prefetch_inner( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -574,7 +574,7 @@ def nested_pipeline_prefetch_inner( @T.prim_func def transformed_nested_pipeline_prefetch_inner( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -654,7 +654,7 @@ def transformed_nested_pipeline_prefetch_inner( @T.prim_func def nested_pipeline_interleaving( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -704,7 +704,7 @@ def nested_pipeline_interleaving( @T.prim_func def transformed_nested_pipeline_interleaving( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -813,7 +813,7 @@ def transformed_nested_pipeline_interleaving( @T.prim_func def nested_pipeline_double_buffer( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -864,7 +864,7 @@ def nested_pipeline_double_buffer( @T.prim_func def transformed_nested_pipeline_double_buffer( - A: T.Buffer[(16, 16, 16), "float32"], C: T.Buffer[(16, 16, 16), "float32"] + A: T.Buffer((16, 16, 16), "float32"), C: T.Buffer((16, 16, 16), "float32") ) -> None: for tx in T.thread_binding(0, 16, thread="threadIdx.x"): with T.block(): @@ -977,7 +977,7 @@ def transformed_nested_pipeline_double_buffer( @T.prim_func def simple_compute_incorrect_reorder( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -1009,7 +1009,7 @@ def simple_compute_incorrect_reorder( @T.prim_func def simple_compute_conflicting_order( - A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial( @@ -1041,7 +1041,7 @@ def simple_compute_conflicting_order( @T.prim_func def simple_compute_missing_annotation( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in T.serial(0, 16, annotations={"software_pipeline_stage": [0, 1]}): @@ -1116,7 +1116,7 @@ def test_simple_compute_async(): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): + def ref(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")): for tx in 
T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) @@ -1163,7 +1163,7 @@ def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> None: + def ref(A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32")) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) @@ -1216,9 +1216,9 @@ def ref(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]) -> N def test_async_producer_interleaving(): @T.prim_func def simple_compute( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ): for tx in T.thread_binding(0, 16, thread="threadIdx.x"): for i in range(16): @@ -1251,9 +1251,9 @@ def simple_compute( @T.prim_func def ref( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): @@ -1330,7 +1330,7 @@ def test_three_stage_compute_two_stage_async(): mod = tvm.tir.transform.InjectSoftwarePipeline()(sch.mod) @T.prim_func - def ref(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]) -> None: + def ref(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")) -> None: for tx in T.thread_binding(16, thread="threadIdx.x"): with T.block(): T.reads(A[tx, 0:16]) diff --git a/tests/python/unittest/test_tir_transform_loop_partition.py b/tests/python/unittest/test_tir_transform_loop_partition.py index 1a40f52140ee9..b88f8d1e3e72a 100644 --- a/tests/python/unittest/test_tir_transform_loop_partition.py +++ b/tests/python/unittest/test_tir_transform_loop_partition.py @@ -541,7 +541,7 @@ def test_simple_rfactor(): @T.prim_func def partitioned_concat( - A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"], C: T.Buffer[(32,), "float32"] + A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32"), C: T.Buffer((32,), "float32") ) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) for i in T.serial(0, 16): @@ -578,10 +578,10 @@ def partition_from_scheduled_tir(prim_func, pass_cfg): @T.prim_func def partitioned_concat_3( - placeholder: T.Buffer[(1, 64, 28, 28), "int8"], - placeholder_1: T.Buffer[(1, 32, 28, 28), "int8"], - placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], - T_concat: T.Buffer[(1, 128, 28, 28), "int8"], + placeholder: T.Buffer((1, 64, 28, 28), "int8"), + placeholder_1: T.Buffer((1, 32, 28, 28), "int8"), + placeholder_2: T.Buffer((1, 32, 28, 28), "int8"), + T_concat: T.Buffer((1, 128, 28, 28), "int8"), ) -> None: placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) @@ -597,10 +597,10 @@ def partitioned_concat_3( @T.prim_func def concat_func_3( - placeholder: T.Buffer[(1, 64, 28, 28), "int8"], - placeholder_1: T.Buffer[(1, 32, 28, 28), "int8"], - placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], - T_concat: T.Buffer[(1, 128, 28, 28), "int8"], + placeholder: T.Buffer((1, 64, 28, 28), "int8"), + placeholder_1: T.Buffer((1, 32, 28, 28), "int8"), + placeholder_2: T.Buffer((1, 32, 28, 
28), "int8"), + T_concat: T.Buffer((1, 128, 28, 28), "int8"), ) -> None: placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) @@ -630,7 +630,7 @@ def test_condition_mutually_exclusive(): def test_loop_partition_unroll_hint(): @T.prim_func def main( - A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] + A_arg: T.Buffer((1, 3, 224, 224), "int8"), B_arg: T.Buffer((1, 224, 7, 16), "int8") ) -> None: A = T.Buffer(150528, "int8", data=A_arg.data) B = T.Buffer(25088, "int8", data=B_arg.data) @@ -644,7 +644,7 @@ def main( @T.prim_func def partitioned_main( - A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] + A_arg: T.Buffer((1, 3, 224, 224), "int8"), B_arg: T.Buffer((1, 224, 7, 16), "int8") ) -> None: A = T.Buffer(150528, dtype="int8", data=A_arg.data) B = T.Buffer(25088, dtype="int8", data=B_arg.data) @@ -748,7 +748,7 @@ def partitioned_main(): def test_loop_partition_keep_loop_annotations(): @T.prim_func - def before(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: + def before(A: T.Buffer(160, "int32"), B: T.Buffer(160, "int32")) -> None: for i in T.serial( 160, annotations={"pragma_loop_partition_hint": True, "key": "value"}, @@ -761,7 +761,7 @@ def before(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: B[i] = A[i] + 3 @T.prim_func - def after(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: + def after(A: T.Buffer(160, "int32"), B: T.Buffer(160, "int32")) -> None: for i in T.serial(10, annotations={"key": "value"}): B[i] = A[i] + 1 for i in T.serial(140, annotations={"key": "value"}): @@ -783,10 +783,10 @@ def after(A: T.Buffer[160, "int32"], B: T.Buffer[160, "int32"]) -> None: def test_loop_partition_with_unit_loop_in_condition(): @T.prim_func def before( - placeholder: T.Buffer[(50176,), "int8"], - placeholder_1: T.Buffer[(25088,), "int8"], - placeholder_2: T.Buffer[(25088,), "int8"], - T_concat: T.Buffer[(100352,), "int8"], + placeholder: T.Buffer((50176,), "int8"), + placeholder_1: T.Buffer((25088,), "int8"), + placeholder_2: T.Buffer((25088,), "int8"), + T_concat: T.Buffer((100352,), "int8"), ) -> None: for k in range(1, annotations={"preserve_unit_loop": True}): for i1 in T.serial(128, annotations={"pragma_loop_partition_hint": 1}): @@ -804,10 +804,10 @@ def before( @T.prim_func def after( - placeholder: T.Buffer[50176, "int8"], - placeholder_1: T.Buffer[25088, "int8"], - placeholder_2: T.Buffer[25088, "int8"], - T_concat: T.Buffer[100352, "int8"], + placeholder: T.Buffer(50176, "int8"), + placeholder_1: T.Buffer(25088, "int8"), + placeholder_2: T.Buffer(25088, "int8"), + T_concat: T.Buffer(100352, "int8"), ) -> None: for _ in T.serial(1, annotations={"preserve_unit_loop": True}): for i1, i2, i3 in T.grid(64, 28, 28): diff --git a/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py b/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py index 2bf898e66b083..dc0a6ce62a661 100644 --- a/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py +++ b/tests/python/unittest/test_tir_transform_lower_cross_thread_reduction.py @@ -333,7 +333,7 @@ def lowered_with_block_predicate(a: T.handle, b: T.handle) -> None: @T.prim_func def single_reduction_loop_with_block_predicate( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> 
None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -383,7 +383,7 @@ def single_reduction_loop_with_block_predicate( @T.prim_func def lowered_single_reduction_loop_with_block_predicate( - A: T.Buffer[(256, 256), "float32"], T_softmax_norm: T.Buffer[(256, 256), "float32"] + A: T.Buffer((256, 256), "float32"), T_softmax_norm: T.Buffer((256, 256), "float32") ) -> None: T_softmax_maxelem_shared = T.alloc_buffer([256], dtype="float32", scope="shared") T_softmax_expsum_shared = T.alloc_buffer([256], dtype="float32", scope="shared") @@ -489,9 +489,9 @@ def lowered_single_reduction_loop_with_block_predicate( @T.prim_func def single_reduction_loop_with_tensorize( - input_A: T.Buffer[(1, 64, 7, 7, 32), "uint8"], - input_B: T.Buffer[(16, 64, 1, 1, 8, 32, 4), "int8"], - output: T.Buffer[(1, 16, 7, 7, 32), "int32"], + input_A: T.Buffer((1, 64, 7, 7, 32), "uint8"), + input_B: T.Buffer((16, 64, 1, 1, 8, 32, 4), "int8"), + output: T.Buffer((1, 16, 7, 7, 32), "int32"), ) -> None: # body # with T.block("root") @@ -550,9 +550,9 @@ def single_reduction_loop_with_tensorize( @T.prim_func def nested_reduction_loop_with_inner_match_buffers( - in0: T.Buffer[(4, 16), "int8"], - in1: T.Buffer[(4, 16), "int8"], - out: T.Buffer[(4, 4), "int32"], + in0: T.Buffer((4, 16), "int8"), + in1: T.Buffer((4, 16), "int8"), + out: T.Buffer((4, 4), "int32"), ) -> None: # body # with T.block("root") @@ -935,10 +935,10 @@ def lowered_softmax(var_A: T.handle, var_T_softmax_norm: T.handle) -> None: @T.prim_func def argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0 in T.grid(128, 4): for i1_1 in T.thread_binding(32, thread="threadIdx.x"): @@ -960,10 +960,10 @@ def argmax_split( @T.prim_func def lowered_argmax_split( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmax_v0: T.Buffer[(128,), "int32"], - argmax_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmax_v0: T.Buffer((128,), "int32"), + argmax_v1: T.Buffer((128,), "float32"), ) -> None: cross_thread_argmax_v0 = T.alloc_buffer([1], dtype="int32", strides=[1], scope="local") cross_thread_argmax_v1 = T.alloc_buffer([1], dtype="float32", strides=[1], scope="local") @@ -1026,10 +1026,10 @@ def lowered_argmax_split( @T.prim_func def argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), "int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: for i0, i1_0 in T.grid(128, 4): for i1_1 in T.thread_binding(32, thread="threadIdx.x"): @@ -1051,10 +1051,10 @@ def argmin_split_init_update_reordered( @T.prim_func def lowered_argmin_split_init_update_reordered( - idx: T.Buffer[(128, 128), "int32"], - val: T.Buffer[(128, 128), "float32"], - argmin_v0: T.Buffer[(128,), "int32"], - argmin_v1: T.Buffer[(128,), "float32"], + idx: T.Buffer((128, 128), "int32"), + val: T.Buffer((128, 128), "float32"), + argmin_v0: T.Buffer((128,), 
"int32"), + argmin_v1: T.Buffer((128,), "float32"), ) -> None: cross_thread_argmin_v0 = T.alloc_buffer([1], dtype="int32", strides=[1], scope="local") cross_thread_argmin_v1 = T.alloc_buffer([1], dtype="float32", strides=[1], scope="local") @@ -1117,10 +1117,10 @@ def lowered_argmin_split_init_update_reordered( @T.prim_func def layer_norm_tuple_sum( - data: T.Buffer[(128, 768), "float32"], - gamma: T.Buffer[768, "float32"], - bias: T.Buffer[768, "float32"], - T_layer_norm: T.Buffer[(128, 768), "float32"], + data: T.Buffer((128, 768), "float32"), + gamma: T.Buffer(768, "float32"), + bias: T.Buffer(768, "float32"), + T_layer_norm: T.Buffer((128, 768), "float32"), ) -> None: data_red_temp_v0 = T.alloc_buffer([128], dtype="float32") data_red_temp_v1 = T.alloc_buffer([128], dtype="float32") @@ -1172,10 +1172,10 @@ def layer_norm_tuple_sum( @T.prim_func def lowered_layer_norm_tuple_sum( - data: T.Buffer[(128, 768), "float32"], - gamma: T.Buffer[768, "float32"], - bias: T.Buffer[768, "float32"], - T_layer_norm: T.Buffer[(128, 768), "float32"], + data: T.Buffer((128, 768), "float32"), + gamma: T.Buffer(768, "float32"), + bias: T.Buffer(768, "float32"), + T_layer_norm: T.Buffer((128, 768), "float32"), ) -> None: # with T.block("root") data_red_temp_v0 = T.alloc_buffer([128], dtype="float32") diff --git a/tests/python/unittest/test_tir_transform_lower_opaque_block.py b/tests/python/unittest/test_tir_transform_lower_opaque_block.py index 824cef174055c..a7502edd31ab7 100644 --- a/tests/python/unittest/test_tir_transform_lower_opaque_block.py +++ b/tests/python/unittest/test_tir_transform_lower_opaque_block.py @@ -236,7 +236,7 @@ def compacted_strided_buffer_func(a: T.handle, c: T.handle) -> None: @T.prim_func def transformed_strided_buffer_func( - A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"] + A: T.Buffer((16, 16), "float32"), C: T.Buffer((16, 16), "float32") ) -> None: # body for i0 in T.serial(4): @@ -256,7 +256,7 @@ def annotated_loops(a: T.handle) -> None: @T.prim_func -def boolean_handling_before(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> None: +def boolean_handling_before(a: T.Buffer(10, "bool"), b: T.Buffer(10, "bool")) -> None: for i0 in T.serial(10): with T.block("b"): T.reads(a[i0]) @@ -265,7 +265,7 @@ def boolean_handling_before(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> @T.prim_func -def boolean_handling_after(a: T.Buffer[10, "bool"], b: T.Buffer[10, "bool"]) -> None: +def boolean_handling_after(a: T.Buffer(10, "bool"), b: T.Buffer(10, "bool")) -> None: # body for i0 in T.serial(10): b[i0] = a[i0] @@ -342,14 +342,14 @@ def annotated_block() -> None: def test_preserved_annotations(): @T.prim_func - def before(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]): + def before(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")): for i in T.serial(8, annotations={"k_0": 1, "k_1": [2, 3], "k_2": 3.14}): with T.block("block"): T.block_attr({"k_3": "oops"}) B[i] = A[i] + 1.0 @T.prim_func - def after(A: T.Buffer[8, "float32"], B: T.Buffer[8, "float32"]): + def after(A: T.Buffer(8, "float32"), B: T.Buffer(8, "float32")): for i in T.serial(8, annotations={"k_0": 1, "k_1": [2, 3], "k_2": 3.14}): B[i] = A[i] + 1.0 diff --git a/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py b/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py index 111b91d5fd54a..15d7118fb8a95 100644 --- a/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py +++ 
b/tests/python/unittest/test_tir_transform_manifest_shared_memory_local_stage.py @@ -26,7 +26,7 @@ @tvm.script.ir_module class MatmulBefore: @T.prim_func - def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float32"], C: T.Buffer[(1024, 1024), "float32"]) -> None: + def main(A: T.Buffer((1024, 1024), "float32"), B: T.Buffer((1024, 1024), "float32"), C: T.Buffer((1024, 1024), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) # body @@ -67,7 +67,7 @@ def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float3 @tvm.script.ir_module class MatmulAfter: @T.prim_func - def main(A: T.Buffer[(1024, 1024), "float32"], B: T.Buffer[(1024, 1024), "float32"], C: T.Buffer[(1024, 1024), "float32"]) -> None: + def main(A: T.Buffer((1024, 1024), "float32"), B: T.Buffer((1024, 1024), "float32"), C: T.Buffer((1024, 1024), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) # body diff --git a/tests/python/unittest/test_tir_transform_narrow_datatype.py b/tests/python/unittest/test_tir_transform_narrow_datatype.py index c9c5133785956..56b63c889335a 100644 --- a/tests/python/unittest/test_tir_transform_narrow_datatype.py +++ b/tests/python/unittest/test_tir_transform_narrow_datatype.py @@ -117,7 +117,7 @@ def test_thread_axis_2(): @tvm.script.ir_module class Before: @T.prim_func - def main(T_reshape: T.Buffer[(1, 12, 384, 384), "float32"], placeholder_1: T.Buffer[(T.int64(1), T.int64(12), T.int64(384), 384), "bool"], T_where: T.Buffer[(T.int64(1), T.int64(12), T.int64(384), 384), "float32"]) -> None: + def main(T_reshape: T.Buffer((1, 12, 384, 384), "float32"), placeholder_1: T.Buffer((T.int64(1), T.int64(12), T.int64(384), 384), "bool"), T_where: T.Buffer((T.int64(1), T.int64(12), T.int64(384), 384), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # body @@ -299,7 +299,7 @@ def test_ramp_dtype_consistency(): def test_condition(): @T.prim_func - def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(130,), "float32"]): + def before(A: T.Buffer((128,), "float32"), B: T.Buffer((130,), "float32")): for i, j in T.grid(T.int64(2), T.int64(65)): if i * T.int64(65) + j >= T.int64(0) and i * T.int64(65) + j < T.int64(128): A[i * T.int64(65) + j] = 0.0 @@ -312,7 +312,7 @@ def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(130,), "float32"]): ) @T.prim_func - def expected_after(A: T.Buffer[128, "float32"], B: T.Buffer[130, "float32"]): + def expected_after(A: T.Buffer(128, "float32"), B: T.Buffer(130, "float32")): for i, j in T.grid(2, 65): if i * 65 + j >= 0 and i * 65 + j < 128: A[i * 65 + j] = T.float32(0) @@ -327,7 +327,7 @@ def expected_after(A: T.Buffer[128, "float32"], B: T.Buffer[130, "float32"]): def test_block(): @T.prim_func - def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): + def before(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")): for i in T.serial(0, T.int64(16)): for j in T.serial(0, T.int64(8)): with T.block(): @@ -335,7 +335,7 @@ def before(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): B[vi] = A[vi] + T.float32(1) @T.prim_func - def expected_after(A: T.Buffer[(128,), "float32"], B: T.Buffer[(128,), "float32"]): + def expected_after(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")): for i in T.serial(0, T.int32(16)): for j in T.serial(0, T.int32(8)): with T.block(): diff --git 
a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py index e1750e73b43e9..05d71de5bca69 100644 --- a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py +++ b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py @@ -254,7 +254,7 @@ def test_loop_carried_dependency(): and the allocate buffer should keep the order.""" @T.prim_func - def before(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]): + def before(A: T.Buffer((8, 8, 8), "int32"), B: T.Buffer((8, 8, 8), "int32")): C = T.alloc_buffer([8, 8, 8], dtype="int32") D = T.alloc_buffer([8, 8, 8], dtype="int32") for i in T.serial(8): @@ -278,7 +278,7 @@ def before(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]): ) @T.prim_func - def after(A: T.Buffer[(8, 8, 8), "int32"], B: T.Buffer[(8, 8, 8), "int32"]) -> None: + def after(A: T.Buffer((8, 8, 8), "int32"), B: T.Buffer((8, 8, 8), "int32")) -> None: for i in T.serial(8): with T.block(): T.reads(A[i, 0:8, 0:8]) @@ -312,7 +312,7 @@ def test_1D_cascade_op_rolling_buffer(): which is marked as opaque in consumer block's iter mappings.""" @T.prim_func - def before(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): + def before(A: T.Buffer((4, 16), "int32"), C: T.Buffer((4, 8), "int32")): B = T.alloc_buffer((4, 6), "int32") for c in T.serial(4): for i in T.serial(0, 2): @@ -338,7 +338,7 @@ def before(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): ) @T.prim_func - def after(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]): + def after(A: T.Buffer((4, 16), "int32"), C: T.Buffer((4, 8), "int32")): for c in T.serial(4): with T.block(): T.reads(A[c, 0:12], C[c, 0:8]) diff --git a/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py b/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py index 13fbcc7594ec1..ae82afa650e3b 100644 --- a/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py +++ b/tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py @@ -46,7 +46,7 @@ class TestIntroduceNoOp(BaseBeforeAfter): to the then_case, then the conditional can be removed. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -54,7 +54,7 @@ def before(A: T.Buffer[16, "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 T.evaluate(0) @@ -70,12 +70,12 @@ class TestIntroduceAdditionOfZero(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): for i in T.serial(16): if i > 0: A[0] = A[0] + i * i - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): for i in T.serial(16): A[0] = A[0] + i * i @@ -89,7 +89,7 @@ class TestIntroduceAdditionOfKnownZeroInBuffer(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -98,7 +98,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): if i < 14: B[0] = B[0] + A[i] - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -118,7 +118,7 @@ class TestIntroduceOverwrittenWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -127,7 +127,7 @@ def before(A: T.Buffer[16, "int32"]): if i >= 14: A[i] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -145,7 +145,7 @@ class TestMaintainValuesUsedLater(BaseBeforeAfter): not be valid. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -167,7 +167,7 @@ class TestIdentifyOverwrittenWriteFromEquivalentExpressions(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -176,7 +176,7 @@ def before(A: T.Buffer[16, "int32"]): if io == 3 and ii >= 2: A[4 * io + ii] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -197,7 +197,7 @@ class TestIntroduceSupersetOverwrittenWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 14: A[i] = 1 @@ -206,7 +206,7 @@ def before(A: T.Buffer[16, "int32"]): if i >= 14: A[i] = 2 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 diff --git a/tests/python/unittest/test_tir_transform_remove_assume.py b/tests/python/unittest/test_tir_transform_remove_assume.py index a2d68a0757902..542d5c7a10207 100644 --- a/tests/python/unittest/test_tir_transform_remove_assume.py +++ b/tests/python/unittest/test_tir_transform_remove_assume.py @@ -30,25 +30,25 @@ def transform(self): class TestRemoveAssume(BaseBeforeAfter): """Remove any instance of T.assume""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 5)) A[0] = 10 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 10 class TestRemoveAssumeLoop(BaseBeforeAfter): """Loops containing only T.assume should be removed""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) for i in T.serial(16): A[i] = 10 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 10 diff --git a/tests/python/unittest/test_tir_transform_remove_no_op.py b/tests/python/unittest/test_tir_transform_remove_no_op.py index 06d9289aa7952..15c5a577f9f54 100644 --- a/tests/python/unittest/test_tir_transform_remove_no_op.py +++ b/tests/python/unittest/test_tir_transform_remove_no_op.py @@ -74,7 +74,7 @@ def test_remove_no_op(): def test_remove_no_op_with_invalid_extent(): @T.prim_func - def main(A: T.Buffer[(16), "int32"], B: T.Buffer[(16), "int32"]) -> None: + def main(A: T.Buffer((16), "int32"), B: T.Buffer((16), "int32")) -> None: for i in T.serial(16): for j in T.serial(i - 20): B[i] = A[i] + j @@ -115,23 +115,23 @@ def expected(): class TestRemoveZeroExtentLoop(BaseBeforeAfter): """A for-loop with no extent is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(0): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) class TestRemoveUnusedLet(BaseBeforeAfter): """A let statement that is never used is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): x = 5 for i in T.serial(16): A[i] = 0 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 0 @@ -143,12 +143,12 @@ class TestRemoveLetUsedOnlyInNoOp(BaseBeforeAfter): may have been removed by an earlier removal of another no-op. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): x = 5 for i in T.serial(0): A[i] = x - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) @@ -166,14 +166,14 @@ def expected(): class TestRemoveEmptyThenCase(BaseBeforeAfter): """A no-op then_case can be removed.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: T.evaluate(0) else: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if not (i < 8): A[i] = 42 @@ -182,14 +182,14 @@ def expected(A: T.Buffer[16, "int32"]): class TestRemoveEmptyElseCase(BaseBeforeAfter): """A no-op else_case can be removed.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: A[i] = 42 else: T.evaluate(0) - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 8: A[i] = 42 @@ -200,12 +200,12 @@ class TestRemoveUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -218,7 +218,7 @@ class TestSuppressRemovalOfUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = False - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = 42 @@ -231,12 +231,12 @@ class TestKeepSideEffectsOfUnusedWrite(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = T.call_extern("extern_func", dtype="int32") A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.call_extern("extern_func", dtype="int32")) A[i] = 42 @@ -245,7 +245,7 @@ def expected(A: T.Buffer[16, "int32"]): class TestKeepFirstWriteWhenUsed(BaseBeforeAfter): """For two sequential writes, keep the first if it is used""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 A[i] = A[i] + 1 @@ -261,14 +261,14 @@ class TestRemoveOverwrittenLoop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -283,14 +283,14 @@ class TestRemoveOverwrittenSubloop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(4, 12): A[i] = 100 for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 42 @@ -302,7 +302,7 @@ class TestKeepPartiallyOverwrittenLoop(BaseBeforeAfter): may not be removed be kept. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 @@ -323,7 +323,7 @@ class TestRemoveOverwrittenPredicatedLoopWithIdenticalCondition(BaseBeforeAfter) use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 100 @@ -332,7 +332,7 @@ def before(A: T.Buffer[16, "int32"]): if i < 12: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 42 @@ -351,7 +351,7 @@ class TestRemoveOverwrittenPredicatedLoopWithProvableCondition(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 10: A[i] = 100 @@ -360,7 +360,7 @@ def before(A: T.Buffer[16, "int32"]): if i // 4 < 3: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i // 4 < 3: A[i] = 42 @@ -375,7 +375,7 @@ class TestRemoveSeparatedOverwrites(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 100 @@ -385,7 +385,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): for i in T.serial(16): A[i] = 42 - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -404,7 +404,7 @@ class TestRemoveSeparatedOverwriteOfPredicatedLoop(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i < 12: A[i] = 100 @@ -417,7 +417,7 @@ def before(A: T.Buffer[16, "int32"]): if i < 12: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i > 12: A[i] = 15 @@ -430,17 +430,17 @@ def expected(A: T.Buffer[16, "int32"]): class TestRemoveReadWrite(BaseBeforeAfter): """Writing a value to the same location as was just read is a no-op.""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = A[0] - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestKeepReadWriteToDifferentIndices(BaseBeforeAfter): """Writing a value to a different index should not be removed""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(15): A[i] = A[i + 1] @@ -455,12 +455,12 @@ class TestRemoveReadWriteSameIndexDifferentExpression(BaseBeforeAfter): expression. """ - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for io, ii in T.grid(4, 4): i = 4 * io + ii A[4 * io + ii] = A[i] - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): T.evaluate(0) @@ -472,14 +472,14 @@ class TestRemoveReadWriteSameIndexUsingConstraint(BaseBeforeAfter): that is known from a conditional containing the read/write. 
""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i != 0: A[i] = A[i - 1] else: A[i] = A[0] - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if i != 0: A[i] = A[i - 1] @@ -490,13 +490,13 @@ class TestRemoveWritingOfKnownValue(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i A[4] = 4 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -513,14 +513,14 @@ class TestKeepOneOfDuplicateLoops(BaseBeforeAfter): use_dataflow_analysis = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i for i in T.serial(16): A[i] = i - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -540,12 +540,12 @@ def expected(): class TestRemoveUnusedTemporary(BaseBeforeAfter): """An unused allocation is a no-op.""" - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): B = T.allocate([16], "int32", "local") for i in T.serial(16): A[i] = 1 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = 1 @@ -566,7 +566,7 @@ def expected(): class TestKeepUsedWriteIntoTemporary(BaseBeforeAfter): """A write into a temporary that is used later must be kept.""" - def before(B: T.Buffer[16, "int32"]): + def before(B: T.Buffer(16, "int32")): A = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): A[i] = 0 @@ -581,7 +581,7 @@ def before(B: T.Buffer[16, "int32"]): class TestRemoveWriteIntoTemporary(BaseBeforeAfter): """A write that only impacts a temporary allocation is a no-op.""" - def before(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), C: T.Buffer(1, "int32")): B = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): B[i] = A[i] @@ -593,7 +593,7 @@ def before(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): for i in T.serial(16): B[i] = 0 - def expected(A: T.Buffer[16, "int32"], C: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(16, "int32"), C: T.Buffer(1, "int32")): B = T.decl_buffer([16], "int32", scope="local") for i in T.serial(16): B[i] = A[i] diff --git a/tests/python/unittest/test_tir_transform_remove_undef.py b/tests/python/unittest/test_tir_transform_remove_undef.py index c634bf5e9da89..66a6198723c1e 100644 --- a/tests/python/unittest/test_tir_transform_remove_undef.py +++ b/tests/python/unittest/test_tir_transform_remove_undef.py @@ -30,27 +30,27 @@ def transform(self): class TestRemoveStoreUndef(BaseBeforeAfter): """Remove a store whose value is T.undef()""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = T.undef(dtype="int32") - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestRemoveStoreUndefExpression(BaseBeforeAfter): """Expressions containing T.undef() are removed""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 1 + T.undef(dtype="int32") - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestKeepOtherCallNodes(BaseBeforeAfter): """Expressions containing other CallNodes are not removed""" - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: 
T.Buffer(1, "int32"), n: T.int32): A[0] = T.shift_left(n, 1, dtype="int32") expected = before @@ -59,18 +59,18 @@ def before(A: T.Buffer[1, "int32"], n: T.int32): class TestRemoveLetUndef(BaseBeforeAfter): """Remove a store whose value is bound to T.undef()""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): val = T.undef(dtype="int32") A[0] = val - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(0) class TestRaiseErrorForUndefAsStoreIndices(BaseBeforeAfter): """Use of T.undef() as buffer indices is an error""" - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): val = T.undef(dtype="int32") A[val] = 5 @@ -84,7 +84,7 @@ class TestRaiseErrorForUndefAsLoadIndices(BaseBeforeAfter): T.undef() may not appear in a buffer's indices. """ - def before(A: T.Buffer[1, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32"), B: T.Buffer(1, "int32")): B[0] = A[T.undef(dtype="int32")] expected = TVMError diff --git a/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py b/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py index 7a014283816fd..6d6e0da71cc50 100644 --- a/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py +++ b/tests/python/unittest/test_tir_transform_remove_weight_layout_rewrite_block.py @@ -35,9 +35,9 @@ def _check(before, expect): def test_matmul(): @T.prim_func def before( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 16), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 16), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) B_ = T.alloc_buffer([16, 4, 4], dtype="float32") @@ -61,9 +61,9 @@ def before( @T.prim_func def after( - A: T.Buffer[(16, 16), "float32"], - B: T.Buffer[(16, 4, 4), "float32"], - C: T.Buffer[(16, 16), "float32"], + A: T.Buffer((16, 16), "float32"), + B: T.Buffer((16, 4, 4), "float32"), + C: T.Buffer((16, 16), "float32"), ) -> None: T.func_attr({"layout_free_buffers": [1]}) for i0_o, i1_o in T.grid(16, 16): diff --git a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py index 5cdc272440e78..057cfc42e4ecb 100644 --- a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py +++ b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py @@ -25,7 +25,7 @@ @tvm.script.ir_module class Before: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) @@ -56,7 +56,7 @@ def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 51 @tvm.script.ir_module class After: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), 
"float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) @@ -87,7 +87,7 @@ def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 51 @tvm.script.ir_module class After_simplified: @T.prim_func - def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: + def main(inputs: T.Buffer((1, 4, 4, 512), "float32"), weight: T.Buffer((4, 4, 512, 256), "float32"), conv2d_transpose_nhwc: T.Buffer((1, 8, 8, 256), "float32")) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) # var definition diff --git a/tests/python/unittest/test_tir_transform_simplify.py b/tests/python/unittest/test_tir_transform_simplify.py index 1ddc0e50d98fb..b50035aa69d46 100644 --- a/tests/python/unittest/test_tir_transform_simplify.py +++ b/tests/python/unittest/test_tir_transform_simplify.py @@ -164,10 +164,10 @@ def inner(mod): class TestLoadStoreNoop(BaseBeforeAfter): """Store of a value that was just read from the same location is a no-op.""" - def before(A: T.Buffer[(1,), "float32"]): + def before(A: T.Buffer((1,), "float32")): A[0] = A[0] - def expected(A: T.Buffer[(1,), "float32"]): + def expected(A: T.Buffer((1,), "float32")): T.evaluate(0) @@ -180,10 +180,10 @@ class TestLoadStoreNoopAfterSimplify(BaseBeforeAfter): regression. """ - def before(A: T.Buffer[(1,), "float32"]): + def before(A: T.Buffer((1,), "float32")): A[0] = A[0] + (5.0 - 5.0) - def expected(A: T.Buffer[(1,), "float32"]): + def expected(A: T.Buffer((1,), "float32")): T.evaluate(0) @@ -195,13 +195,13 @@ class TestNestedCondition(BaseBeforeAfter): constraint. """ - def before(A: T.Buffer[(16,), "float32"]): + def before(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i == 5: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"]): + def expected(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: A[i] = 0.0 @@ -214,13 +214,13 @@ class TestNestedProvableCondition(BaseBeforeAfter): conditional. """ - def before(A: T.Buffer[(16,), "float32"]): + def before(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: if i < 7: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"]): + def expected(A: T.Buffer((16,), "float32")): for i in T.serial(16): if i == 5: A[i] = 0.0 @@ -233,13 +233,13 @@ class TestNestedVarCondition(BaseBeforeAfter): constraint. """ - def before(A: T.Buffer[(16,), "float32"], n: T.int32): + def before(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if i == n: if i == n: A[i] = 0.0 - def expected(A: T.Buffer[(16,), "float32"], n: T.int32): + def expected(A: T.Buffer((16,), "float32"), n: T.int32): for i in T.serial(16): if i == n: A[i] = 0.0 @@ -254,7 +254,7 @@ class TestAlteredBufferContents(BaseBeforeAfter): may not. """ - def before(A: T.Buffer[(1,), "int32"], n: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 if A[0] == n: @@ -270,7 +270,7 @@ class TestNegationOfCondition(BaseBeforeAfter): condition is known to be false. 
""" - def before(A: T.Buffer[(16,), "int32"]): + def before(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i == 5: if i != 5: @@ -278,7 +278,7 @@ def before(A: T.Buffer[(16,), "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"]): + def expected(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i == 5: A[i] = 1 @@ -293,7 +293,7 @@ class TestNegationOfNotEqual(BaseBeforeAfter): ``i==5`` as the negation of a literal constraint. """ - def before(A: T.Buffer[(16,), "int32"]): + def before(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i != 5: if i == 5: @@ -301,7 +301,7 @@ def before(A: T.Buffer[(16,), "int32"]): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"]): + def expected(A: T.Buffer((16,), "int32")): for i in T.serial(16): if i != 5: A[i] = 1 @@ -314,7 +314,7 @@ class TestNegationOfVarCondition(BaseBeforeAfter): must rely on RewriteSimplifier recognizing the repeated literal. """ - def before(A: T.Buffer[(16,), "int32"], n: T.int32): + def before(A: T.Buffer((16,), "int32"), n: T.int32): for i in T.serial(16): if i == n: if i != n: @@ -322,7 +322,7 @@ def before(A: T.Buffer[(16,), "int32"], n: T.int32): else: A[i] = 1 - def expected(A: T.Buffer[(16,), "int32"], n: T.int32): + def expected(A: T.Buffer((16,), "int32"), n: T.int32): for i in T.serial(16): if i == n: A[i] = 1 @@ -337,13 +337,13 @@ class TestLiteralConstraintSplitBooleanAnd(BaseBeforeAfter): the condition is to ensure we exercise RewriteSimplifier. """ - def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def before(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n and j == n: if i == n: A[i, j] = 0 - def expected(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def expected(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n and j == n: A[i, j] = 0 @@ -360,7 +360,7 @@ class TestLiteralConstraintSplitBooleanOr(BaseBeforeAfter): RewriteSimplifier. 
""" - def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def before(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n or j == n: A[i, j] = 0 @@ -370,7 +370,7 @@ def before(A: T.Buffer[(16, 16), "int32"], n: T.int32): else: A[i, j] = 2 - def expected(A: T.Buffer[(16, 16), "int32"], n: T.int32): + def expected(A: T.Buffer((16, 16), "int32"), n: T.int32): for i, j in T.grid(16, 16): if i == n or j == n: A[i, j] = 0 @@ -387,14 +387,14 @@ class TestProveConditionUsingLet(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition or i >= 3: A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 A[i] = condition @@ -409,7 +409,7 @@ class TestProveLetCondition(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if i < 3: @@ -417,7 +417,7 @@ def before(A: T.Buffer[4, "bool"]): A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if i < 3: @@ -432,7 +432,7 @@ class TestProveRepeatedLetCondition(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition: @@ -440,7 +440,7 @@ def before(A: T.Buffer[4, "bool"]): A[i] = condition @T.prim_func - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): for i in T.serial(4): condition = i < 3 if condition: @@ -449,13 +449,13 @@ def expected(A: T.Buffer[4, "bool"]): class TestIfThenElseExpr(BaseBeforeAfter): @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if i < 12: A[i] = T.if_then_else(i < 12, 1.0, 2.0, dtype="float32") @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): if i < 12: A[i] = 1.0 @@ -465,13 +465,13 @@ class TestCeilLog2Int(BaseBeforeAfter): """Simplify expressions resulting from topi.math.ceil_log2""" @T.prim_func - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = T.cast( T.ceil(T.log2(T.cast(14, "float64"), dtype="float64"), dtype="float64"), dtype="int32" ) @T.prim_func - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 4 @@ -479,7 +479,7 @@ class TestLeftCeilLog2LowerBound(BaseBeforeAfter): """Integer bounds are propagated through topi.math.ceil_log2""" @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): x = T.cast( T.ceil(T.log2(T.cast(i + 1024 + 1, "float64"), dtype="float64"), dtype="float64"), @@ -489,7 +489,7 @@ def before(A: T.Buffer[16, "float32"]): A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -503,13 +503,13 @@ class TestLeftShiftLowerBound(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(1, i, dtype="int32") >= 1: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -523,13 +523,13 @@ class 
TestLeftShiftUpperBound(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(31, i, dtype="int32") <= 1015808: A[i] = 0.0 @T.prim_func - def expected(A: T.Buffer[16, "float32"]): + def expected(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0 @@ -543,7 +543,7 @@ class TestLeftShiftOfNegativeValue(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if -64 <= T.shift_left(-i, 4, dtype="int32"): A[i] = 0.0 @@ -560,7 +560,7 @@ class TestLeftShiftByNegativeValue(BaseBeforeAfter): """ @T.prim_func - def before(A: T.Buffer[16, "float32"]): + def before(A: T.Buffer(16, "float32")): for i in T.serial(16): if T.shift_left(16, -i, dtype="int32") <= 16: A[i] = 0.0 @@ -651,7 +651,7 @@ def before(self, test_case): priors, postulate, _ = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = postulate @@ -667,7 +667,7 @@ def expected(self, test_case): if provable: @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = True @@ -677,7 +677,7 @@ def func(A: T.Buffer[1, "bool"]): postulate = analyzer.canonical_simplify(postulate) @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = postulate @@ -687,7 +687,7 @@ def func(A: T.Buffer[1, "bool"]): class TestSuppressTransitivelyProvableCondition(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): if i < j and j < k: A[0] = i < k @@ -699,10 +699,10 @@ class TestRewriteAsAndOfOrs(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[3, "bool"]): + def before(A: T.Buffer(3, "bool")): T.evaluate(A[0] or (A[1] and A[2])) - def expected(A: T.Buffer[3, "bool"]): + def expected(A: T.Buffer(3, "bool")): T.evaluate((A[0] or A[1]) and (A[0] or A[2])) @@ -711,7 +711,7 @@ class TestSuppressRewriteAsAndOfOrs(BaseBeforeAfter): convert_boolean_to_and_of_ors = False - def before(A: T.Buffer[3, "bool"]): + def before(A: T.Buffer(3, "bool")): T.evaluate(A[0] or (A[1] and A[2])) expected = before @@ -729,10 +729,10 @@ class TestRewriteAsAndOfOrsWithTopLevelAnd(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[4, "bool"]): + def before(A: T.Buffer(4, "bool")): T.evaluate((A[0] or A[1]) and (A[1] or (A[0] and A[2] and A[3]))) - def expected(A: T.Buffer[4, "bool"]): + def expected(A: T.Buffer(4, "bool")): # If the simplification is applied to the OrNode, then a # redundant `(A[1] or A[0])` would't be canceled out. 
When # applying SimplifyAsAndOfOrs to the top-level AndNode, the @@ -760,10 +760,10 @@ class TestRewriteAsAndOfOrsWithSimplificationBetweenGroups(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10 or k == 20) and (i == 0 or j == 10 or k != 30) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = i == 0 or j == 10 or k == 20 @@ -777,10 +777,10 @@ class TestRewriteAsAndOfOrsWithSimplificationBetweenReorderedGroups(BaseBeforeAf convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10 or k == 20) and (j == 10 or k != 30 or i == 0) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = j == 10 or k == 20 or i == 0 @@ -794,10 +794,10 @@ class TestRewriteAsAndOfOrUsingSimplificationAcrossAnd(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (k == 20) and ((i == 0 or j == 10) and (k != 30)) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 0 or j == 10) and (k == 20) @@ -815,10 +815,10 @@ class TestRewriteAsAndOfOrUsingSimplificationWithinOr(BaseBeforeAfter): convert_boolean_to_and_of_ors = True - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (i == 20) or (j == 0) or (i != 30) - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32, k: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32, k: T.int32): A[0] = (j == 0) or (i != 30) @@ -842,11 +842,11 @@ class TestConditionalFloorMod(BaseBeforeAfter): `canonical_simplify`. 
""" - def before(A: T.Buffer[1, "bool"], i: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32): if T.floormod(0 - i, 2) == 0: A[0] = T.floormod(i, 2) == 0 - def expected(A: T.Buffer[1, "bool"], i: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32): if T.floormod(i, -2) == 0: A[0] = True @@ -861,10 +861,10 @@ class TestSimplifyRHSOfBooleanAndUsingLHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 and n < 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 @@ -877,10 +877,10 @@ class TestSimplifyLHSOfBooleanAndUsingRHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 and n < 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 @@ -894,10 +894,10 @@ class TestSimplifyRHSOfBooleanOrUsingLHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 or n < 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 @@ -910,10 +910,10 @@ class TestSimplifyLHSOfBooleanOrUsingRHS(BaseBeforeAfter): apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[1, "bool"], n: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 5 or n < 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32): A[0] = n < 10 @@ -929,10 +929,10 @@ class TestSimplifyRHSOfBooleanAndUsingLHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 and n < m + 10 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 @@ -948,10 +948,10 @@ class TestSimplifyLHSOfBooleanAndUsingRHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 and n < m + 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 @@ -967,10 +967,10 @@ class TestSimplifyRHSOfBooleanOrUsingLHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 or n < m + 5 - def expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 @@ -986,10 +986,10 @@ class TestSimplifyLHSOfBooleanOrUsingRHSWithoutConst(BaseBeforeAfter): apply_constraints_to_boolean_branches = True transitively_prove_inequalities = True - def before(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 5 or n < m + 10 - def 
expected(A: T.Buffer[1, "bool"], n: T.int32, m: T.int32): + def expected(A: T.Buffer(1, "bool"), n: T.int32, m: T.int32): A[0] = n < m + 10 @@ -998,11 +998,11 @@ class TestProvableConditionWithOffset(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = i < j + 1 - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = True @@ -1035,7 +1035,7 @@ def before(self, test_case): priors, expr_before, _ = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = expr_before @@ -1046,7 +1046,7 @@ def expected(self, test_case): priors, _, expr_after = test_case @T.prim_func - def func(A: T.Buffer[1, "bool"]): + def func(A: T.Buffer(1, "bool")): if priors: A[0] = expr_after @@ -1058,11 +1058,11 @@ class TestProvableConditionWithOffset(BaseBeforeAfter): transitively_prove_inequalities = False - def before(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def before(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = i < j + 1 - def expected(A: T.Buffer[1, "bool"], i: T.int32, j: T.int32): + def expected(A: T.Buffer(1, "bool"), i: T.int32, j: T.int32): if i < j: A[0] = True @@ -1078,7 +1078,7 @@ class TestAlteredBufferContents(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[(1,), "int32"], n: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 # If the simplifier incorrectly uses the invalidated @@ -1091,7 +1091,7 @@ def before(A: T.Buffer[(1,), "int32"], n: T.int32): else: A[0] = 10 - def expected(A: T.Buffer[(1,), "int32"], n: T.int32): + def expected(A: T.Buffer((1,), "int32"), n: T.int32): if A[0] == n: A[0] = A[0] + 1 A[0] = 10 @@ -1108,7 +1108,7 @@ class TestPossiblyAlteredBufferContents(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[(1,), "int32"], n: T.int32, m: T.int32): + def before(A: T.Buffer((1,), "int32"), n: T.int32, m: T.int32): if A[0] == n: if m == 0: A[0] = A[0] + 1 @@ -1126,12 +1126,12 @@ class TestSimplifyInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) if n == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"], n: T.int32): + def expected(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) A[0] = 42 @@ -1141,12 +1141,12 @@ class TestSimplifyInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) if n == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"], n: T.int32): + def expected(A: T.Buffer(1, "int32"), n: T.int32): T.evaluate(T.assume(n == 0)) A[0] = 42 @@ -1156,7 +1156,7 @@ class TestNoSimplifyFromScopedInputAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], n: T.int32, m: T.int32): + def before(A: T.Buffer(1, "int32"), n: T.int32, m: T.int32): if m == 0: T.evaluate(T.assume(n == 0)) @@ -1171,13 +1171,13 @@ class TestSimplifyConditionalUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: 
T.Buffer(1, "int32")): A[0] = 0 if A[0] == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = 42 @@ -1192,7 +1192,7 @@ class TestKeepExpressionSimplifyUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32"), B: T.Buffer(1, "int32")): A[0] = 0 B[0] = A[0] @@ -1208,7 +1208,7 @@ class TestSimplifyConditionalInLoopUsingBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -1218,7 +1218,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): else: B[j] = 100 - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): A[i] = i @@ -1231,13 +1231,13 @@ class TestSimplifyUsingBufferAssumption(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) if A[0] == 0: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = 42 @@ -1247,7 +1247,7 @@ class TestSimplifyUsingBufferAssumptionInLoop(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == i)) @@ -1255,7 +1255,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] < 100: A[i] = 0 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == i)) @@ -1269,7 +1269,7 @@ class TestSimplifyUsingPartiallyKnownBufferConditional(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True apply_constraints_to_boolean_branches = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1283,7 +1283,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 100 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1307,7 +1307,7 @@ class TestSimplifyUsingPartiallyKnownBufferExpression(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -1316,7 +1316,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(i < 14 or A[i] == 0)) @@ -1336,7 +1336,7 @@ class TestNoSimplificationIfPredicateNotMet(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if 14 <= i: T.evaluate(T.assume(A[i] == 0)) @@ -1354,7 +1354,7 @@ class TestNoSimplifyUsingInvalidatedScopedConstraint(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): if i == 0: A[i] = 0 @@ -1374,7 +1374,7 @@ class 
TestNoSimplifyUsingOverwrittenValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1398,7 +1398,7 @@ class TestNoSimplifyUsingLoopDependentBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[1, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(1, "int32")): B[0] = 0 for i in T.serial(16): if B[0] < 10: @@ -1421,7 +1421,7 @@ class TestSimplifyPriorToOverwrittenValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1435,7 +1435,7 @@ def before(A: T.Buffer[16, "int32"]): if A[i] == 0: A[i] = 42 - def expected(A: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32")): for i in T.serial(16): T.evaluate(T.assume(A[i] == 0)) @@ -1459,7 +1459,7 @@ class TestSimplifyElementWiseUsingPreLoopBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def before(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -1469,7 +1469,7 @@ def before(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): else: B[i] = A[i] + B[i] - def expected(A: T.Buffer[16, "int32"], B: T.Buffer[16, "int32"]): + def expected(A: T.Buffer(16, "int32"), B: T.Buffer(16, "int32")): for i in T.serial(16): B[i] = 0 @@ -1482,11 +1482,11 @@ class TestSimplifyNonConditional(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = A[0] + 1 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = 1 @@ -1499,7 +1499,7 @@ class TestSuppressSimplifyNonConditional(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = False - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 0 A[0] = A[0] + 1 @@ -1515,7 +1515,7 @@ class TestSimplifyUsingTransitiveKnownBufferValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = A[0] + 1 @@ -1525,7 +1525,7 @@ def before(A: T.Buffer[1, "int32"]): if A[0] == 3: A[0] = 42 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): T.evaluate(T.assume(A[0] == 0)) A[0] = A[0] + 1 @@ -1540,7 +1540,7 @@ class TestSimplifyRampIndexBroadcastValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[4, "int32"]): + def before(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.broadcast(0, 4) if A[0] == 0: @@ -1549,7 +1549,7 @@ def before(A: T.Buffer[4, "int32"]): if A[1] == 0: A[1] = 60 - def expected(A: T.Buffer[4, "int32"]): + def expected(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.broadcast(0, 4) A[0] = 42 @@ -1561,7 +1561,7 @@ class TestSimplifyRampIndexRampValue(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[4, "int32"]): + def before(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] = T.ramp(11, 1, 4) if A[0] == 11: @@ -1570,7 +1570,7 @@ def before(A: T.Buffer[4, "int32"]): if A[1] == 12: A[1] = 60 - def expected(A: T.Buffer[4, "int32"]): + def expected(A: T.Buffer(4, "int32")): A[T.ramp(0, 1, 4)] 
= T.ramp(11, 1, 4) A[0] = 42 @@ -1589,7 +1589,7 @@ class TestSimplifyUsingPartiallyProvenBufferValueGather(BaseBeforeAfter): transitively_prove_inequalities = True propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def before(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): # A has non-zero values only in the range 3 <= i < 17 for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1610,7 +1610,7 @@ def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "i if B[i] != 0: B[i] = 0 - def expected(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def expected(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1635,7 +1635,7 @@ class TestSimplifyUsingPartiallyProvenBufferValueScatter(BaseBeforeAfter): propagate_knowns_to_prove_conditional = True - def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def before(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): # A has non-zero values only in the range 3 <= i < 17 for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1658,7 +1658,7 @@ def before(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "i if B[i] != 0: B[i] = 0 - def expected(A: T.Buffer[24, "int32"], B: T.Buffer[24, "int32"], F: T.Buffer[3, "int32"]): + def expected(A: T.Buffer(24, "int32"), B: T.Buffer(24, "int32"), F: T.Buffer(3, "int32")): for i in T.serial(24): T.evaluate(T.assume(((3 <= i) and (i < 17)) or A[i] == 0)) @@ -1680,11 +1680,11 @@ class TestSimplifyBufferStore(BaseBeforeAfter): propagate_knowns_to_simplify_expressions = True - def before(A: T.Buffer[1, "int32"]): + def before(A: T.Buffer(1, "int32")): A[0] = 5 A[0] = A[0] + 7 - def expected(A: T.Buffer[1, "int32"]): + def expected(A: T.Buffer(1, "int32")): A[0] = 5 A[0] = 12 diff --git a/tests/python/unittest/test_tir_transform_storage_flatten.py b/tests/python/unittest/test_tir_transform_storage_flatten.py index 95e2eaed55fa0..539dc158dfd51 100644 --- a/tests/python/unittest/test_tir_transform_storage_flatten.py +++ b/tests/python/unittest/test_tir_transform_storage_flatten.py @@ -139,7 +139,7 @@ def main(): T.func_attr({"from_legacy_te_schedule": True}) # If a pointer defined using a LetStmt, - A_data: T.Ptr[T.int32] = T.call_extern("dummy_extern_function", dtype="handle") + A_data: T.Ptr(T.int32) = T.call_extern("dummy_extern_function", dtype="handle") # and a buffer is backed by that pointer, A = T.decl_buffer([1], dtype="float32", data=A_data) diff --git a/tests/python/unittest/test_tir_transform_storage_rewrite.py b/tests/python/unittest/test_tir_transform_storage_rewrite.py index 2ed2e6ec6d71d..cddeaacfc4f23 100644 --- a/tests/python/unittest/test_tir_transform_storage_rewrite.py +++ b/tests/python/unittest/test_tir_transform_storage_rewrite.py @@ -652,7 +652,7 @@ def verify(n): def test_access_in_let_value(): @T.prim_func - def func(A: T.Buffer[(8,), "float32"]): + def func(A: T.Buffer((8,), "float32")): for i in range(8): B_data = T.allocate((1,), "float32", "global") B = T.Buffer(shape=[1], dtype="float32", data=B_data) @@ -661,7 +661,7 @@ def func(A: T.Buffer[(8,), "float32"]): A[i] = (x + 1.0) / (x - 1.0) @T.prim_func - def func_rewritten(A: T.Buffer[(8,), "float32"]) -> None: + def 
func_rewritten(A: T.Buffer((8,), "float32")) -> None: B_data = T.allocate((1,), "float32", "global") B = T.Buffer(shape=[1], dtype="float32", data=B_data) for i in range(8): @@ -689,12 +689,12 @@ class TestLetBufferRewrite(BaseCompare): """ def before() -> None: - A_data: T.Ptr[T.int32] = T.call_extern("dummy_func", dtype="handle") + A_data: T.Ptr(T.int32) = T.call_extern("dummy_func", dtype="handle") A = T.Buffer([8], "int32", data=A_data) A[0:8] = T.broadcast(42, 8) def expected() -> None: - A_data: T.Ptr[T.int32x8] = T.call_extern("dummy_func", dtype="handle") + A_data: T.Ptr(T.int32x8) = T.call_extern("dummy_func", dtype="handle") A = T.Buffer([1], "int32x8", data=A_data) A[0] = T.broadcast(42, 8) @@ -702,7 +702,7 @@ def expected() -> None: class TestRewriteInPlaceUseOfNonFlatBuffer(BaseCompare): """A non-flat buffer may be re-used for in-place operations""" - def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", @@ -735,7 +735,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): for i, j in T.grid(16, 16): D[i, j] = C[i, j] - def expected(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def expected(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", @@ -771,7 +771,7 @@ class TestNoRewriteOfSharedNonFlatBuffer(BaseCompare): not have matching shapes. """ - def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): + def before(A: T.Buffer((16, 16), "float32"), D: T.Buffer((16, 16), "float32")): B_data = T.allocate( [16, 16], dtype="float32", diff --git a/tests/python/unittest/test_tir_transform_thread_sync.py b/tests/python/unittest/test_tir_transform_thread_sync.py index b7caf04d659c6..eb578a8817b57 100644 --- a/tests/python/unittest/test_tir_transform_thread_sync.py +++ b/tests/python/unittest/test_tir_transform_thread_sync.py @@ -98,7 +98,7 @@ def ir(A, B): @tvm.testing.requires_cuda def test_sync_read_thread_id_independent_location(): @T.prim_func - def func(p0_arg: T.Buffer[(1, 2, 1, 1), "float32"], p1: T.Buffer[2, "float32"]) -> None: + def func(p0_arg: T.Buffer((1, 2, 1, 1), "float32"), p1: T.Buffer(2, "float32")) -> None: threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") p0 = T.Buffer([2], dtype="float32", data=p0_arg.data) diff --git a/tests/python/unittest/test_tir_transform_unify_thread_binding.py b/tests/python/unittest/test_tir_transform_unify_thread_binding.py index 90fce22bc14fa..e489298741cc5 100644 --- a/tests/python/unittest/test_tir_transform_unify_thread_binding.py +++ b/tests/python/unittest/test_tir_transform_unify_thread_binding.py @@ -74,9 +74,9 @@ def unified_element_wise_thread_x(a: T.handle, b: T.handle, c: T.handle) -> None @T.prim_func def element_wise_thread_x_different_dtype( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), "float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for i in T.thread_binding(128, "blockIdx.x"): for j0_0 in T.thread_binding(4, "threadIdx.x"): @@ -91,9 +91,9 @@ def element_wise_thread_x_different_dtype( @T.prim_func def unified_element_wise_thread_x_different_dtype( - A: T.Buffer[(128, 128), "float32"], - B: T.Buffer[(128, 128), "float32"], - C: T.Buffer[(128, 128), 
"float32"], + A: T.Buffer((128, 128), "float32"), + B: T.Buffer((128, 128), "float32"), + C: T.Buffer((128, 128), "float32"), ) -> None: for blockIdx_x in T.thread_binding(128, "blockIdx.x"): for threadIdx_x in T.thread_binding(4, "threadIdx.x"): diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py index 6145c39b876db..5d55c8ba8d3c0 100644 --- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py +++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py @@ -144,20 +144,20 @@ def __tvm_main__(input: T.handle, output: T.handle) -> None: @tvm.script.ir_module class LinearStructurePlanned: @T.prim_func - def __tvm_main__(input: T.handle, fast_memory_0_var: T.Ptr[T.uint8], slow_memory_1_var: T.Ptr[T.uint8], output: T.handle) -> None: + def __tvm_main__(input: T.handle, fast_memory_0_var: T.Ptr(T.uint8), slow_memory_1_var: T.Ptr(T.uint8), output: T.handle) -> None: fast_memory_0_buffer_var = T.match_buffer(fast_memory_0_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) slow_memory_1_buffer_var = T.match_buffer(slow_memory_1_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16) # body T.attr("default", "device_id", 0) T.attr("default", "device_type", 1) - sid_9_let: T.Ptr[T.int8] = T.address_of(slow_memory_1_buffer_var[1117472], dtype="handle") - sid_8_let: T.Ptr[T.int8] = T.address_of(slow_memory_1_buffer_var[0], dtype="handle") + sid_9_let: T.Ptr(T.int8) = T.address_of(slow_memory_1_buffer_var[1117472), dtype="handle") + sid_8_let: T.Ptr(T.int8) = T.address_of(slow_memory_1_buffer_var[0), dtype="handle") T.evaluate(T.call_extern("tvmgen_default_fused_cast_subtract", input, T.lookup_param("p0", dtype="handle"), sid_9_let, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast", sid_9_let, T.lookup_param("p1", dtype="handle"), T.lookup_param("p2", dtype="handle"), sid_8_let, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_max_pool2d_cast", sid_8_let, output, fast_memory_0_buffer_var.data, slow_memory_1_buffer_var.data, dtype="int32")) @T.prim_func - def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T.handle, fast_memory_6_var: T.Ptr[T.uint8], slow_memory_7_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T.handle, fast_memory_6_var: T.Ptr(T.uint8), slow_memory_7_var: T.Ptr(T.uint8)) -> None: placeholder_29 = T.match_buffer(placeholder_28, [802816], dtype="uint8") T_cast_7 = T.match_buffer(T_cast_6, [177], dtype="int16") fast_memory_6_buffer_var = T.match_buffer(fast_memory_6_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) @@ -174,7 +174,7 @@ def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T_cast_7[ax0_ax1_fused_5 * 3584 + ax2_5 * 64 + ax3_3] = T.cast(tensor_2_let[ax0_ax1_fused_5 * 3584 + ax2_5 * 64 + ax3_3], "int16") @T.prim_func - def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T.handle, T_subtract: T.handle, fast_memory_2_var: T.Ptr[T.uint8], slow_memory_3_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T.handle, T_subtract: T.handle, 
fast_memory_2_var: T.Ptr(T.uint8), slow_memory_3_var: T.Ptr(T.uint8)) -> None: placeholder_4 = T.match_buffer(placeholder_2, [150528], dtype="uint8") placeholder_5 = T.match_buffer(placeholder_3, [1], dtype="int16") T_subtract_1 = T.match_buffer(T_subtract, [452], dtype="int16") @@ -185,7 +185,7 @@ def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T T_subtract_1[ax0_ax1_fused_1 * 672 + ax2_1 * 3 + ax3_inner_1] = T.cast(placeholder_4[ax0_ax1_fused_1 * 672 + ax2_1 * 3 + ax3_inner_1], "int16") - placeholder_5[0] @T.prim_func - def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholder_62: T.handle, placeholder_63: T.handle, placeholder_64: T.handle, T_cast_20: T.handle, fast_memory_4_var: T.Ptr[T.uint8], slow_memory_5_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholder_62: T.handle, placeholder_63: T.handle, placeholder_64: T.handle, T_cast_20: T.handle, fast_memory_4_var: T.Ptr(T.uint8), slow_memory_5_var: T.Ptr(T.uint8)) -> None: placeholder_65 = T.match_buffer(placeholder_62, [150528], dtype="int16") placeholder_66 = T.match_buffer(placeholder_63, [9408], dtype="int16") placeholder_67 = T.match_buffer(placeholder_64, [64], dtype="int32") @@ -380,7 +380,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place @tvm.script.ir_module class ResnetStructurePlanned: @T.prim_func - def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(placeholder: T.handle, placeholder_1: T.handle, T_cast: T.handle, global_workspace_1_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(placeholder: T.handle, placeholder_1: T.handle, T_cast: T.handle, global_workspace_1_var: T.Ptr(T.uint8)) -> None: placeholder_2 = T.match_buffer(placeholder, [360000], dtype="uint8") placeholder_3 = T.match_buffer(placeholder_1, [64], dtype="int32") T_cast_1 = T.match_buffer(T_cast, [215], dtype="int16") @@ -390,7 +390,7 @@ def tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast(p T_cast_1[ax0_ax1_fused * 4800 + ax2 * 64 + ax3_outer * 16 + ax3_inner] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(T.cast(placeholder_2[ax0_ax1_fused * 4800 + ax2 * 64 + ax3_outer * 16 + ax3_inner], "int32") - 94, 1843157232, 31, 1, dtype="int32") + placeholder_3[ax3_outer * 16 + ax3_inner], 255), 0), "uint8"), "int16") @T.prim_func - def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_4200876283395191415_(placeholder_22: T.handle, placeholder_23: T.handle, placeholder_24: T.handle, placeholder_25: T.handle, T_cast_6: T.handle, global_workspace_5_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_4200876283395191415_(placeholder_22: T.handle, placeholder_23: T.handle, placeholder_24: T.handle, placeholder_25: T.handle, T_cast_6: T.handle, global_workspace_5_var: T.Ptr(T.uint8)) -> None: placeholder_29 = T.match_buffer(placeholder_22, [360000], dtype="int16") placeholder_27 = T.match_buffer(placeholder_23, [16384], dtype="int16") placeholder_26 = T.match_buffer(placeholder_24, [256], dtype="int32") @@ -414,7 +414,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_cast_7[ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4] = 
T.cast(T.max(T.min(T.q_multiply_shift(T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_3_let[ax3_inner_4] + placeholder_26[ax3_outer_2 * 64 + ax3_inner_4], 1343014664, 31, -8, dtype="int32") + 136, 255), 0), "uint8"), "int32") - 136, 1073903788, 31, 1, dtype="int32") + placeholder_28[ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4], 255), 0), "uint8") @T.prim_func - def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_15934180698220515269_(placeholder_16: T.handle, placeholder_17: T.handle, placeholder_18: T.handle, T_add: T.handle, global_workspace_4_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_subtract_fixed_point_15934180698220515269_(placeholder_16: T.handle, placeholder_17: T.handle, placeholder_18: T.handle, T_add: T.handle, global_workspace_4_var: T.Ptr(T.uint8)) -> None: placeholder_19 = T.match_buffer(placeholder_16, [360000], dtype="int16") placeholder_20 = T.match_buffer(placeholder_17, [16384], dtype="int16") placeholder_21 = T.match_buffer(placeholder_18, [256], dtype="int32") @@ -437,7 +437,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_add_1[ax0_ax1_fused_ax2_fused_2 * 256 + ax3_outer_1 * 64 + ax3_inner_3] = T.q_multiply_shift(T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_2_let[ax3_inner_3] + placeholder_21[ax3_outer_1 * 64 + ax3_inner_3], 1711626602, 31, -8, dtype="int32") + 132, 255), 0), "uint8"), "int32") - 132, 2094289803, 31, -2, dtype="int32") + 136 @T.prim_func - def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, T_cast_2: T.handle, global_workspace_2_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, T_cast_2: T.handle, global_workspace_2_var: T.Ptr(T.uint8)) -> None: placeholder_7 = T.match_buffer(placeholder_4, [360000], dtype="int16") placeholder_8 = T.match_buffer(placeholder_5, [4096], dtype="int16") placeholder_9 = T.match_buffer(placeholder_6, [64], dtype="int32") @@ -459,7 +459,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place T_cast_3[ax0_ax1_fused_ax2_fused * 64 + ax3_inner_1] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_let[ax3_inner_1] + placeholder_9[ax3_inner_1], 1843106743, 31, -6, dtype="int32"), 255), 0), "uint8"), "int16") @T.prim_func - def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(placeholder_10: T.handle, placeholder_11: T.handle, placeholder_12: T.handle, T_cast_4: T.handle, global_workspace_3_var: T.Ptr[T.uint8]) -> None: + def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(placeholder_10: T.handle, placeholder_11: T.handle, placeholder_12: T.handle, T_cast_4: T.handle, global_workspace_3_var: T.Ptr(T.uint8)) -> None: placeholder_13 = T.match_buffer(placeholder_10, [360000], dtype="int16") placeholder_14 = T.match_buffer(placeholder_11, [36864], dtype="int16") placeholder_15 = T.match_buffer(placeholder_12, [64], dtype="int32") @@ -481,15 +481,15 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(pla T_cast_5[ax0_ax1_fused_ax2_fused_1 * 64 + ax3_inner_2] = T.cast(T.cast(T.max(T.min(T.q_multiply_shift(Conv2dOutput_1_let[ax3_inner_2] + placeholder_15[ax3_inner_2], 1608879842, 31, -7, 
dtype="int32"), 255), 0), "uint8"), "int16") @T.prim_func - def __tvm_main__(input: T.handle, global_workspace_0_var: T.Ptr[T.uint8], output: T.handle) -> None: + def __tvm_main__(input: T.handle, global_workspace_0_var: T.Ptr(T.uint8), output: T.handle) -> None: global_workspace_0_buffer_var = T.match_buffer(global_workspace_0_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16) # body T.attr("default", "device_id", 0) T.attr("default", "device_type", 1) - sid_2_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[5760000], dtype="handle") - sid_6_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[0], dtype="handle") - sid_7_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle") - sid_8_let: T.Ptr[T.int8] = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle") + sid_2_let: T.Ptr(T.int8) = T.address_of(global_workspace_0_buffer_var[5760000], dtype="handle") + sid_6_let: T.Ptr(T.int8) = T.address_of(global_workspace_0_buffer_var[0], dtype="handle") + sid_7_let: T.Ptr(T.int8) = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle") + sid_8_let: T.Ptr(T.int8) = T.address_of(global_workspace_0_buffer_var[6480000], dtype="handle") T.evaluate(T.call_extern("tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast", input, T.lookup_param("p0", dtype="handle"), sid_2_let, global_workspace_0_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast", sid_2_let, T.lookup_param("p3", dtype="handle"), T.lookup_param("p4", dtype="handle"), sid_8_let, global_workspace_0_buffer_var.data, dtype="int32")) T.evaluate(T.call_extern("tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1", sid_8_let, T.lookup_param("p5", dtype="handle"), T.lookup_param("p6", dtype="handle"), sid_7_let, global_workspace_0_buffer_var.data, dtype="int32")) @@ -557,7 +557,7 @@ def __tvm_main__(input: T.handle, output: T.handle) -> None: @tvm.script.ir_module class TensorIntrinStructurePlanned: @T.prim_func - def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None: + def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr(T.uint8)) -> None: global_workspace_1_buffer_var = T.match_buffer( global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16 ) @@ -576,7 +576,7 @@ def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None: @T.prim_func def __tvm_main__( - input: T.handle, global_workspace_1_var: T.Ptr[T.uint8], output: T.handle + input: T.handle, global_workspace_1_var: T.Ptr(T.uint8), output: T.handle ) -> None: global_workspace_1_buffer_var = T.match_buffer( global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16 diff --git a/tests/python/unittest/test_tvm_testing_before_after.py b/tests/python/unittest/test_tvm_testing_before_after.py index 946493922ed58..4fcca7957b15b 100644 --- a/tests/python/unittest/test_tvm_testing_before_after.py +++ b/tests/python/unittest/test_tvm_testing_before_after.py @@ -70,7 +70,7 @@ class TestBeforeAfterParametrizedFixture(BaseBeforeAfter): @tvm.testing.fixture def before(self, n): @T.prim_func - def func(A: T.Buffer[n, "float32"]): + def func(A: T.Buffer(n, "float32")): for i in T.serial(n): A[i] = 0.0 @@ -89,11 +89,11 @@ class TestBeforeAfterIRModule(BaseBeforeAfter): """ class before: - def func_A(A: T.Buffer[16, "float32"]): + def func_A(A: T.Buffer(16, "float32")): for i in T.serial(16): A[i] = 0.0
-            def func_B(A: T.Buffer[16, "int32"]):
+            def func_B(A: T.Buffer(16, "int32")):
                 for i in T.serial(16):
                     A[i] = 42
@@ -112,12 +112,12 @@ def before(self):
         @ir_module
         class mod:
             @T.prim_func
-            def func_A(A: T.Buffer[16, "float32"]):
+            def func_A(A: T.Buffer(16, "float32")):
                 for i in T.serial(16):
                     A[i] = 0.0
             @T.prim_func
-            def func_B(A: T.Buffer[16, "int32"]):
+            def func_B(A: T.Buffer(16, "int32")):
                 for i in T.serial(16):
                     A[i] = 42
diff --git a/tests/python/unittest/test_tvmscript_ops.py b/tests/python/unittest/test_tvmscript_ops.py
index e10681338727f..8eba301fe7197 100644
--- a/tests/python/unittest/test_tvmscript_ops.py
+++ b/tests/python/unittest/test_tvmscript_ops.py
@@ -163,7 +163,7 @@ def test_alloc_zero_dim_buffer_round_trip():
 @T.prim_func
-def ceildiv_test(A: T.Buffer[16, "int32"]):
+def ceildiv_test(A: T.Buffer(16, "int32")):
     for i in range(16):
         A[i] = T.ceildiv(A[i], 4)
diff --git a/tests/python/unittest/test_tvmscript_parser_source.py b/tests/python/unittest/test_tvmscript_parser_source.py
index 359583c1aa06f..416bfd719f5cc 100644
--- a/tests/python/unittest/test_tvmscript_parser_source.py
+++ b/tests/python/unittest/test_tvmscript_parser_source.py
@@ -92,7 +92,7 @@ class dummy:
     class Module:
         @T.prim_func
         def impl(
-            A: T.Buffer[(12, 196, 64), "float32"],
+            A: T.Buffer((12, 196, 64), "float32"),
         ) -> None:
             T.evaluate(0)
diff --git a/tests/python/unittest/test_tvmscript_parser_tir.py b/tests/python/unittest/test_tvmscript_parser_tir.py
index e3f87928acf8d..e96ae4da8c2e4 100644
--- a/tests/python/unittest/test_tvmscript_parser_tir.py
+++ b/tests/python/unittest/test_tvmscript_parser_tir.py
@@ -31,7 +31,7 @@ def test_tir_buffer_proxy():
         and buffer_0.dtype == "float32"
     )
-    buffer_1 = T.Buffer[(64, 64, 64), "int32"]
+    buffer_1 = T.Buffer((64, 64, 64), "int32")
     assert (
         isinstance(buffer_1, tir.Buffer)
         and list(buffer_1.shape) == [64, 64, 64]
@@ -49,7 +49,7 @@ def test_tir_ptr_proxy():
         and ptr_0.type_annotation.storage_scope == "global"
     )
-    ptr_1 = T.Ptr["float32", "shared"]
+    ptr_1 = T.Ptr("float32", "shared")
     assert (
         isinstance(ptr_1, tir.Var)
         and ptr_1.dtype == "handle"
diff --git a/tests/python/unittest/test_tvmscript_regression.py b/tests/python/unittest/test_tvmscript_regression.py
index 6678c10acd7a8..c4ca23b3f037c 100644
--- a/tests/python/unittest/test_tvmscript_regression.py
+++ b/tests/python/unittest/test_tvmscript_regression.py
@@ -74,7 +74,7 @@ def func_ref():
 def test_tir_buffer_region_extent_correct_dtype():
     @T.prim_func
-    def func(A: T.Buffer[(T.int64(16), T.int64(1)), "float32"]):
+    def func(A: T.Buffer((T.int64(16), T.int64(1)), "float32")):
         for i in T.grid(T.int64(16)):
             with T.block("block"):
                 vi = T.axis.remap("S", [i])
diff --git a/tests/python/unittest/test_tvmscript_roundtrip.py b/tests/python/unittest/test_tvmscript_roundtrip.py
index f52b488fef6b8..c598d3a73d980 100644
--- a/tests/python/unittest/test_tvmscript_roundtrip.py
+++ b/tests/python/unittest/test_tvmscript_roundtrip.py
@@ -208,30 +208,30 @@ def mmult(
         arg2: T.handle = T.tvm_struct_get(args, 2, 12, dtype="handle")
         arg2_code: T.int32 = buf_type_ids[2]
-        A_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
+        A_data: T.Ptr(T.int32) = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         T.attr(A_data, "storage_alignment", 128)
         A = T.Buffer([1024 * 1024], dtype="int32", data=A_data)
-        buf0_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
+        buf0_shape_data: T.Ptr(T.int32) = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
         buf0_shape = T.Buffer([2], dtype="int32", data=buf0_shape_data)
-        buf0_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
+        buf0_strides_data: T.Ptr(T.int32) = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
         buf0_strides = T.Buffer([2], dtype="int32", data=buf0_strides_data)
         dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
-        B_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
+        B_data: T.Ptr(T.int32) = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         T.attr(B_data, "storage_alignment", 128)
         B = T.Buffer([1024 * 1024], dtype="int32", data=B_data)
-        buf1_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
+        buf1_shape_data: T.Ptr(T.int32) = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
         buf1_shape = T.Buffer([2], dtype="int32", data=buf1_shape_data)
-        buf1_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
+        buf1_strides_data: T.Ptr(T.int32) = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
         buf1_strides = T.Buffer([2], dtype="int32", data=buf1_strides_data)
-        C_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
+        C_data: T.Ptr(T.int32) = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
         T.attr(C_data, "storage_alignment", 128)
         C = T.Buffer([1024 * 1024], dtype="int32", data=C_data)
-        buf2_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
+        buf2_shape_data: T.Ptr(T.int32) = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
         buf2_shape = T.Buffer([2], dtype="int32", data=buf2_shape_data)
-        buf2_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
+        buf2_strides_data: T.Ptr(T.int32) = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
         buf2_strides = T.Buffer([2], dtype="int32", data=buf2_strides_data)
         assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
@@ -932,9 +932,9 @@ def func(A: T.handle, W: T.handle, Conv: T.handle) -> None:
 def opt_conv_tensorcore_lower():
     @T.prim_func
     def func(
-        A: T.Buffer[(16, 14, 14, 16, 16, 16), "float16"],
-        W: T.Buffer[(3, 3, 16, 32, 16, 16), "float16"],
-        Conv: T.Buffer[(16, 14, 14, 32, 16, 16), "float32"],
+        A: T.Buffer((16, 14, 14, 16, 16, 16), "float16"),
+        W: T.Buffer((3, 3, 16, 32, 16, 16), "float16"),
+        Conv: T.Buffer((16, 14, 14, 32, 16, 16), "float32"),
     ) -> None:
         # function attr dict
         T.func_attr({"global_symbol": "default_function", "tir.noalias": True})
@@ -2226,7 +2226,7 @@ def opt_conv_tensorcore_mod_host():
     @T.prim_func
     def opt_conv_tensorcore_mod_host(
         args: T.handle,
-        arg_type_ids: T.Buffer[(3,), "int32"],
+        arg_type_ids: T.Buffer((3,), "int32"),
         num_args: T.int32,
         out_ret_value: T.handle,
         out_ret_tcode: T.handle,
@@ -2242,7 +2242,7 @@ def opt_conv_tensorcore_mod_host(
             }
         )
         # body
-        stack_tcode_data: T.Ptr[T.int32] = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle")
+        stack_tcode_data: T.Ptr(T.int32) = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle")
         stack_tcode = T.Buffer([9], "int32", data=stack_tcode_data)
         stack_value: T.handle = T.tvm_stack_alloca("arg_value", 10, dtype="handle")
         assert num_args == 3, "default_function: num_args should be 3"
@@ -2255,25 +2255,25 @@ def opt_conv_tensorcore_mod_host(
         A: T.handle = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         T.attr(A, "storage_alignment", 128)
-        arg0_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
+        arg0_shape_data: T.Ptr(T.int64) = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
         arg0_shape = T.Buffer([6], "int64", data=arg0_shape_data)
-        arg0_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
+        arg0_strides_data: T.Ptr(T.int64) = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
         arg0_strides = T.Buffer([6], "int64", data=arg0_strides_data)
         dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
         W: T.handle = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         T.attr(W, "storage_alignment", 128)
-        arg1_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
+        arg1_shape_data: T.Ptr(T.int64) = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
         arg1_shape = T.Buffer([6], "int64", data=arg1_shape_data)
-        arg1_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
+        arg1_strides_data: T.Ptr(T.int64) = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
         arg1_strides = T.Buffer([6], "int64", data=arg1_strides_data)
         Conv: T.handle = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
         T.attr(Conv, "storage_alignment", 128)
-        arg2_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
+        arg2_shape_data: T.Ptr(T.int64) = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
         arg2_shape = T.Buffer([6], "int64", data=arg2_shape_data)
-        arg2_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
+        arg2_strides_data: T.Ptr(T.int64) = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
         arg2_strides = T.Buffer([6], "int64", data=arg2_strides_data)
         assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
@@ -3129,7 +3129,7 @@ def func_root_attr():
 def func_trivial_root_block():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         with T.block("root"):
             A[0] = 0
@@ -3138,7 +3138,7 @@ def func(A: T.Buffer[1, "int32"]):
 def func_nested_root_block():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         with T.block("root"):
             with T.block("block"):
                 A[0] = 0
@@ -3149,7 +3149,7 @@ def func(A: T.Buffer[1, "int32"]):
 def func_T_ptr_let_statement():
     @T.prim_func
     def func_T_ptr_let_statement(
-        args: T.handle, arg_type_ids_handle: T.Ptr[T.int32], num_args: T.int32
+        args: T.handle, arg_type_ids_handle: T.Ptr(T.int32), num_args: T.int32
     ) -> None:
         # The T.Ptr declaration in the parameter list should parse
         # correctly, and should be usable as the data pointer in a buffer.
@@ -3161,14 +3161,14 @@ def func_T_ptr_let_statement(
         # Functions that return a "handle" can be assigned to a T.Ptr
         # variable. A variable annotated with T.Ptr still has dtype of
         # T.handle, but has type annotation as a pointer type.
-        A_data: T.Ptr[T.float32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
+        A_data: T.Ptr(T.float32) = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
         # The buffer declaration has a data pointer defined earlier in
         # this function. It should only be defined after the data pointer
         # has been defined, and should not be hoisted into the header of
         # the function as other buffer_decl statements can be.
         A = T.Buffer([1024], dtype="float32", data=A_data)
-        B_data: T.Ptr[T.float32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
+        B_data: T.Ptr(T.float32) = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
         B = T.Buffer([1024], dtype="float32", data=B_data)
         B[0] = A[0]
@@ -3188,7 +3188,7 @@ def func_T_ptr_allocate() -> None:
 def llvm_intrin_call():
     @T.prim_func
-    def ctpop(A: T.Buffer[(16,), "uint8"], B: T.Buffer[(16,), "uint8"]) -> None:
+    def ctpop(A: T.Buffer((16,), "uint8"), B: T.Buffer((16,), "uint8")) -> None:
         for i in range(0, 16):
             with T.block("A"):
                 vi = T.axis.remap(
@@ -3270,13 +3270,13 @@ def string_annotation_of_special_chars():
 def pointer_type():
     @T.prim_func
-    def func_with_ptr_type_annotations(x: T.Ptr[T.int32], y: T.Ptr[T.int32, "shared"]):
+    def func_with_ptr_type_annotations(x: T.Ptr(T.int32), y: T.Ptr(T.int32, "shared")):
         xx_data = T.allocate([16], "int32", "global")
         xx = T.Buffer(shape=[16], dtype="int32", scope="global", data=xx_data)
         yy_data = T.allocate([16], "int32", "shared")
         yy = T.Buffer(shape=[16], dtype="int32", scope="shared", data=yy_data)
-        a: T.Ptr[T.int32] = T.address_of(xx[0], dtype="handle")
-        b: T.Ptr[T.int32, "shared"] = T.address_of(yy[0], dtype="handle")
+        a: T.Ptr(T.int32) = T.address_of(xx[0], dtype="handle")
+        b: T.Ptr(T.int32, "shared") = T.address_of(yy[0], dtype="handle")
         T.evaluate(T.call_extern("copy", a, b, dtype=""))
     return func_with_ptr_type_annotations
@@ -3328,7 +3328,7 @@ def func():
 def void_ptr():
     @T.prim_func
-    def func(out_ret_value: T.Ptr[T.void]):
+    def func(out_ret_value: T.Ptr(T.void)):
         T.evaluate(out_ret_value)
     return func
@@ -3336,7 +3336,7 @@ def func(out_ret_value: T.Ptr[T.void]):
 def decl_buffer():
     @T.prim_func
-    def func(A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16, 16), "float32"]) -> None:
+    def func(A: T.Buffer((16, 16), "float32"), B: T.Buffer((16, 16), "float32")) -> None:
         A_flattened = T.decl_buffer(data=A.data, shape=(256,), dtype="float32")
         B_flattened = T.decl_buffer(data=B.data, shape=(256,), dtype="float32")
         C_alias = T.decl_buffer(data=A_flattened.data, shape=(256,), dtype="float32")
@@ -3348,7 +3348,7 @@ def func(A: T.Buffer[(16, 16), "float32"], B: T.Buffer[(16, 16), "float32"]) ->
 def allocate_and_decl_buffer():
     @T.prim_func
-    def func(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]) -> None:
+    def func(A: T.Buffer((16,), "float32"), B: T.Buffer((16,), "float32")) -> None:
         D_data = T.allocate((16,), "float32", "global")
         D = T.decl_buffer((16,), "float32", data=D_data)
         for i in range(4):
@@ -3367,7 +3367,7 @@ def func(A: T.Buffer[(16,), "float32"], B: T.Buffer[(16,), "float32"]) -> None:
 def float_infinity():
     @T.prim_func
     def func(
-        placeholder: T.Buffer[(1, 512, 768), "float32"], T_isinf: T.Buffer[(1, 512, 768), "bool"]
+        placeholder: T.Buffer((1, 512, 768), "float32"), T_isinf: T.Buffer((1, 512, 768), "bool")
     ) -> None:
         # function attr dict
         T.func_attr({"global_symbol": "main", "tir.noalias": True})
@@ -3445,7 +3445,7 @@ def func() -> None:
 def implicit_evaluate():
     @T.prim_func
-    def func(A: T.Buffer[1, "int32"]):
+    def func(A: T.Buffer(1, "int32")):
         T.evaluate(T.assume(A[0] == 5))
         A[0] = 10
@@ -3508,7 +3508,7 @@ def nested_boolean_expressions():
     def make_ir_generator(name, expression):
         def inner():
             @T.prim_func
-            def func(A: T.Buffer[1, "bool"], i: T.bool, j: T.bool, k: T.bool):
+            def func(A: T.Buffer(1, "bool"), i: T.bool, j: T.bool, k: T.bool):
                 A[0] = expression(i, j, k)
             return func
@@ -3524,7 +3524,7 @@ def func(A: T.Buffer[1, "bool"], i: T.bool, j: T.bool, k: T.bool):
 def multi_env_threads():
     @T.prim_func
-    def func(A: T.Buffer[128, "float32"], C: T.Buffer[128, "float32"]):
+    def func(A: T.Buffer(128, "float32"), C: T.Buffer(128, "float32")):
         B = T.alloc_buffer([128], dtype="float32")
         for i in T.thread_binding(128, thread="threadIdx.x"):
             B[i] = A[i] + 1.0
diff --git a/tests/python/unittest/test_tvmscript_syntax_sugar.py b/tests/python/unittest/test_tvmscript_syntax_sugar.py
index 35f9e6c2e635b..a840722bea8ce 100644
--- a/tests/python/unittest/test_tvmscript_syntax_sugar.py
+++ b/tests/python/unittest/test_tvmscript_syntax_sugar.py
@@ -121,8 +121,8 @@ def elementwise_buffer_kwargs(
 # match buffer - use buffer without kwargs
 @T.prim_func
 def elementwise_buffer_no_kwargs(
-    a: T.Buffer[(128, 128, 128, 128), "float32"],
-    b: T.Buffer[(128, 128, 128, 128), "float32"],
+    a: T.Buffer((128, 128, 128, 128), "float32"),
+    b: T.Buffer((128, 128, 128, 128), "float32"),
 ) -> None:
     for i, j, k, l in T.grid(128, 128, 128, 128):
         with T.block("B"):
@@ -145,7 +145,7 @@ def func_no_sugar(a: T.handle):
             A[i] = 0.0
     @T.prim_func
-    def func_with_sugar(A: T.Buffer[16, "float32"]):
+    def func_with_sugar(A: T.Buffer(16, "float32")):
         for i in T.serial(16):
             A[i] = 0.0
@@ -191,8 +191,8 @@ def match_buffer_int64(a: T.handle, c: T.handle) -> None:
 @T.prim_func
 def match_buffer_int64_after_roundtrip(
-    A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-    C: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+    A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+    C: T.Buffer((T.int64(128), T.int64(128)), "float32"),
 ) -> None:
     B = T.alloc_buffer((T.int64(128), T.int64(128)), dtype="float32")
     for i, j in T.grid(128, 128):
@@ -213,13 +213,13 @@ def test_match_buffer_int64():
 def test_match_buffer_region_has_implicit_shape_dtype():
     @T.prim_func
-    def explicit_shape_dtype(A: T.Buffer[(16, 64), "int32"]):
+    def explicit_shape_dtype(A: T.Buffer((16, 64), "int32")):
         with T.block():
             B = T.match_buffer(A[8:16, 32:64], shape=(8, 32), dtype="int32")
             T.evaluate(0)
     @T.prim_func
-    def implicit_shape_dtype(A: T.Buffer[(16, 64), "int32"]):
+    def implicit_shape_dtype(A: T.Buffer((16, 64), "int32")):
         with T.block():
             B = T.match_buffer(A[8:16, 32:64])
             T.evaluate(0)
@@ -245,7 +245,7 @@ def test_letstmt_bufferload_without_type_annotation():
     # Failure occurred during parsing of the tvmscript.
     @T.prim_func
-    def func_without_type_annotation(A: T.Buffer[(1,), "int32"]):
+    def func_without_type_annotation(A: T.Buffer((1,), "int32")):
         x = A[0]
         T.evaluate(x)
@@ -350,8 +350,8 @@ def mma_sync_m16n16k16_desc_manual(a: T.handle, b: T.handle, c: T.handle) -> Non
 def test_int64_loop():
     @T.prim_func
     def int64_grid(
-        A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-        B: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+        A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+        B: T.Buffer((T.int64(128), T.int64(128)), "float32"),
     ) -> None:
         for i, j in T.grid(T.int64(128), T.int64(128)):
             with T.block("C"):
@@ -360,8 +360,8 @@ def int64_grid(
     @T.prim_func
     def int64_grid_expanded(
-        A: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
-        B: T.Buffer[(T.int64(128), T.int64(128)), "float32"],
+        A: T.Buffer((T.int64(128), T.int64(128)), "float32"),
+        B: T.Buffer((T.int64(128), T.int64(128)), "float32"),
     ) -> None:
         for i in range(T.int64(0), T.int64(128)):
             for j in range(T.int64(0), T.int64(128)):
@@ -375,12 +375,12 @@ def int64_grid_expanded(
 def test_implicit_evaluate_assume():
     @T.prim_func
-    def explicit(A: T.Buffer[1, "int32"]):
+    def explicit(A: T.Buffer(1, "int32")):
         T.evaluate(T.assume(A[0] == 5))
         A[0] = 10
     @T.prim_func
-    def implicit(A: T.Buffer[1, "int32"]):
+    def implicit(A: T.Buffer(1, "int32")):
         T.assume(A[0] == 5)
         A[0] = 10
@@ -389,11 +389,11 @@ def implicit(A: T.Buffer[1, "int32"]):
 def test_implicit_evaluate_call_extern():
     @T.prim_func
-    def explicit(A: T.Buffer[1, "int32"]):
+    def explicit(A: T.Buffer(1, "int32")):
         T.evaluate(T.call_extern("extern_func", A.data, dtype="int32"))
     @T.prim_func
-    def implicit(A: T.Buffer[1, "int32"]):
+    def implicit(A: T.Buffer(1, "int32")):
         T.call_extern("extern_func", A.data, dtype="int32")
     assert_structural_equal(implicit, explicit)